PS3 Linux is no more...
author Sam Lantinga <slouken@libsdl.org>
Wed, 19 Jan 2011 22:25:40 -0800
changeset 5048 187d7d446306
parent 5047 edaf3e364a05
child 5049 28003ba91f57
PS3 Linux is no more...
Makefile.in
README.PS3
configure.in
include/SDL_config.h.in
src/video/SDL_sysvideo.h
src/video/SDL_video.c
src/video/ps3/SDL_ps3events.c
src/video/ps3/SDL_ps3events_c.h
src/video/ps3/SDL_ps3modes.c
src/video/ps3/SDL_ps3modes_c.h
src/video/ps3/SDL_ps3render.c
src/video/ps3/SDL_ps3render_c.h
src/video/ps3/SDL_ps3spe.c
src/video/ps3/SDL_ps3spe_c.h
src/video/ps3/SDL_ps3video.c
src/video/ps3/SDL_ps3video.h
src/video/ps3/spulibs/Makefile
src/video/ps3/spulibs/bilin_scaler.c
src/video/ps3/spulibs/fb_writer.c
src/video/ps3/spulibs/spu_common.h
src/video/ps3/spulibs/yuv2rgb.c
--- a/Makefile.in	Wed Jan 19 22:21:31 2011 -0800
+++ b/Makefile.in	Wed Jan 19 22:25:40 2011 -0800
@@ -37,11 +37,6 @@
 SDLMAIN_TARGET = libSDLmain.a
 SDLMAIN_OBJECTS = @SDLMAIN_OBJECTS@
 
-# PS3 SPU programs
-SPU_GCC = @SPU_GCC@
-EMBEDSPU = @EMBEDSPU@
-#include $(srcdir)/src/video/ps3/spulibs/Makefile
-
 DIST = acinclude Android.mk autogen.sh Borland.html Borland.zip BUGS build-scripts configure configure.in COPYING CREDITS include INSTALL Makefile.minimal Makefile.in README* sdl-config.in sdl.m4 sdl.pc.in SDL.spec SDL.spec.in src test TODO VisualC.html VisualC VisualCE Watcom-Win32.zip WhatsNew Xcode Xcode-iPhoneOS
 
 HDRS = \
--- a/README.PS3	Wed Jan 19 22:21:31 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,35 +0,0 @@
-
-SDL on Sony Playstation3
-------------------------
-
-Installation:
-  First, you have to install the Cell SDK
-  - Download the Cell SDK installer RPM and ISO images to
-    a temporary directory such as /tmp/cellsdk.
-  - Mount the image: mount -o loop CellSDK-Devel-Fedora_3.1.0.0.0.iso /tmp/cellsdk
-  - Install the SDK installer: rpm -ivh cell-install-3.1.0-0.0.noarch.rpm
-  - Install the SDK: cd /opt/cell && ./cellsdk --iso /tmp/cellsdkiso install
-
-  You'll then need to install the SPU-libs
-  - Run make ps3-libs && make ps3libs-install
-
-  Finally, install SDL
-  - Go to SDL-1.2/ and build SDL like any other GNU style package.
-  e.g.
-    - Build the configure-script with ./autogen.sh
-    - Configure SDL for your needs: ./configure --enable-video-ps3 ...
-    - Build and install it: make && make install
-
-
-Todo:
-  - Mouse & Keyboard support
-  - On SPU-side the current scaler and converter restrictions are:
-    - resolution has to be a multiple of 8 (will work on that)
-    - scaler/converter only supports the YV12 and IYUV format
-    - the scaler only supports bilinear scaling (Lanczos would be nice)
-  - Optimize the SPU-program handling on the PPE side
-  - Integrate spumedia in SDL
-
-Have fun!
-  Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
-
--- a/configure.in	Wed Jan 19 22:21:31 2011 -0800
+++ b/configure.in	Wed Jan 19 22:25:40 2011 -0800
@@ -1546,46 +1546,6 @@
     fi
 }
 
-dnl See if we're running on PlayStation 3 Cell hardware
-CheckPS3()
-{
-  AC_ARG_ENABLE(video-ps3,
-                AC_HELP_STRING([--enable-video-ps3], [use PlayStation 3 Cell driver [[default=yes]]]),
-                , enable_video_ps3=yes)
-  if test x$enable_video = xyes -a x$enable_video_ps3 = xyes; then 
-    video_ps3=no
-    AC_CHECK_HEADER([linux/fb.h])
-    AC_CHECK_HEADER([asm/ps3fb.h], [have_ps3fb_hdr=yes], [],
-            [#ifndef _LINUX_TYPES_H
-                #include <linux/types.h>
-            #endif])
-    AC_CHECK_HEADER([libspe2.h], have_libspe2_hdr=yes)
-    AC_CHECK_LIB([spe2], spe_context_create, have_spe2_lib=yes)
-
-    AC_CHECK_PROGS(SPU_GCC, [spu-gcc])
-    AC_CHECK_PROGS(EMBEDSPU, [embedspu])
-
-    have_spu_libs=yes
-    AC_CHECK_LIB([fb_writer_spu], [main], [], [have_spu_libs=no])
-    AC_CHECK_LIB([yuv2rgb_spu], [main], [], [have_spu_libs=no])
-    AC_CHECK_LIB([bilin_scaler_spu], [main], [], [have_spu_libs=no])
-    if test x$have_ps3fb_hdr = xyes -a x$have_libspe2_hdr = xyes -a x$have_spe2_lib = xyes -a "$SPU_GCC" -a "$EMBEDSPU"; then
-        AC_DEFINE(SDL_VIDEO_DRIVER_PS3)
-        video_ps3=yes
-        have_video=yes
-        SOURCES="$SOURCES $srcdir/src/video/ps3/*.c"
-        EXTRA_CFLAGS="$EXTRA_CFLAGS -I/opt/cell/sdk/usr/include"
-        EXTRA_LDFLAGS="$EXTRA_LDFLAGS -L/opt/cell/sdk/usr/lib -lspe2 -lfb_writer_spu -lyuv2rgb_spu -lbilin_scaler_spu"
-
-        if test x$have_spu_libs = xno; then 
-              AC_MSG_WARN([ps3libs missing, please run make ps3libs])
-        fi
-    fi
-    AC_MSG_CHECKING([for PlayStation 3 Cell support])
-    AC_MSG_RESULT([$video_ps3])
-  fi
-}
-
 dnl rcg04172001 Set up the Null video driver.
 CheckDummyVideo()
 {
@@ -2245,7 +2205,6 @@
         CheckX11
         CheckDirectFB
         CheckFusionSound
-        CheckPS3
         CheckOpenGLX11
         CheckInputEvents
         CheckTslib
--- a/include/SDL_config.h.in	Wed Jan 19 22:21:31 2011 -0800
+++ b/include/SDL_config.h.in	Wed Jan 19 22:25:40 2011 -0800
@@ -264,7 +264,6 @@
 #undef SDL_VIDEO_DRIVER_NDS
 #undef SDL_VIDEO_DRIVER_PHOTON
 #undef SDL_VIDEO_DRIVER_QNXGF
-#undef SDL_VIDEO_DRIVER_PS3
 #undef SDL_VIDEO_DRIVER_RISCOS
 #undef SDL_VIDEO_DRIVER_WIN32
 #undef SDL_VIDEO_DRIVER_X11
--- a/src/video/SDL_sysvideo.h	Wed Jan 19 22:21:31 2011 -0800
+++ b/src/video/SDL_sysvideo.h	Wed Jan 19 22:25:40 2011 -0800
@@ -411,9 +411,6 @@
 #if SDL_VIDEO_DRIVER_DIRECTFB
 extern VideoBootStrap DirectFB_bootstrap;
 #endif
-#if SDL_VIDEO_DRIVER_PS3
-extern VideoBootStrap PS3_bootstrap;
-#endif
 #if SDL_VIDEO_DRIVER_WIN32
 extern VideoBootStrap WIN32_bootstrap;
 #endif
--- a/src/video/SDL_video.c	Wed Jan 19 22:21:31 2011 -0800
+++ b/src/video/SDL_video.c	Wed Jan 19 22:25:40 2011 -0800
@@ -65,9 +65,6 @@
 #if SDL_VIDEO_DRIVER_DIRECTFB
     &DirectFB_bootstrap,
 #endif
-#if SDL_VIDEO_DRIVER_PS3
-    &PS3_bootstrap,
-#endif
 #if SDL_VIDEO_DRIVER_WIN32
     &WIN32_bootstrap,
 #endif
--- a/src/video/ps3/SDL_ps3events.c	Wed Jan 19 22:21:31 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-/*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2010 Sam Lantinga
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-    Sam Lantinga
-    slouken@libsdl.org
-*/
-#include "SDL_config.h"
-
-#include "../../events/SDL_sysevents.h"
-#include "../../events/SDL_events_c.h"
-
-#include "SDL_ps3video.h"
-#include "SDL_ps3events_c.h"
-
-void
-PS3_PumpEvents(_THIS)
-{
-    /* do nothing. */
-}
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/ps3/SDL_ps3events_c.h	Wed Jan 19 22:21:31 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-/*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2010 Sam Lantinga
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-    Sam Lantinga
-    slouken@libsdl.org
-*/
-#include "SDL_config.h"
-
-#include "SDL_ps3video.h"
-
-extern void PS3_PumpEvents(_THIS);
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/ps3/SDL_ps3modes.c	Wed Jan 19 22:21:31 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,143 +0,0 @@
-/*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2010 Sam Lantinga
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-    Sam Lantinga
-    slouken@libsdl.org
-*/
-#include "SDL_config.h"
-
-#include "SDL_ps3video.h"
-
-void
-PS3_InitModes(_THIS)
-{
-    deprintf(1, "+PS3_InitModes()\n");
-    SDL_VideoDisplay display;
-    SDL_VideoData *data = (SDL_VideoData *) _this->driverdata;
-    SDL_DisplayMode mode;
-    PS3_DisplayModeData *modedata;
-    unsigned long vid = 0;
-
-    modedata = (PS3_DisplayModeData *) SDL_malloc(sizeof(*modedata));
-    if (!modedata) {
-        return;
-    }
-
-    /* Setting up the DisplayMode based on current settings */
-    struct ps3fb_ioctl_res res;
-    if (ioctl(data->fbdev, PS3FB_IOCTL_SCREENINFO, &res)) {
-        SDL_SetError("Can't get PS3FB_IOCTL_SCREENINFO");
-    }
-    mode.format = SDL_PIXELFORMAT_RGB888;
-    mode.refresh_rate = 0;
-    mode.w = res.xres;
-    mode.h = res.yres;
-
-    /* Setting up driver specific mode data,
-     * Get the current ps3 specific videmode number */
-    if (ioctl(data->fbdev, PS3FB_IOCTL_GETMODE, (unsigned long)&vid)) {
-        SDL_SetError("Can't get PS3FB_IOCTL_GETMODE");
-    }
-    deprintf(2, "PS3FB_IOCTL_GETMODE = %u\n", vid);
-    modedata->mode = vid;
-    mode.driverdata = modedata;
-
-    /* Set display's videomode and add it */
-    SDL_zero(display);
-    display.desktop_mode = mode;
-    display.current_mode = mode;
-
-    SDL_AddVideoDisplay(&display);
-    deprintf(1, "-PS3_InitModes()\n");
-}
-
-/* DisplayModes available on the PS3 */
-static SDL_DisplayMode ps3fb_modedb[] = {
-    /* VESA */
-    {SDL_PIXELFORMAT_RGB888, 1280, 768, 0, NULL}, // WXGA
-    {SDL_PIXELFORMAT_RGB888, 1280, 1024, 0, NULL}, // SXGA
-    {SDL_PIXELFORMAT_RGB888, 1920, 1200, 0, NULL}, // WUXGA
-    /* Native resolutions (progressive, "fullscreen") */
-    {SDL_PIXELFORMAT_RGB888, 720, 480, 0, NULL}, // 480p
-    {SDL_PIXELFORMAT_RGB888, 1280, 720, 0, NULL}, // 720p
-    {SDL_PIXELFORMAT_RGB888, 1920, 1080, 0, NULL} // 1080p
-};
-
-/* PS3 videomode number according to ps3fb_modedb */
-static PS3_DisplayModeData ps3fb_data[] = {
-    {11}, {12}, {13}, {130}, {131}, {133}, 
-};
-
-void
-PS3_GetDisplayModes(_THIS, SDL_VideoDisplay * display)
-{
-    deprintf(1, "+PS3_GetDisplayModes()\n");
-    SDL_DisplayMode mode;
-    unsigned int nummodes;
-
-    nummodes = sizeof(ps3fb_modedb) / sizeof(SDL_DisplayMode);
-
-    int n;
-    for (n=0; n<nummodes; ++n) {
-        /* Get driver specific mode data */
-        ps3fb_modedb[n].driverdata = &ps3fb_data[n];
-
-        /* Add DisplayMode to list */
-        deprintf(2, "Adding resolution %u x %u\n", ps3fb_modedb[n].w, ps3fb_modedb[n].h);
-        SDL_AddDisplayMode(display, &ps3fb_modedb[n]);
-    }
-    deprintf(1, "-PS3_GetDisplayModes()\n");
-}
-
-int
-PS3_SetDisplayMode(_THIS, SDL_VideoDisplay * display, SDL_DisplayMode * mode)
-{
-    deprintf(1, "+PS3_SetDisplayMode()\n");
-    SDL_VideoData *data = (SDL_VideoData *) _this->driverdata;
-    PS3_DisplayModeData *dispdata = (PS3_DisplayModeData *) mode->driverdata;
-
-    /* Set the new DisplayMode */
-    deprintf(2, "Setting PS3FB_MODE to %u\n", dispdata->mode);
-    if (ioctl(data->fbdev, PS3FB_IOCTL_SETMODE, (unsigned long)&dispdata->mode)) {
-        deprintf(2, "Could not set PS3FB_MODE\n");
-        SDL_SetError("Could not set PS3FB_MODE\n");
-        return -1;
-    }
-
-    deprintf(1, "-PS3_SetDisplayMode()\n");
-    return 0;
-}
-
-void
-PS3_QuitModes(_THIS)
-{
-    deprintf(1, "+PS3_QuitModes()\n");
-
-    /* There was no mem allocated for driverdata */
-    int i, j;
-    for (i = 0; i < SDL_GetNumVideoDisplays(); ++i) {
-        SDL_VideoDisplay *display = SDL_GetVideoDisplay(i);
-        for (j = display->num_display_modes; j--;) {
-            display->display_modes[j].driverdata = NULL;
-        }
-    }
-
-    deprintf(1, "-PS3_QuitModes()\n");
-}
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/ps3/SDL_ps3modes_c.h	Wed Jan 19 22:21:31 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,34 +0,0 @@
-/*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2010 Sam Lantinga
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-    Sam Lantinga
-    slouken@libsdl.org
-*/
-#include "SDL_config.h"
-
-#ifndef _SDL_ps3modes_h
-#define _SDL_ps3modes_h
-
-extern void PS3_InitModes(_THIS);
-extern void PS3_GetDisplayModes(_THIS, SDL_VideoDisplay * display);
-extern int PS3_SetDisplayMode(_THIS, SDL_VideoDisplay * display, SDL_DisplayMode * mode);
-extern void PS3_QuitModes(_THIS);
-
-#endif /* SDL_ps3modes_h */
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/ps3/SDL_ps3render.c	Wed Jan 19 22:21:31 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,747 +0,0 @@
-/*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2010 Sam Lantinga
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-    Sam Lantinga
-    slouken@libsdl.org
-*/
-#include "SDL_config.h"
-
-#include "SDL_video.h"
-#include "../SDL_sysvideo.h"
-#include "../SDL_yuv_sw_c.h"
-#include "../SDL_renderer_sw.h"
-
-#include "SDL_ps3video.h"
-#include "SDL_ps3spe_c.h"
-
-#include <fcntl.h>
-#include <stdlib.h>
-#include <sys/ioctl.h>
-#include <linux/kd.h>
-#include <linux/fb.h>
-#include <sys/mman.h>
-#include <asm/ps3fb.h>
-
-
-/* Stores the executable name */
-extern spe_program_handle_t yuv2rgb_spu;
-extern spe_program_handle_t bilin_scaler_spu;
-
-/* SDL surface based renderer implementation */
-static SDL_Renderer *SDL_PS3_CreateRenderer(SDL_Window * window,
-                                              Uint32 flags);
-static int SDL_PS3_DisplayModeChanged(SDL_Renderer * renderer);
-static int SDL_PS3_ActivateRenderer(SDL_Renderer * renderer);
-static int SDL_PS3_RenderPoint(SDL_Renderer * renderer, int x, int y);
-static int SDL_PS3_RenderLine(SDL_Renderer * renderer, int x1, int y1,
-                                int x2, int y2);
-static int SDL_PS3_RenderFill(SDL_Renderer * renderer,
-                                const SDL_Rect * rect);
-static int SDL_PS3_RenderCopy(SDL_Renderer * renderer,
-                                SDL_Texture * texture,
-                                const SDL_Rect * srcrect,
-                                const SDL_Rect * dstrect);
-static void SDL_PS3_RenderPresent(SDL_Renderer * renderer);
-static void SDL_PS3_DestroyRenderer(SDL_Renderer * renderer);
-
-/* Texture */
-static int PS3_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture);
-static int PS3_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture, void **pixels, int *pitch);
-static int PS3_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture, const SDL_Rect * rect, const void *pixels, int pitch);
-static int PS3_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture, const SDL_Rect * rect, int markDirty, void **pixels, int *pitch);
-static void PS3_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture);
-static void PS3_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture);
-
-
-SDL_RenderDriver SDL_PS3_RenderDriver = {
-    SDL_PS3_CreateRenderer,
-    {
-     "ps3",
-     (SDL_RENDERER_SINGLEBUFFER | SDL_RENDERER_PRESENTVSYNC |
-      SDL_RENDERER_PRESENTFLIP2 | SDL_RENDERER_PRESENTDISCARD |
-      SDL_RENDERER_ACCELERATED),
-     (SDL_TEXTUREMODULATE_NONE),
-     (SDL_BLENDMODE_NONE),
-     /* We use bilinear scaling on the SPE for YV12 & IYUV
-      * (width and height % 8 = 0) */
-     (SDL_SCALEMODE_SLOW)
-     }
-};
-
-typedef struct
-{
-    int current_screen;
-    SDL_Surface *screen;
-    SDL_VideoDisplay *display;
-    /* adress of the centered image in the framebuffer (double buffered) */
-    uint8_t *center[2];
-
-    /* width of input (bounded by writeable width) */
-    unsigned int bounded_width;
-    /* height of input (bounded by writeable height) */
-    unsigned int bounded_height;
-    /* offset from the left side (used for centering) */
-    unsigned int offset_left;
-    /* offset from the upper side (used for centering) */
-    unsigned int offset_top;
-    /* width of screen which is writeable */
-    unsigned int wr_width;
-    /* width of screen which is writeable */
-    unsigned int wr_height;
-    /* size of a screen line: width * bpp/8 */
-    unsigned int line_length;
-
-    /* Is the kernels fb size bigger than ~12MB
-     * double buffering will work for 1080p */
-    unsigned int double_buffering;
-
-    /* SPE threading stuff */
-    spu_data_t *converter_thread_data;
-    spu_data_t *scaler_thread_data;
-
-    /* YUV converting transfer data */
-    volatile struct yuv2rgb_parms_t * converter_parms __attribute__((aligned(128)));
-    /* Scaler transfer data */
-    volatile struct scale_parms_t * scaler_parms __attribute__((aligned(128)));
-} SDL_PS3_RenderData;
-
-typedef struct
-{
-    int pitch;
-    /* Image data */
-    volatile void *pixels;
-    /* Use software renderer for not supported formats */
-    SDL_SW_YUVTexture *yuv;
-} PS3_TextureData;
-
-SDL_Renderer *
-SDL_PS3_CreateRenderer(SDL_Window * window, Uint32 flags)
-{
-    deprintf(1, "+SDL_PS3_CreateRenderer()\n");
-    SDL_VideoDisplay *display = window->display;
-    SDL_DisplayMode *displayMode = &display->current_mode;
-    SDL_VideoData *devdata = display->device->driverdata;
-    SDL_Renderer *renderer;
-    SDL_PS3_RenderData *data;
-    struct ps3fb_ioctl_res res;
-    int i, n;
-    int bpp;
-    Uint32 Rmask, Gmask, Bmask, Amask;
-
-    if (!SDL_PixelFormatEnumToMasks
-        (displayMode->format, &bpp, &Rmask, &Gmask, &Bmask, &Amask)) {
-        SDL_SetError("Unknown display format");
-        return NULL;
-    }
-
-    renderer = (SDL_Renderer *) SDL_calloc(1, sizeof(*renderer));
-    if (!renderer) {
-        SDL_OutOfMemory();
-        return NULL;
-    }
-
-    data = (SDL_PS3_RenderData *) SDL_malloc(sizeof(*data));
-    if (!data) {
-        SDL_PS3_DestroyRenderer(renderer);
-        SDL_OutOfMemory();
-        return NULL;
-    }
-    SDL_zerop(data);
-
-    renderer->CreateTexture = PS3_CreateTexture;
-    renderer->DestroyTexture = PS3_DestroyTexture;
-    renderer->QueryTexturePixels = PS3_QueryTexturePixels;
-    renderer->UpdateTexture = PS3_UpdateTexture;
-    renderer->LockTexture = PS3_LockTexture;
-    renderer->UnlockTexture = PS3_UnlockTexture;
-    renderer->ActivateRenderer = SDL_PS3_ActivateRenderer;
-    renderer->DisplayModeChanged = SDL_PS3_DisplayModeChanged;
-    renderer->RenderPoint = SDL_PS3_RenderPoint;
-    renderer->RenderLine = SDL_PS3_RenderLine;
-    renderer->RenderFill = SDL_PS3_RenderFill;
-    renderer->RenderCopy = SDL_PS3_RenderCopy;
-    renderer->RenderPresent = SDL_PS3_RenderPresent;
-    renderer->DestroyRenderer = SDL_PS3_DestroyRenderer;
-    renderer->info.name = SDL_PS3_RenderDriver.info.name;
-    renderer->info.flags = 0;
-    renderer->window = window;
-    renderer->driverdata = data;
-
-    deprintf(1, "window->w = %u\n", window->w);
-    deprintf(1, "window->h = %u\n", window->h);
-
-    data->double_buffering = 0;
-
-    /* Get ps3 screeninfo */
-    if (ioctl(devdata->fbdev, PS3FB_IOCTL_SCREENINFO, (unsigned long)&res) < 0) {
-        SDL_SetError("[PS3] PS3FB_IOCTL_SCREENINFO failed");
-    }
-    deprintf(2, "res.num_frames = %d\n", res.num_frames);
-
-    /* Only use double buffering if enough fb memory is available */
-    if (res.num_frames > 1) {
-        renderer->info.flags |= SDL_RENDERER_PRESENTFLIP2;
-        n = 2;
-        data->double_buffering = 1;
-    } else {
-        renderer->info.flags |= SDL_RENDERER_PRESENTCOPY;
-        n = 1;
-    }
-
-    data->screen =
-        SDL_CreateRGBSurface(0, window->w, window->h, bpp, Rmask, Gmask,
-                             Bmask, Amask);
-    if (!data->screen) {
-        SDL_PS3_DestroyRenderer(renderer);
-        return NULL;
-    }
-    /* Allocate aligned memory for pixels */
-    SDL_free(data->screen->pixels);
-    data->screen->pixels = (void *)memalign(16, data->screen->h * data->screen->pitch);
-    if (!data->screen->pixels) {
-        SDL_FreeSurface(data->screen);
-        SDL_OutOfMemory();
-        return NULL;
-    }
-    SDL_memset(data->screen->pixels, 0, data->screen->h * data->screen->pitch);
-    SDL_SetSurfacePalette(data->screen, display->palette);
-
-    data->current_screen = 0;
-
-    /* Create SPU parms structure */
-    data->converter_parms = (struct yuv2rgb_parms_t *) memalign(16, sizeof(struct yuv2rgb_parms_t));
-    data->scaler_parms = (struct scale_parms_t *) memalign(16, sizeof(struct scale_parms_t));
-    if (data->converter_parms == NULL || data->scaler_parms == NULL) {
-        SDL_PS3_DestroyRenderer(renderer);
-        SDL_OutOfMemory();
-        return NULL;
-    }
-
-    /* Set up the SPE threading data */
-    data->converter_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t));
-    data->scaler_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t));
-    if (data->converter_thread_data == NULL || data->scaler_thread_data == NULL) {
-        SDL_PS3_DestroyRenderer(renderer);
-        SDL_OutOfMemory();
-        return NULL;
-    }
-
-    /* Set up the SPE scaler (booted) */
-    data->scaler_thread_data->program = bilin_scaler_spu;
-    data->scaler_thread_data->program_name = "bilin_scaler_spu";
-    data->scaler_thread_data->keepalive = 0;
-    data->scaler_thread_data->booted = 0;
-
-    /* Set up the SPE converter (always running) */
-    data->converter_thread_data->program = yuv2rgb_spu;
-    data->converter_thread_data->program_name = "yuv2rgb_spu";
-    data->converter_thread_data->keepalive = 1;
-    data->converter_thread_data->booted = 0;
-
-    SPE_Start(data->converter_thread_data);
-
-    deprintf(1, "-SDL_PS3_CreateRenderer()\n");
-    return renderer;
-}
-
-static int
-SDL_PS3_ActivateRenderer(SDL_Renderer * renderer)
-{
-    deprintf(1, "+PS3_ActivateRenderer()\n");
-    SDL_PS3_RenderData *data = (SDL_PS3_RenderData *) renderer->driverdata;
-
-    deprintf(1, "-PS3_ActivateRenderer()\n");
-    return 0;
-}
-
-static int SDL_PS3_DisplayModeChanged(SDL_Renderer * renderer) {
-    deprintf(1, "+PS3_DisplayModeChanged()\n");
-    SDL_PS3_RenderData *data = (SDL_PS3_RenderData *) renderer->driverdata;
-
-    deprintf(1, "-PS3_DisplayModeChanged()\n");
-    return 0;
-}
-
-static int
-PS3_CreateTexture(SDL_Renderer * renderer, SDL_Texture * texture) {
-    deprintf(1, "+PS3_CreateTexture()\n");
-    PS3_TextureData *data;
-    data = (PS3_TextureData *) SDL_calloc(1, sizeof(*data));
-    if (!data) {
-        SDL_OutOfMemory();
-        return -1;
-    }
-    data->pitch = (texture->w * SDL_BYTESPERPIXEL(texture->format));
-
-    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
-        /* Use SDLs SW_YUVTexture */
-        data->yuv =
-            SDL_SW_CreateYUVTexture(texture->format, texture->w, texture->h);
-        if (!data->yuv) {
-            SDL_OutOfMemory();
-            return -1;
-        }
-        /* but align pixels */
-        SDL_free(data->yuv->pixels);
-        data->yuv->pixels = (Uint8 *)memalign(16, texture->w * texture->h * 2);
-        if (!data->yuv->pixels) {
-            SDL_OutOfMemory();
-            return -1;
-        }
-
-        /* Redo: Find the pitch and offset values for the overlay */
-        SDL_SW_YUVTexture *swdata = (SDL_SW_YUVTexture *) data->yuv;
-        switch (texture->format) {
-            case SDL_PIXELFORMAT_YV12:
-            case SDL_PIXELFORMAT_IYUV:
-                swdata->pitches[0] = texture->w;
-                swdata->pitches[1] = swdata->pitches[0] / 2;
-                swdata->pitches[2] = swdata->pitches[0] / 2;
-                swdata->planes[0] = swdata->pixels;
-                swdata->planes[1] = swdata->planes[0] + swdata->pitches[0] * texture->h;
-                swdata->planes[2] = swdata->planes[1] + swdata->pitches[1] * texture->h / 2;
-                break;
-            case SDL_PIXELFORMAT_YUY2:
-            case SDL_PIXELFORMAT_UYVY:
-            case SDL_PIXELFORMAT_YVYU:
-                swdata->pitches[0] = texture->w * 2;
-                swdata->planes[0] = swdata->pixels;
-                break;
-            default:
-                /* We should never get here (caught above) */
-                break;
-        }
-    } else {
-        data->pixels = NULL;
-        data->pixels = SDL_malloc(texture->h * data->pitch);
-        if (!data->pixels) {
-            PS3_DestroyTexture(renderer, texture);
-            SDL_OutOfMemory();
-            return -1;
-        }
-    }
-    texture->driverdata = data;
-    deprintf(1, "-PS3_CreateTexture()\n");
-    return 0;
-}
-
-static int
-PS3_QueryTexturePixels(SDL_Renderer * renderer, SDL_Texture * texture,
-                      void **pixels, int *pitch)
-{
-    deprintf(1, "+PS3_QueryTexturePixels()\n");
-    PS3_TextureData *data = (PS3_TextureData *) texture->driverdata;
-
-    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
-        return SDL_SW_QueryYUVTexturePixels(data->yuv, pixels, pitch);
-    } else {
-        *pixels = (void *)data->pixels;
-        *pitch = data->pitch;
-    }
-
-    deprintf(1, "-PS3_QueryTexturePixels()\n");
-    return 0;
-}
-
-static int
-PS3_UpdateTexture(SDL_Renderer * renderer, SDL_Texture * texture,
-                       const SDL_Rect * rect, const void *pixels, int pitch)
-{
-    deprintf(1, "+PS3_UpdateTexture()\n");
-    PS3_TextureData *data = (PS3_TextureData *) texture->driverdata;
-
-    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
-        return SDL_SW_UpdateYUVTexture(data->yuv, rect, pixels, pitch);
-    } else {
-        Uint8 *src, *dst;
-        int row;
-        size_t length;
-        Uint8 *dstpixels;
-
-        src = (Uint8 *) pixels;
-        dst = (Uint8 *) dstpixels + rect->y * data->pitch + rect->x
-                        * SDL_BYTESPERPIXEL(texture->format);
-        length = rect->w * SDL_BYTESPERPIXEL(texture->format);
-        /* Update the texture */
-        for (row = 0; row < rect->h; ++row) {
-            SDL_memcpy(dst, src, length);
-            src += pitch;
-            dst += data->pitch;
-        }
-    }
-    deprintf(1, "-PS3_UpdateTexture()\n");
-    return 0;
-}
-
-static int
-PS3_LockTexture(SDL_Renderer * renderer, SDL_Texture * texture,
-               const SDL_Rect * rect, int markDirty, void **pixels,
-               int *pitch)
-{
-    deprintf(1, "+PS3_LockTexture()\n");
-    PS3_TextureData *data = (PS3_TextureData *) texture->driverdata;
-
-    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
-        deprintf(1, "-PS3_LockTexture()\n");
-        return SDL_SW_LockYUVTexture(data->yuv, rect, markDirty, pixels, pitch);
-    } else {
-        *pixels =
-            (void *) ((Uint8 *) data->pixels + rect->y * data->pitch +
-                      rect->x * SDL_BYTESPERPIXEL(texture->format));
-        *pitch = data->pitch;
-        deprintf(1, "-PS3_LockTexture()\n");
-        return 0;
-    }
-}
-
-static void
-PS3_UnlockTexture(SDL_Renderer * renderer, SDL_Texture * texture)
-{
-    deprintf(1, "+PS3_UnlockTexture()\n");
-    PS3_TextureData *data = (PS3_TextureData *) texture->driverdata;
-
-    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
-        SDL_SW_UnlockYUVTexture(data->yuv);
-    }
-    deprintf(1, "-PS3_UnlockTexture()\n");
-}
-
-static void
-PS3_DestroyTexture(SDL_Renderer * renderer, SDL_Texture * texture)
-{
-    deprintf(1, "+PS3_DestroyTexture()\n");
-    PS3_TextureData *data = (PS3_TextureData *) texture->driverdata;
-
-    if (!data) {
-        return;
-    }
-    if (data->yuv) {
-        SDL_SW_DestroyYUVTexture(data->yuv);
-    }
-    if (data->pixels) {
-        SDL_free((void *)data->pixels);
-    }
-    deprintf(1, "-PS3_DestroyTexture()\n");
-}
-
-static int
-SDL_PS3_RenderPoint(SDL_Renderer * renderer, int x, int y)
-{
-    SDL_PS3_RenderData *data =
-        (SDL_PS3_RenderData *) renderer->driverdata;
-    SDL_Surface *target = data->screen;
-    int status;
-
-    if (renderer->blendMode == SDL_BLENDMODE_NONE ||
-        renderer->blendMode == SDL_BLENDMODE_MASK) {
-        Uint32 color =
-            SDL_MapRGBA(target->format, renderer->r, renderer->g, renderer->b,
-                        renderer->a);
-
-        status = SDL_DrawPoint(target, x, y, color);
-    } else {
-        status =
-            SDL_BlendPoint(target, x, y, renderer->blendMode, renderer->r,
-                           renderer->g, renderer->b, renderer->a);
-    }
-    return status;
-}
-
-static int
-SDL_PS3_RenderLine(SDL_Renderer * renderer, int x1, int y1, int x2, int y2)
-{
-    SDL_PS3_RenderData *data =
-        (SDL_PS3_RenderData *) renderer->driverdata;
-    SDL_Surface *target = data->screen;
-    int status;
-
-    if (renderer->blendMode == SDL_BLENDMODE_NONE ||
-        renderer->blendMode == SDL_BLENDMODE_MASK) {
-        Uint32 color =
-            SDL_MapRGBA(target->format, renderer->r, renderer->g, renderer->b,
-                        renderer->a);
-
-        status = SDL_DrawLine(target, x1, y1, x2, y2, color);
-    } else {
-        status =
-            SDL_BlendLine(target, x1, y1, x2, y2, renderer->blendMode,
-                          renderer->r, renderer->g, renderer->b, renderer->a);
-    }
-    return status;
-}
-
-static int
-SDL_PS3_RenderFill(SDL_Renderer * renderer, const SDL_Rect * rect)
-{
-    deprintf(1, "SDL_PS3_RenderFill()\n");
-    SDL_PS3_RenderData *data =
-        (SDL_PS3_RenderData *) renderer->driverdata;
-    SDL_Surface *target = data->screen;
-    SDL_Rect real_rect = *rect;
-    int status;
-
-    if (renderer->blendMode == SDL_BLENDMODE_NONE) {
-        Uint32 color =
-            SDL_MapRGBA(target->format, renderer->r, renderer->g, renderer->b,
-                        renderer->a);
-
-        status = SDL_FillRect(target, &real_rect, color);
-    } else {
-        status =
-            SDL_BlendFillRect(target, &real_rect, renderer->blendMode,
-                              renderer->r, renderer->g, renderer->b,
-                              renderer->a);
-    }
-    return status;
-}
-
-static int
-SDL_PS3_RenderCopy(SDL_Renderer * renderer, SDL_Texture * texture,
-                     const SDL_Rect * srcrect, const SDL_Rect * dstrect)
-{
-    deprintf(1, "+SDL_PS3_RenderCopy()\n");
-    SDL_PS3_RenderData *data =
-        (SDL_PS3_RenderData *) renderer->driverdata;
-    SDL_Window *window = SDL_GetWindowFromID(renderer->window);
-    SDL_VideoDisplay *display = window->display;
-    PS3_TextureData *txdata = (PS3_TextureData *) texture->driverdata;
-    SDL_VideoData *devdata = display->device->driverdata;
-
-    if (SDL_ISPIXELFORMAT_FOURCC(texture->format)) {
-        deprintf(1, "Texture is in a FOURCC format\n");
-        if ((texture->format == SDL_PIXELFORMAT_YV12 || texture->format == SDL_PIXELFORMAT_IYUV)
-                && texture->w % 8 == 0 && texture->h % 8 == 0
-                && dstrect->w % 8 == 0 && dstrect->h % 8 == 0) {
-            deprintf(1, "Use SPE for scaling/converting\n");
-
-            SDL_SW_YUVTexture *swdata = (SDL_SW_YUVTexture *) txdata->yuv;
-            Uint8 *lum, *Cr, *Cb;
-            Uint8 *scaler_out = NULL;
-            Uint8 *dstpixels;
-            switch (texture->format) {
-                case SDL_PIXELFORMAT_YV12:
-                    lum = swdata->planes[0];
-                    Cr = swdata->planes[1];
-                    Cb = swdata->planes[2];
-                    break;
-                case SDL_PIXELFORMAT_IYUV:
-                    lum = swdata->planes[0];
-                    Cr = swdata->planes[2];
-                    Cb = swdata->planes[1];
-                    break;
-                default:
-                    /* We should never get here (caught above) */
-                    return -1;
-            }
-
-            if (srcrect->w != dstrect->w || srcrect->h != dstrect->h) {
-                deprintf(1, "We need to scale the texture from %u x %u to %u x %u\n",
-                        srcrect->w, srcrect->h, dstrect->w, dstrect->h);
-                /* Alloc mem for scaled YUV picture */
-                scaler_out = (Uint8 *) memalign(16, dstrect->w * dstrect->h + ((dstrect->w * dstrect->h) >> 1));
-                if (scaler_out == NULL) {
-                    SDL_OutOfMemory();
-                    return -1;
-                }
-
-                /* Set parms for scaling */
-                data->scaler_parms->src_pixel_width = srcrect->w;
-                data->scaler_parms->src_pixel_height = srcrect->h;
-                data->scaler_parms->dst_pixel_width = dstrect->w;
-                data->scaler_parms->dst_pixel_height = dstrect->h;
-                data->scaler_parms->y_plane = lum;
-                data->scaler_parms->v_plane = Cr;
-                data->scaler_parms->u_plane = Cb;
-                data->scaler_parms->dstBuffer = scaler_out;
-                data->scaler_thread_data->argp = (void *)data->scaler_parms;
-
-                /* Scale the YUV overlay to given size */
-                SPE_Start(data->scaler_thread_data);
-                SPE_Stop(data->scaler_thread_data);
-
-                /* Set parms for converting after scaling */
-                data->converter_parms->y_plane = scaler_out;
-                data->converter_parms->v_plane = scaler_out + dstrect->w * dstrect->h;
-                data->converter_parms->u_plane = scaler_out + dstrect->w * dstrect->h + ((dstrect->w * dstrect->h) >> 2);
-            } else {
-                data->converter_parms->y_plane = lum;
-                data->converter_parms->v_plane = Cr;
-                data->converter_parms->u_plane = Cb;
-            }
-
-            dstpixels = (Uint8 *) data->screen->pixels + dstrect->y * data->screen->pitch + dstrect->x
-                            * SDL_BYTESPERPIXEL(texture->format);
-            data->converter_parms->src_pixel_width = dstrect->w;
-            data->converter_parms->src_pixel_height = dstrect->h;
-            data->converter_parms->dstBuffer = dstpixels/*(Uint8 *)data->screen->pixels*/;
-            data->converter_thread_data->argp = (void *)data->converter_parms;
-
-            /* Convert YUV texture to RGB */
-            SPE_SendMsg(data->converter_thread_data, SPU_START);
-            SPE_SendMsg(data->converter_thread_data, (unsigned int)data->converter_thread_data->argp);
-
-            /* We can probably move that to RenderPresent() */
-            SPE_WaitForMsg(data->converter_thread_data, SPU_FIN);
-            if (scaler_out) {
-                free(scaler_out);
-            }
-        } else {
-            deprintf(1, "Use software for scaling/converting\n");
-            Uint8 *dst;
-            /* FIXME: Not good */
-            dst = (Uint8 *) data->screen->pixels + dstrect->y * data->screen->pitch + dstrect->x
-                            * SDL_BYTESPERPIXEL(texture->format);
-            return SDL_SW_CopyYUVToRGB(txdata->yuv, srcrect, display->current_mode.format,
-                                   dstrect->w, dstrect->h, dst/*data->screen->pixels*/,
-                                   data->screen->pitch);
-        }
-    } else {
-        deprintf(1, "SDL_ISPIXELFORMAT_FOURCC = false\n");
-
-        Uint8 *src, *dst;
-        int row;
-        size_t length;
-        Uint8 *dstpixels;
-
-        src = (Uint8 *) txdata->pixels;
-        dst = (Uint8 *) data->screen->pixels + dstrect->y * data->screen->pitch + dstrect->x
-                        * SDL_BYTESPERPIXEL(texture->format);
-        length = dstrect->w * SDL_BYTESPERPIXEL(texture->format);
-        for (row = 0; row < dstrect->h; ++row) {
-            SDL_memcpy(dst, src, length);
-            src += txdata->pitch;
-            dst += data->screen->pitch;
-        }
-    }
-
-    deprintf(1, "-SDL_PS3_RenderCopy()\n");
-    return 0;
-}
-
-static void
-SDL_PS3_RenderPresent(SDL_Renderer * renderer)
-{
-    deprintf(1, "+SDL_PS3_RenderPresent()\n");
-    SDL_PS3_RenderData *data =
-        (SDL_PS3_RenderData *) renderer->driverdata;
-    SDL_Window *window = SDL_GetWindowFromID(renderer->window);
-    SDL_VideoDisplay *display = window->display;
-    SDL_VideoData *devdata = display->device->driverdata;
-
-    /* Send the data to the screen */
-    /* Get screeninfo */
-    struct fb_fix_screeninfo fb_finfo;
-    if (ioctl(devdata->fbdev, FBIOGET_FSCREENINFO, &fb_finfo)) {
-        SDL_SetError("[PS3] Can't get fixed screeninfo");
-    }
-    struct fb_var_screeninfo fb_vinfo;
-    if (ioctl(devdata->fbdev, FBIOGET_VSCREENINFO, &fb_vinfo)) {
-        SDL_SetError("[PS3] Can't get VSCREENINFO");
-    }
-
-    /* 16 and 15 bpp is reported as 16 bpp */
-    //txdata->bpp = fb_vinfo.bits_per_pixel;
-    //if (txdata->bpp == 16)
-    //    txdata->bpp = fb_vinfo.red.length + fb_vinfo.green.length + fb_vinfo.blue.length;
-
-    /* Adjust centering */
-    data->bounded_width = window->w < fb_vinfo.xres ? window->w : fb_vinfo.xres;
-    data->bounded_height = window->h < fb_vinfo.yres ? window->h : fb_vinfo.yres;
-    /* We could use SDL's CENTERED flag for centering */
-    data->offset_left = (fb_vinfo.xres - data->bounded_width) >> 1;
-    data->offset_top = (fb_vinfo.yres - data->bounded_height) >> 1;
-    data->center[0] = devdata->frame_buffer + data->offset_left * /*txdata->bpp/8*/ 4 +
-                data->offset_top * fb_finfo.line_length;
-    data->center[1] = data->center[0] + fb_vinfo.yres * fb_finfo.line_length;
-
-    deprintf(1, "offset_left = %u\n", data->offset_left);
-    deprintf(1, "offset_top = %u\n", data->offset_top);
-
-    /* Set SPU parms for copying the surface to framebuffer */
-    devdata->fb_parms->data = (unsigned char *)data->screen->pixels;
-    devdata->fb_parms->center = data->center[data->current_screen];
-    devdata->fb_parms->out_line_stride = fb_finfo.line_length;
-    devdata->fb_parms->in_line_stride = window->w * /*txdata->bpp / 8*/4;
-    devdata->fb_parms->bounded_input_height = data->bounded_height;
-    devdata->fb_parms->bounded_input_width = data->bounded_width;
-    //devdata->fb_parms->fb_pixel_size = txdata->bpp / 8;
-    devdata->fb_parms->fb_pixel_size = 4;//SDL_BYTESPERPIXEL(window->format);
-
-    deprintf(3, "[PS3->SPU] fb_thread_data->argp = 0x%x\n", devdata->fb_thread_data->argp);
-
-    /* Copying.. */
-    SPE_SendMsg(devdata->fb_thread_data, SPU_START);
-    SPE_SendMsg(devdata->fb_thread_data, (unsigned int)devdata->fb_thread_data->argp);
-
-    SPE_WaitForMsg(devdata->fb_thread_data, SPU_FIN);
-
-    /* Wait for vsync */
-    if (renderer->info.flags & SDL_RENDERER_PRESENTVSYNC) {
-        unsigned long crt = 0;
-        deprintf(1, "[PS3] Wait for vsync\n");
-        ioctl(devdata->fbdev, FBIO_WAITFORVSYNC, &crt);
-    }
-
-    /* Page flip */
-    deprintf(1, "[PS3] Page flip to buffer #%u 0x%x\n", data->current_screen, data->center[data->current_screen]);
-    ioctl(devdata->fbdev, PS3FB_IOCTL_FSEL, (unsigned long)&data->current_screen);
-
-    /* Update the flipping chain, if any */
-    if (data->double_buffering) {
-        data->current_screen = (data->current_screen + 1) % 2;
-    }
-    deprintf(1, "-SDL_PS3_RenderPresent()\n");
-}
-
-static void
-SDL_PS3_DestroyRenderer(SDL_Renderer * renderer)
-{
-    deprintf(1, "+SDL_PS3_DestroyRenderer()\n");
-    SDL_PS3_RenderData *data =
-        (SDL_PS3_RenderData *) renderer->driverdata;
-    int i;
-
-    if (data) {
-        for (i = 0; i < SDL_arraysize(data->screen); ++i) {
-            if (data->screen) {
-                SDL_FreeSurface(data->screen);
-            }
-        }
-
-        /* Shutdown SPE and release related resources */
-        if (data->scaler_thread_data) {
-            free((void *)data->scaler_thread_data);
-        }
-        if (data->scaler_parms) {
-            free((void *)data->scaler_parms);
-        }
-        if (data->converter_thread_data) {
-            SPE_Shutdown(data->converter_thread_data);
-            free((void *)data->converter_thread_data);
-        }
-        if (data->converter_parms) {
-            free((void *)data->converter_parms);
-        }
-
-        SDL_free(data);
-    }
-    SDL_free(renderer);
-    deprintf(1, "-SDL_PS3_DestroyRenderer()\n");
-}
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/ps3/SDL_ps3render_c.h	Wed Jan 19 22:21:31 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-/*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2010 Sam Lantinga
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-    Sam Lantinga
-    slouken@libsdl.org
-*/
-#include "SDL_config.h"
-
-/* Default framebuffer device on PS3 */
-/* SDL surface based renderer implementation */
-
-extern SDL_RenderDriver SDL_PS3_RenderDriver;
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/ps3/SDL_ps3spe.c	Wed Jan 19 22:21:31 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,166 +0,0 @@
-/*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2010 Sam Lantinga
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-    Sam Lantinga
-    slouken@libsdl.org
-*/
-#include "SDL_config.h"
-
-#include "SDL_video.h"
-#include "SDL_ps3spe_c.h"
-
-#include "SDL_ps3video.h"
-#include "SDL_ps3render_c.h"
-
-/* Start the SPE thread */
-int SPE_Start(spu_data_t * spe_data)
-{
-  deprintf(2, "[PS3->SPU] Start SPE: %s\n", spe_data->program_name);
-  if (!(spe_data->booted))
-    SPE_Boot(spe_data);
-
-  /* To allow re-running of context, spe_ctx_entry has to be set before each call */
-  spe_data->entry = SPE_DEFAULT_ENTRY;
-  spe_data->error_code = 0;
-
-  /* Create SPE thread and run */
-  deprintf(2, "[PS3->SPU] Create Thread: %s\n", spe_data->program_name);
-  if (pthread_create
-      (&spe_data->thread, NULL, (void *)&SPE_RunContext, (void *)spe_data)) {
-    deprintf(2, "[PS3->SPU] Could not create pthread for spe: %s\n", spe_data->program_name);
-    SDL_SetError("[PS3->SPU] Could not create pthread for spe");
-    return -1;
-  }
-
-  if (spe_data->keepalive)
-    SPE_WaitForMsg(spe_data, SPU_READY);
-}
-
-/* Stop the SPE thread */
-int SPE_Stop(spu_data_t * spe_data)
-{
-  deprintf(2, "[PS3->SPU] Stop SPE: %s\n", spe_data->program_name);
-  /* Wait for SPE thread to complete */
-  deprintf(2, "[PS3->SPU] Wait for SPE thread to complete: %s\n", spe_data->program_name);
-  if (pthread_join(spe_data->thread, NULL)) {
-    deprintf(2, "[PS3->SPU] Failed joining the thread: %s\n", spe_data->program_name);
-    SDL_SetError("[PS3->SPU] Failed joining the thread");
-    return -1;
-  }
-
-  return 0;
-}
-
-/* Create SPE context and load program */
-int SPE_Boot(spu_data_t * spe_data)
-{
-  /* Create SPE context */
-  deprintf(2, "[PS3->SPU] Create SPE Context: %s\n", spe_data->program_name);
-  spe_data->ctx = spe_context_create(0, NULL);
-  if (spe_data->ctx == NULL) {
-    deprintf(2, "[PS3->SPU] Failed creating SPE context: %s\n", spe_data->program_name);
-    SDL_SetError("[PS3->SPU] Failed creating SPE context");
-    return -1;
-  }
-
-  /* Load SPE object into SPE local store */
-  deprintf(2, "[PS3->SPU] Load Program into SPE: %s\n", spe_data->program_name);
-  if (spe_program_load(spe_data->ctx, &spe_data->program)) {
-    deprintf(2, "[PS3->SPU] Failed loading program into SPE context: %s\n", spe_data->program_name);
-    SDL_SetError
-        ("[PS3->SPU] Failed loading program into SPE context");
-    return -1;
-  }
-  spe_data->booted = 1;
-  deprintf(2, "[PS3->SPU] SPE boot successful\n");
-
-  return 0;
-}
-
-/* (Stop and) shutdown the SPE */
-int SPE_Shutdown(spu_data_t * spe_data)
-{
-  if (spe_data->keepalive && spe_data->booted) {
-    SPE_SendMsg(spe_data, SPU_EXIT);
-    SPE_Stop(spe_data);
-  }
-
-  /* Destroy SPE context */
-  deprintf(2, "[PS3->SPU] Destroy SPE context: %s\n", spe_data->program_name);
-  if (spe_context_destroy(spe_data->ctx)) {
-    deprintf(2, "[PS3->SPU] Failed destroying context: %s\n", spe_data->program_name);
-    SDL_SetError("[PS3->SPU] Failed destroying context");
-    return -1;
-  }
-  deprintf(2, "[PS3->SPU] SPE shutdown successful: %s\n", spe_data->program_name);
-  return 0;
-}
-
-/* Send message to the SPE via mailboxe */
-int SPE_SendMsg(spu_data_t * spe_data, unsigned int msg)
-{
-  deprintf(2, "[PS3->SPU] Sending message %u to %s\n", msg, spe_data->program_name);
-  /* Send one message, block until message was sent */
-  unsigned int spe_in_mbox_msgs[1];
-  spe_in_mbox_msgs[0] = msg;
-  int in_mbox_write = spe_in_mbox_write(spe_data->ctx, spe_in_mbox_msgs, 1, SPE_MBOX_ALL_BLOCKING);
-
-  if (1 > in_mbox_write) {
-    deprintf(2, "[PS3->SPU] No message could be written to %s\n", spe_data->program_name);
-    SDL_SetError("[PS3->SPU] No message could be written");
-    return -1;
-  }
-  return 0;
-}
-
-
-/* Read 1 message from SPE, block until at least 1 message was received */
-int SPE_WaitForMsg(spu_data_t * spe_data, unsigned int msg)
-{
-  deprintf(2, "[PS3->SPU] Waiting for message from %s\n", spe_data->program_name);
-  unsigned int out_messages[1];
-  while (!spe_out_mbox_status(spe_data->ctx));
-  int mbox_read = spe_out_mbox_read(spe_data->ctx, out_messages, 1);
-  deprintf(2, "[PS3->SPU] Got message from %s, message was %u\n", spe_data->program_name, out_messages[0]);
-  if (out_messages[0] == msg)
-    return 0;
-  else
-    return -1;
-}
-
-/* Re-runnable invocation of the spe_context_run call */
-void SPE_RunContext(void *thread_argp)
-{
-  /* argp is the pointer to argument to be passed to the SPE program */
-  spu_data_t *args = (spu_data_t *) thread_argp;
-  deprintf(3, "[PS3->SPU] void* argp=0x%x\n", (unsigned int)args->argp);
-
-  /* Run it.. */
-  deprintf(2, "[PS3->SPU] Run SPE program: %s\n", args->program_name);
-  if (spe_context_run
-      (args->ctx, &args->entry, 0, (void *)args->argp, NULL,
-       NULL) < 0) {
-    deprintf(2, "[PS3->SPU] Failed running SPE context: %s\n", args->program_name);
-    SDL_SetError("[PS3->SPU] Failed running SPE context: %s", args->program_name);
-    exit(1);
-  }
-
-  pthread_exit(NULL);
-}
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/ps3/SDL_ps3spe_c.h	Wed Jan 19 22:21:31 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-/*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2010 Sam Lantinga
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-    Sam Lantinga
-    slouken@libsdl.org
-*/
-
-/* This SPE API basically provides 3 ways to run and control a program
- * on the SPE:
- * - Start and stop the program (keepalive=0).
- *   SPE_Start() will implicitly boot up the program, create a thread and run
- *   the context.
- *   SPE_Stop() will join the (terminated) thread (may block) and return.
- * - Boot the program and run it (keepalive=0).
- *   SPE_Boot() will create a context and load the program and finally start
- *   the context with SPE_Start().
- *   SPE_Stop() will savely end the program.
- * - Boot, Run and send messages to the program (keepalive=1).
- *   Start the program by using one of the methods described above. When
- *   received the READY-message the program is in its infinite loop waiting
- *   for new messages.
- *   Every time you run the program, send SPU_START and the address of the
- *   according struct using SPE_SendMsg().
- *   SPE_WaitForMsg() will than wait for SPU_FIN and is blocking.
- *   SPE_Shutdown() sends SPU_EXIT and finally stops the program.
- *
- * Therefor the SPE program
- * - either runs once and returns
- * - or runs in an infinite loop and is controlled by messages.
- */
-
-#include "SDL_config.h"
-
-#include "spulibs/spu_common.h"
-
-#include <libspe2.h>
-
-#ifndef _SDL_ps3spe_h
-#define _SDL_ps3spe_h
-
-/* SPU handling data */
-typedef struct spu_data {
-    /* Context to be executed */
-    spe_context_ptr_t ctx;
-    spe_program_handle_t program;
-    /* Thread running the context */
-    pthread_t thread;
-    /* For debugging */
-    char * program_name;
-    /* SPE_Start() or SPE_Boot() called */
-    unsigned int booted;
-    /* Runs the program in an infinite loop? */
-    unsigned int keepalive;
-    unsigned int entry;
-    /* Exit code of the program */
-    int error_code;
-    /* Arguments passed to the program */
-    void * argp;
-} spu_data_t;
-
-/* SPU specific API functions */
-int SPE_Start(spu_data_t * spe_data);
-int SPE_Stop(spu_data_t * spe_data);
-int SPE_Boot(spu_data_t * spe_data);
-int SPE_Shutdown(spu_data_t * spe_data);
-int SPE_SendMsg(spu_data_t * spe_data, unsigned int msg);
-int SPE_WaitForMsg(spu_data_t * spe_data, unsigned int msg);
-void SPE_RunContext(void *thread_argp);
-
-#endif /* _SDL_ps3spe_h */
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/ps3/SDL_ps3video.c	Wed Jan 19 22:21:31 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,228 +0,0 @@
-/*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2010 Sam Lantinga
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-    Sam Lantinga
-    slouken@libsdl.org
-*/
-#include "SDL_config.h"
-
-/* SDL PS3 video driver implementation based on dummy video driver
- *
- * Initial work by Ryan C. Gordon (icculus@icculus.org). A good portion
- *  of this was cut-and-pasted from Stephane Peter's work in the AAlib
- *  SDL video driver.  Renamed to "DUMMY" by Sam Lantinga.
- */
-
-#include "SDL_video.h"
-#include "SDL_mouse.h"
-#include "../SDL_sysvideo.h"
-#include "../SDL_pixels_c.h"
-#include "../../events/SDL_events_c.h"
-
-#include "SDL_ps3video.h"
-#include "SDL_ps3spe_c.h"
-#include "SDL_ps3events_c.h"
-#include "SDL_ps3render_c.h"
-#include "SDL_ps3modes_c.h"
-
-#include <fcntl.h>
-#include <linux/fb.h>
-#include <asm/ps3fb.h>
-#include <sys/mman.h>
-
-#define PS3VID_DRIVER_NAME "ps3"
-
-/* Initialization/Query functions */
-static int PS3_VideoInit(_THIS);
-static void PS3_VideoQuit(_THIS);
-
-/* Stores the SPE executable name of fb_writer_spu */
-extern spe_program_handle_t fb_writer_spu;
-
-/* PS3 driver bootstrap functions */
-
-static int
-PS3_Available(void)
-{
-    deprintf(1, "+PS3_Available()\n");
-    const char *envr = SDL_getenv("SDL_VIDEODRIVER");
-    if ((envr) && (SDL_strcmp(envr, PS3VID_DRIVER_NAME) == 0)) {
-        return (1);
-    }
-
-    deprintf(1, "-PS3_Available()\n");
-    return (0);
-}
-
-static void
-PS3_DeleteDevice(SDL_VideoDevice * device)
-{
-    deprintf(1, "+PS3_DeleteDevice()\n");
-    SDL_free(device->driverdata);
-    SDL_free(device);
-    deprintf(1, "-PS3_DeleteDevice()\n");
-}
-
-static SDL_VideoDevice *
-PS3_CreateDevice(int devindex)
-{
-    deprintf(1, "+PS3_CreateDevice()\n");
-    SDL_VideoDevice *device;
-    SDL_VideoData *data;
-
-    /* Initialize all variables that we clean on shutdown */
-    device = (SDL_VideoDevice *) SDL_calloc(1, sizeof(SDL_VideoDevice));
-    if (!device) {
-        SDL_OutOfMemory();
-        if (device) {
-            SDL_free(device);
-        }
-        return (0);
-    }
-    data = (struct SDL_VideoData *) SDL_calloc(1, sizeof(SDL_VideoData));
-    if (!data) {
-        SDL_OutOfMemory();
-        SDL_free(device);
-        return (0);
-    }
-    device->driverdata = data;
-
-    /* Set the function pointers */
-    device->VideoInit = PS3_VideoInit;
-    device->VideoQuit = PS3_VideoQuit;
-    device->SetDisplayMode = PS3_SetDisplayMode;
-    device->GetDisplayModes = PS3_GetDisplayModes;
-    device->PumpEvents = PS3_PumpEvents;
-
-    device->free = PS3_DeleteDevice;
-
-    deprintf(1, "-PS3_CreateDevice()\n");
-    return device;
-}
-
-VideoBootStrap PS3_bootstrap = {
-    PS3VID_DRIVER_NAME, "SDL PS3 Cell video driver",
-    PS3_Available, PS3_CreateDevice
-};
-
-
-int
-PS3_VideoInit(_THIS)
-{
-    int i;
-
-    deprintf(1, "PS3_VideoInit()\n");
-
-    SDL_VideoData *data = (SDL_VideoData *) _this->driverdata;
-    SDL_DisplayMode mode;
-
-    /* Create SPU fb_parms and thread structure */
-    data->fb_parms = (struct fb_writer_parms_t *)
-        memalign(16, sizeof(struct fb_writer_parms_t));
-    data->fb_thread_data = (spu_data_t *) malloc(sizeof(spu_data_t));
-    if (data->fb_parms == NULL || data->fb_thread_data == NULL) {
-        SDL_OutOfMemory();
-        return -1;
-    }
-    data->fb_thread_data->program = fb_writer_spu;
-    data->fb_thread_data->program_name = "fb_writer_spu";
-    data->fb_thread_data->argp = (void *)data->fb_parms;
-    data->fb_thread_data->keepalive = 1;
-    data->fb_thread_data->booted = 0;
-
-    SPE_Start(data->fb_thread_data);
-
-    /* Open the device */
-    data->fbdev = open(PS3DEV, O_RDWR);
-    if (data->fbdev < 0) {
-        SDL_SetError("[PS3] Unable to open device %s", PS3DEV);
-        return -1;
-    }
-
-    /* Take control of frame buffer from kernel, for details see
-     * http://felter.org/wesley/files/ps3/linux-20061110-docs/ApplicationProgrammingEnvironment.html
-     * kernel will no longer flip the screen itself
-     */
-    ioctl(data->fbdev, PS3FB_IOCTL_ON, 0);
-
-    /* Unblank screen */
-    ioctl(data->fbdev, FBIOBLANK, 0);
-
-    struct fb_fix_screeninfo fb_finfo;
-    if (ioctl(data->fbdev, FBIOGET_FSCREENINFO, &fb_finfo)) {
-        SDL_SetError("[PS3] Can't get fixed screeninfo");
-        return (0);
-    }
-
-    /* Note: on PS3, fb_finfo.smem_len is enough for double buffering */
-    if ((data->frame_buffer = (uint8_t *)mmap(0, fb_finfo.smem_len,
-        PROT_READ | PROT_WRITE, MAP_SHARED,
-        data->fbdev, 0)) == (uint8_t *) - 1) {
-        SDL_SetError("[PS3] Can't mmap for %s", PS3DEV);
-        return (0);
-    } else {
-        /* Enable double buffering */
-    }
-
-    /* Blank screen */
-    memset(data->frame_buffer, 0x00, fb_finfo.smem_len);
-
-    PS3_InitModes(_this);
-    for (i = 0; i < _this->num_displays; ++i) {
-        SDL_AddRenderDriver(&_this->displays[i], &SDL_PS3_RenderDriver);
-    }
-
-    /* We're done! */
-    return 0;
-}
-
-void
-PS3_VideoQuit(_THIS)
-{
-    deprintf(1, "PS3_VideoQuit()\n");
-    SDL_VideoData *data = (SDL_VideoData *) _this->driverdata;
-
-    PS3_QuitModes(_this);
-
-    /* Unmap framebuffer */
-    if (data->frame_buffer) {
-        struct fb_fix_screeninfo fb_finfo;
-        if (ioctl(data->fbdev, FBIOGET_FSCREENINFO, &fb_finfo) != -1) {
-            munmap(data->frame_buffer, fb_finfo.smem_len);
-            data->frame_buffer = 0;
-        }
-    }
-
-    /* Shutdown SPE and related resources */
-    if (data->fb_parms)
-        free((void *)data->fb_parms);
-    if (data->fb_thread_data) {
-        SPE_Shutdown(data->fb_thread_data);
-        free((void *)data->fb_thread_data);
-    }
-
-    /* Close device */
-    if (data->fbdev) {
-        /* Give control of frame buffer back to kernel */
-        ioctl(data->fbdev, PS3FB_IOCTL_OFF, 0);
-        close(data->fbdev);
-        data->fbdev = -1;
-    }
-}
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/ps3/SDL_ps3video.h	Wed Jan 19 22:21:31 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,79 +0,0 @@
-/*
-    SDL - Simple DirectMedia Layer
-    Copyright (C) 1997-2010 Sam Lantinga
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
-
-    Sam Lantinga
-    slouken@libsdl.org
-*/
-#include "SDL_config.h"
-
-#ifndef _SDL_ps3video_h
-#define _SDL_ps3video_h
-
-#include "../SDL_sysvideo.h"
-#include "SDL_ps3spe_c.h"
-
-#include <linux/fb.h>
-#include <asm/ps3fb.h>
-
-/* Debugging
- * 0: No debug messages
- * 1: Video debug messages
- * 2: SPE debug messages
- * 3: Memory adresses
- */
-#define DEBUG_LEVEL 0
-
-#ifdef DEBUG_LEVEL
-#define deprintf( level, fmt, args... ) \
-    do \
-{ \
-    if ( (unsigned)(level) <= DEBUG_LEVEL ) \
-    { \
-        fprintf( stdout, fmt, ##args ); \
-        fflush( stdout ); \
-    } \
-} while ( 0 )
-#else
-#define deprintf( level, fmt, args... )
-#endif
-
-/* Default framebuffer device on PS3 */
-#define PS3DEV "/dev/fb0"
-
-/* Private display data */
-typedef struct SDL_VideoData
-{
-    /* Framebuffer device descriptor */
-    int fbdev;
-    /* mmap'd access to fbdev */
-    uint8_t * frame_buffer;
-    /* SPE threading stuff of the framebuffer */
-    spu_data_t * fb_thread_data;
-    /* Framebuffer transfer data */
-    volatile struct fb_writer_parms_t * fb_parms __attribute__((aligned(128)));
-} SDL_VideoData;
-
-typedef struct SDL_DisplayModeData
-{
-    unsigned long mode;
-    //struct ps3fb_ioctl_res res;
-} PS3_DisplayModeData;
-
-#endif /* _SDL_ps3video_h */
-
-/* vi: set ts=4 sw=4 expandtab: */
--- a/src/video/ps3/spulibs/Makefile	Wed Jan 19 22:21:31 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,47 +0,0 @@
-# This Makefile is for building the CELL BE SPU libs
-# libfb_writer_spu.so, libyuv2rgb_spu.so, libbilin_scaler_spu.so
-
-# Toolchain
-PPU_LD=/usr/bin/ld
-SPU_SRCDIR=$(srcdir)/src/video/ps3/spulibs
-SPU_LIBDIR=$(srcdir)/src/video/ps3/spulibs/libs
-SPU_CFLAGS=-g -W -Wall -Winline -Wno-main -I. -I /usr/spu/include -I /opt/cell/sdk/usr/spu/include -finline-limit=10000 -Winline -ftree-vectorize -funroll-loops -fmodulo-sched -ffast-math -fPIC -O2
-
-DEPS = $(SPU_SRCDIR)/spu_common.h
-LIBS= fb_writer yuv2rgb bilin_scaler
-
-OBJLIBS = $(foreach lib,$(LIBS),lib$(lib)_spu.a)
-SHALIBS = $(foreach lib,$(LIBS),lib$(lib)_spu.so)
-
-
-ps3libs: $(foreach lib,$(OBJLIBS),$(SPU_LIBDIR)/$(lib)) $(foreach lib,$(SHALIBS),$(SPU_LIBDIR)/$(lib))
-
-
-$(SPU_LIBDIR)/lib%_spu.a: $(SPU_LIBDIR)/%-embed.o
-	$(AR) -qcs $@ $<
-
-$(SPU_LIBDIR)/lib%_spu.so: $(SPU_LIBDIR)/%-embed.o
-	$(PPU_LD) -o $@ -shared -soname=$(notdir $@) $<
-
-$(SPU_LIBDIR)/%-embed.o: $(SPU_LIBDIR)/%.o
-	$(EMBEDSPU) -m32 $(subst -embed.o,,$(notdir $@))_spu $< $@
-
-$(SPU_LIBDIR)/%.o: $(SPU_SRCDIR)/%.c $(DEPS)
-	$(SPU_GCC) $(SPU_CFLAGS) -o $@ $< -lm
-
-
-ps3libs-install: $(foreach obj,$(OBJLIBS),$(SPU_LIBDIR)/$(obj)) $(foreach obj,$(SHALIBS),$(SPU_LIBDIR)/$(obj))
-	for file in $(OBJLIBS); do \
-		$(INSTALL) -c -m 0655 $(SPU_LIBDIR)/$$file $(DESTDIR)$(libdir)/$$file; \
-	done
-	for file in $(SHALIBS); do \
-		$(INSTALL) -c -m 0755 $(SPU_LIBDIR)/$$file $(DESTDIR)$(libdir)/$$file; \
-	done
-
-ps3libs-uninstall:
-	for file in $(OBJLIBS) $(SHALIBS); do \
-		rm -f $(DESTDIR)$(libdir)/$$file; \
-	done
-
-ps3libs-clean:
-	rm -f $(SPU_LIBDIR)/*
--- a/src/video/ps3/spulibs/bilin_scaler.c	Wed Jan 19 22:21:31 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2050 +0,0 @@
-/*
- * SDL - Simple DirectMedia Layer
- * CELL BE Support for PS3 Framebuffer
- * Copyright (C) 2008, 2009 International Business Machines Corporation
- *
- * This library is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
- * USA
- *
- *  Martin Lowinski  <lowinski [at] de [dot] ibm [dot] com>
- *  Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
- *  SPE code based on research by:
- *  Rene Becker
- *  Thimo Emmerich
- */
-
-#include "spu_common.h"
-
-#include <spu_intrinsics.h>
-#include <spu_mfcio.h>
-
-// Debugging
-//#define DEBUG
-
-#ifdef DEBUG
-#define deprintf(fmt, args... ) \
-	fprintf( stdout, fmt, ##args ); \
-	fflush( stdout );
-#else
-#define deprintf( fmt, args... )
-#endif
-
-struct scale_parms_t parms __attribute__((aligned(128))); /* filled by DMA in main() */
-/* DMA input buffers, two per plane; indexed by curr_src_idx / next_src_idx */
-/* A maximum of 8 lines Y, therefore 4 lines V, 4 lines U are stored
- * there might be the need to retrieve misaligned data, adjust
- * incoming v and u plane to be able to handle this (add 128)
- */
-unsigned char y_plane[2][(MAX_HDTV_WIDTH+128)*4] __attribute__((aligned(128)));
-unsigned char v_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128)));
-unsigned char u_plane[2][(MAX_HDTV_WIDTH+128)*2] __attribute__((aligned(128)));
-
-/* temp-buffer for scaling: 4 lines Y, therefore 2 lines V, 2 lines U */
-unsigned char scaled_y_plane[2][MAX_HDTV_WIDTH*2] __attribute__((aligned(128)));
-unsigned char scaled_v_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128)));
-unsigned char scaled_u_plane[2][MAX_HDTV_WIDTH/2] __attribute__((aligned(128)));
-
-/* some vectors needed by the float to int conversion */
-static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f };
-static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f };
-/* forward declarations: per-line scalers, then the four variants dispatched by main() */
-void bilinear_scale_line_w8(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride);
-void bilinear_scale_line_w16(unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride);
-
-void scale_srcw16_dstw16();
-void scale_srcw16_dstw32();
-void scale_srcw32_dstw16();
-void scale_srcw32_dstw32();
-
-int main( unsigned long long spe_id __attribute__((unused)), unsigned long long argp )
-{
-	/* SPU entry point: DMA the scale_parms_t at argp into parms, then run the matching scaler */
-	deprintf("[SPU] bilin_scaler_spu is up... (on SPE #%llu)\n", spe_id);
-	spu_mfcdma32(&parms, (unsigned int)argp, sizeof(struct scale_parms_t), TAG_INIT, MFC_GET_CMD);
-	DMA_WAIT_TAG(TAG_INIT);
-
-	deprintf("[SPU] Scale %ux%u to %ux%u\n", parms.src_pixel_width, parms.src_pixel_height,
-			parms.dst_pixel_width, parms.dst_pixel_height);
-
-	/* each remainder is non-zero when that width is not a multiple of 32 */
-	const unsigned int src_rem = parms.src_pixel_width & 0x1f;
-	const unsigned int dst_rem = parms.dst_pixel_width & 0x1F;
-
-	if(src_rem && dst_rem) {
-		deprintf("[SPU] Using scale_srcw16_dstw16\n");
-		scale_srcw16_dstw16();
-	} else if(src_rem) {
-		deprintf("[SPU] Using scale_srcw16_dstw32\n");
-		scale_srcw16_dstw32();
-	} else if(dst_rem) {
-		deprintf("[SPU] Using scale_srcw32_dstw16\n");
-		scale_srcw32_dstw16();
-	} else {
-		deprintf("[SPU] Using scale_srcw32_dstw32\n");
-		scale_srcw32_dstw32();
-	}
-	deprintf("[SPU] bilin_scaler_spu... done!\n");
-
-	return 0;
-}
-
-
-/*
- * vfloat_to_vuint()
- *
- * Limits every lane of a float vector to the range [0.1, 255] and then
- * converts the limited lanes to unsigned integers.
- *
- * @param vec_s float vector for conversion
- * @returns converted unsigned int vector
- */
-inline static vector unsigned int vfloat_to_vuint(vector float vec_s) {
-	/* lanes smaller than 0.1 are replaced by 0.1 */
-	const vector unsigned int too_small = spu_cmpgt(vec_0_1, vec_s);
-	const vector float floored = spu_sel(vec_s, vec_0_1, too_small);
-	/* lanes larger than 255 are replaced by 255 */
-	const vector unsigned int too_big = spu_cmpgt(floored, vec_255);
-	return spu_convtu(spu_sel(floored, vec_255, too_big),0);
-}
-
-
-/*
- * scale_srcw16_dstw16()
- *
- * Bilinear-scales the planar Y/V/U image described by the global parms and
- * DMAs the scaled planes to parms.dstBuffer (Y, then V, then U). main() picks
- * this variant when neither src nor dst width is a multiple of 32.
- */
-void scale_srcw16_dstw16() {
-	// extract parameters
-	unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
-
-	unsigned int src_width = parms.src_pixel_width;
-	unsigned int src_height = parms.src_pixel_height;
-	unsigned int dst_width = parms.dst_pixel_width;
-	unsigned int dst_height = parms.dst_pixel_height;
-
-	// YVU
-	unsigned int src_linestride_y = src_width;
-	unsigned int src_dbl_linestride_y = src_width<<1;
-	unsigned int src_linestride_vu = src_width>>1;
-	unsigned int src_dbl_linestride_vu = src_width;
-
-	// scaled YVU
-	unsigned int scaled_src_linestride_y = dst_width;
-
-	// ram addresses
-	unsigned char* src_addr_y = parms.y_plane;
-	unsigned char* src_addr_v = parms.v_plane;
-	unsigned char* src_addr_u = parms.u_plane;
-
-	// for handling misalignment, addresses are precalculated
-	unsigned char* precalc_src_addr_v = src_addr_v;
-	unsigned char* precalc_src_addr_u = src_addr_u;
-
-	unsigned int dst_picture_size = dst_width*dst_height;
-
-	// Sizes for destination: two Y lines, one V/U line (half width)
-	unsigned int dst_dbl_linestride_y = dst_width<<1;
-	unsigned int dst_dbl_linestride_vu = dst_width>>1;
-
-	// Perform address calculation for Y, V and U in main memory with dst_addr as base
-	unsigned char* dst_addr_main_memory_y = dst_addr;
-	unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
-	unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
-
-	// calculate scale factors
-	vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
-	float y_scale = (float)src_height/(float)dst_height;
-
-	// double buffered processing
-	// buffer switching
-	unsigned int curr_src_idx = 0;
-	unsigned int curr_dst_idx = 0;
-	unsigned int next_src_idx, next_dst_idx;
-
-	// 2 lines y as output, upper and lowerline
-	unsigned int curr_interpl_y_upper = 0;
-	unsigned int next_interpl_y_upper;
-	unsigned int curr_interpl_y_lower, next_interpl_y_lower;
-	// only 1 line v/u output, both planes have the same dimension
-	unsigned int curr_interpl_vu = 0;
-	unsigned int next_interpl_vu;
-
-	// weights, calculated in every loop iteration
-	vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
-	vector float vf_next_NSweight_y_upper;
-	vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
-	vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
-	vector float vf_next_NSweight_vu;
-
-	// line indices for the src picture
-	float curr_src_y_upper = 0.0f, next_src_y_upper;
-	float curr_src_y_lower, next_src_y_lower;
-	float curr_src_vu = 0.0f, next_src_vu;
-
-	// line indices for the dst picture
-	unsigned int dst_y=0, dst_vu=0;
-
-	// offset for the v and u plane to handle misalignment
-	unsigned int curr_lsoff_v = 0, next_lsoff_v;
-	unsigned int curr_lsoff_u = 0, next_lsoff_u;
-
-	// calculate lower line indices
-	curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
-	curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
-	// lower line weight
-	vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
-
-
-	// start partially double buffered processing
-	// get initial data, 2 sets of y, 1 set v, 1 set u
-	mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
-	mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
-			(unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
-			src_dbl_linestride_y,
-			RETR_BUF,
-			0, 0 );
-	mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
-	mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
-
-	/* iteration loop
-	 * within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
-	 * the scaled output is 2 lines y, 1 line v, 1 line u, stored to RAM as YUV
-	 * NOTE(review): the unsigned bound below wraps if dst_height < 2 -- confirm callers
-	 */
-	for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
-		dst_y = dst_vu<<1;
-
-		// calculate next indices
-		next_src_vu = ((float)dst_vu+1)*y_scale;
-		next_src_y_upper = ((float)dst_y+2)*y_scale;
-		next_src_y_lower = ((float)dst_y+3)*y_scale;
-
-		next_interpl_vu = (unsigned int) next_src_vu;
-		next_interpl_y_upper = (unsigned int) next_src_y_upper;
-		next_interpl_y_lower = (unsigned int) next_src_y_lower;
-
-		// calculate weight NORTH-SOUTH
-		vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
-		vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
-		vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
-
-		// get next lines
-		next_src_idx = curr_src_idx^1;
-		next_dst_idx = curr_dst_idx^1;
-
-		// 4 lines y
-		mfc_get( y_plane[next_src_idx],
-				(unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
-				src_dbl_linestride_y,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-		mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
-				(unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
-				src_dbl_linestride_y,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-
-		// 2 lines v, fetched from the 16-byte aligned address below the wanted line
-		precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu);
-		next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F;
-		mfc_get( v_plane[next_src_idx],
-				((unsigned int) precalc_src_addr_v)&0xFFFFFFF0,
-				src_dbl_linestride_vu+(next_lsoff_v<<1),
-				RETR_BUF+next_src_idx,
-				0, 0 );
-		// 2 lines u, sized with the U offset (the V offset was used here by mistake)
-		precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu);
-		next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F;
-		mfc_get( u_plane[next_src_idx],
-				((unsigned int) precalc_src_addr_u)&0xFFFFFFF0,
-				src_dbl_linestride_vu+(next_lsoff_u<<1),
-				RETR_BUF+next_src_idx,
-				0, 0 );
-
-		DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
-
-		// scaling
-		// work line y_upper
-		bilinear_scale_line_w16( y_plane[curr_src_idx],
-				scaled_y_plane[curr_src_idx],
-				dst_width,
-				vf_x_scale,
-				vf_curr_NSweight_y_upper,
-				src_linestride_y );
-		// work line y_lower
-		bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
-				scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
-				dst_width,
-				vf_x_scale,
-				vf_curr_NSweight_y_lower,
-				src_linestride_y );
-		// work line v
-		bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
-				scaled_v_plane[curr_src_idx],
-				dst_width>>1,
-				vf_x_scale,
-				vf_curr_NSweight_vu,
-				src_linestride_vu );
-		// work line u
-		bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
-				scaled_u_plane[curr_src_idx],
-				dst_width>>1,
-				vf_x_scale,
-				vf_curr_NSweight_vu,
-				src_linestride_vu );
-
-
-		// Store the result back to main memory into a destination buffer in YUV format
-		//---------------------------------------------------------------------------------------------
-		DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-
-		// Perform three DMA transfers to 3 different locations in the main memory!
-		// dst_width:	Pixel width of destination image
-		// dst_addr:	Destination address in main memory
-		// dst_vu:	Counter which is incremented one by one
-		// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
-		mfc_put(	scaled_y_plane[curr_src_idx],					// What from local store (addr)
-				(unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
-				dst_dbl_linestride_y,						// Two Y lines (2*dst_width bytes)
-				STR_BUF+curr_dst_idx,						// Tag
-				0, 0 );
-
-		mfc_put(	scaled_v_plane[curr_src_idx],					// What from local store (addr)
-				(unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-				dst_dbl_linestride_vu,						// One V line (dst_width/2 bytes)
-				STR_BUF+curr_dst_idx,						// Tag
-				0, 0 );
-
-		mfc_put(	scaled_u_plane[curr_src_idx],					// What from local store (addr)
-				(unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-				dst_dbl_linestride_vu,						// One U line (dst_width/2 bytes)
-				STR_BUF+curr_dst_idx,						// Tag
-				0, 0 );
-		//---------------------------------------------------------------------------------------------
-
-
-		// update for next cycle
-		curr_src_idx = next_src_idx;
-		curr_dst_idx = next_dst_idx;
-
-		curr_interpl_y_upper = next_interpl_y_upper;
-		curr_interpl_y_lower = next_interpl_y_lower;
-		curr_interpl_vu = next_interpl_vu;
-
-		vf_curr_NSweight_y_upper = vf_next_NSweight_y_upper;	// was a self-assignment
-		vf_curr_NSweight_y_lower = vf_next_NSweight_y_lower;	// was a self-assignment
-		vf_curr_NSweight_vu = vf_next_NSweight_vu;
-
-		curr_src_y_upper = next_src_y_upper;
-		curr_src_y_lower = next_src_y_lower;
-		curr_src_vu = next_src_vu;
-
-		curr_lsoff_v = next_lsoff_v;
-		curr_lsoff_u = next_lsoff_u;
-	}
-
-
-
-	DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
-
-	// scaling of the last fetched set; nothing further is prefetched
-	// work line y_upper
-	bilinear_scale_line_w16( y_plane[curr_src_idx],
-			scaled_y_plane[curr_src_idx],
-			dst_width,
-			vf_x_scale,
-			vf_curr_NSweight_y_upper,
-			src_linestride_y );
-	// work line y_lower
-	bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
-			scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
-			dst_width,
-			vf_x_scale,
-			vf_curr_NSweight_y_lower,
-			src_linestride_y );
-	// work line v
-	bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
-			scaled_v_plane[curr_src_idx],
-			dst_width>>1,
-			vf_x_scale,
-			vf_curr_NSweight_vu,
-			src_linestride_vu );
-	// work line u
-	bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
-			scaled_u_plane[curr_src_idx],
-			dst_width>>1,
-			vf_x_scale,
-			vf_curr_NSweight_vu,
-			src_linestride_vu );
-
-
-	// Store the result back to main memory into a destination buffer in YUV format
-	//---------------------------------------------------------------------------------------------
-	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-
-	// Perform three DMA transfers to 3 different locations in the main memory!
-	// dst_width:	Pixel width of destination image
-	// dst_addr:	Destination address in main memory
-	// dst_vu:	Counter which is incremented one by one
-	// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
-	mfc_put(	scaled_y_plane[curr_src_idx],					// What from local store (addr)
-			(unsigned int)dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
-			dst_dbl_linestride_y,						// Two Y lines (2*dst_width bytes)
-			STR_BUF+curr_dst_idx,						// Tag
-			0, 0 );
-
-	mfc_put(	scaled_v_plane[curr_src_idx],					// What from local store (addr)
-			(unsigned int)dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-			dst_dbl_linestride_vu,						// One V line (dst_width/2 bytes)
-			STR_BUF+curr_dst_idx,						// Tag
-			0, 0 );
-
-	mfc_put(	scaled_u_plane[curr_src_idx],					// What from local store (addr)
-			(unsigned int)dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-			dst_dbl_linestride_vu,						// One U line (dst_width/2 bytes)
-			STR_BUF+curr_dst_idx,						// Tag
-			0, 0 );
-
-	// wait for completion
-	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-	//---------------------------------------------------------------------------------------------
-}
-
-
-/*
- * scale_srcw16_dstw32()
- *
- * Bilinear-scales the planar Y/V/U image described by the global parms and
- * DMAs the scaled planes to parms.dstBuffer (Y, then V, then U); no colour
- * conversion happens here. main() picks this variant when the src width is
- * not a multiple of 32 but the dst width is.
- */
-void scale_srcw16_dstw32() {
-	// extract parameters
-	unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
-
-	unsigned int src_width = parms.src_pixel_width;
-	unsigned int src_height = parms.src_pixel_height;
-	unsigned int dst_width = parms.dst_pixel_width;
-	unsigned int dst_height = parms.dst_pixel_height;
-
-	// YVU
-	unsigned int src_linestride_y = src_width;
-	unsigned int src_dbl_linestride_y = src_width<<1;
-	unsigned int src_linestride_vu = src_width>>1;
-	unsigned int src_dbl_linestride_vu = src_width;
-	// scaled YVU
-	unsigned int scaled_src_linestride_y = dst_width;
-
-	// ram addresses
-	unsigned char* src_addr_y = parms.y_plane;
-	unsigned char* src_addr_v = parms.v_plane;
-	unsigned char* src_addr_u = parms.u_plane;
-
-	unsigned int dst_picture_size = dst_width*dst_height;
-
-	// Sizes for destination: two Y lines, one V/U line (half width)
-	unsigned int dst_dbl_linestride_y = dst_width<<1;
-	unsigned int dst_dbl_linestride_vu = dst_width>>1;
-
-	// Perform address calculation for Y, V and U in main memory with dst_addr as base
-	unsigned char* dst_addr_main_memory_y = dst_addr;
-	unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
-	unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
-
-
-	// for handling misalignment, addresses are precalculated
-	unsigned char* precalc_src_addr_v = src_addr_v;
-	unsigned char* precalc_src_addr_u = src_addr_u;
-
-	// calculate scale factors
-	vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
-	float y_scale = (float)src_height/(float)dst_height;
-
-	// double buffered processing
-	// buffer switching
-	unsigned int curr_src_idx = 0;
-	unsigned int curr_dst_idx = 0;
-	unsigned int next_src_idx, next_dst_idx;
-
-	// 2 lines y as output, upper and lowerline
-	unsigned int curr_interpl_y_upper = 0;
-	unsigned int next_interpl_y_upper;
-	unsigned int curr_interpl_y_lower, next_interpl_y_lower;
-	// only 1 line v/u output, both planes have the same dimension
-	unsigned int curr_interpl_vu = 0;
-	unsigned int next_interpl_vu;
-
-	// weights, calculated in every loop iteration
-	vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
-	vector float vf_next_NSweight_y_upper;
-	vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
-	vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
-	vector float vf_next_NSweight_vu;
-
-	// line indices for the src picture
-	float curr_src_y_upper = 0.0f, next_src_y_upper;
-	float curr_src_y_lower, next_src_y_lower;
-	float curr_src_vu = 0.0f, next_src_vu;
-
-	// line indices for the dst picture
-	unsigned int dst_y=0, dst_vu=0;
-
-	// offset for the v and u plane to handle misalignment
-	unsigned int curr_lsoff_v = 0, next_lsoff_v;
-	unsigned int curr_lsoff_u = 0, next_lsoff_u;
-
-	// calculate lower line indices
-	curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
-	curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
-	// lower line weight
-	vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
-
-
-	// start partially double buffered processing
-	// get initial data, 2 sets of y, 1 set v, 1 set u
-	mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
-	mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
-			(unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
-			src_dbl_linestride_y,
-			RETR_BUF,
-			0, 0 );
-	mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
-	mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
-
-	// iteration loop
-	// within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
-	// the scaled output is 2 lines y, 1 line v, 1 line u, stored to RAM as YUV
-	// NOTE(review): the unsigned bound below wraps if dst_height < 2 -- confirm callers
-	for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
-		dst_y = dst_vu<<1;
-
-		// calculate next indices
-		next_src_vu = ((float)dst_vu+1)*y_scale;
-		next_src_y_upper = ((float)dst_y+2)*y_scale;
-		next_src_y_lower = ((float)dst_y+3)*y_scale;
-
-		next_interpl_vu = (unsigned int) next_src_vu;
-		next_interpl_y_upper = (unsigned int) next_src_y_upper;
-		next_interpl_y_lower = (unsigned int) next_src_y_lower;
-
-		// calculate weight NORTH-SOUTH
-		vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
-		vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
-		vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
-
-		// get next lines
-		next_src_idx = curr_src_idx^1;
-		next_dst_idx = curr_dst_idx^1;
-
-		// 4 lines y
-		mfc_get( y_plane[next_src_idx],
-				(unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
-				src_dbl_linestride_y,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-		mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
-				(unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
-				src_dbl_linestride_y,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-
-		// 2 lines v, fetched from the 16-byte aligned address below the wanted line
-		precalc_src_addr_v = src_addr_v+(next_interpl_vu*src_linestride_vu);
-		next_lsoff_v = ((unsigned int)precalc_src_addr_v)&0x0F;
-		mfc_get( v_plane[next_src_idx],
-				((unsigned int) precalc_src_addr_v)&0xFFFFFFF0,
-				src_dbl_linestride_vu+(next_lsoff_v<<1),
-				RETR_BUF+next_src_idx,
-				0, 0 );
-		// 2 lines u, sized with the U offset (the V offset was used here by mistake)
-		precalc_src_addr_u = src_addr_u+(next_interpl_vu*src_linestride_vu);
-		next_lsoff_u = ((unsigned int)precalc_src_addr_u)&0x0F;
-		mfc_get( u_plane[next_src_idx],
-				((unsigned int) precalc_src_addr_u)&0xFFFFFFF0,
-				src_dbl_linestride_vu+(next_lsoff_u<<1),
-				RETR_BUF+next_src_idx,
-				0, 0 );
-
-		DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
-
-		// scaling
-		// work line y_upper
-		bilinear_scale_line_w16( y_plane[curr_src_idx],
-				scaled_y_plane[curr_src_idx],
-				dst_width,
-				vf_x_scale,
-				vf_curr_NSweight_y_upper,
-				src_linestride_y );
-		// work line y_lower
-		bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
-				scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
-				dst_width,
-				vf_x_scale,
-				vf_curr_NSweight_y_lower,
-				src_linestride_y );
-		// work line v
-		bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
-				scaled_v_plane[curr_src_idx],
-				dst_width>>1,
-				vf_x_scale,
-				vf_curr_NSweight_vu,
-				src_linestride_vu );
-		// work line u
-		bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
-				scaled_u_plane[curr_src_idx],
-				dst_width>>1,
-				vf_x_scale,
-				vf_curr_NSweight_vu,
-				src_linestride_vu );
-
-		//---------------------------------------------------------------------------------------------
-		DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-
-		// Perform three DMA transfers to 3 different locations in the main memory!
-		// dst_width:	Pixel width of destination image
-		// dst_addr:	Destination address in main memory
-		// dst_vu:	Counter which is incremented one by one
-		// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
-
-		mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int)  dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
-				dst_dbl_linestride_y,								// Two Y lines (2*dst_width bytes)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-
-		mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-				dst_dbl_linestride_vu,								// One V line (dst_width/2 bytes)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-
-		mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int)  dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-				dst_dbl_linestride_vu,								// One U line (dst_width/2 bytes)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-		//---------------------------------------------------------------------------------------------
-
-
-		// update for next cycle
-		curr_src_idx = next_src_idx;
-		curr_dst_idx = next_dst_idx;
-
-		curr_interpl_y_upper = next_interpl_y_upper;
-		curr_interpl_y_lower = next_interpl_y_lower;
-		curr_interpl_vu = next_interpl_vu;
-
-		vf_curr_NSweight_y_upper = vf_next_NSweight_y_upper;	// was a self-assignment
-		vf_curr_NSweight_y_lower = vf_next_NSweight_y_lower;	// was a self-assignment
-		vf_curr_NSweight_vu = vf_next_NSweight_vu;
-
-		curr_src_y_upper = next_src_y_upper;
-		curr_src_y_lower = next_src_y_lower;
-		curr_src_vu = next_src_vu;
-
-		curr_lsoff_v = next_lsoff_v;
-		curr_lsoff_u = next_lsoff_u;
-	}
-
-
-
-	DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
-
-	// scaling of the last fetched set; nothing further is prefetched
-	// work line y_upper
-	bilinear_scale_line_w16( y_plane[curr_src_idx],
-			scaled_y_plane[curr_src_idx],
-			dst_width,
-			vf_x_scale,
-			vf_curr_NSweight_y_upper,
-			src_linestride_y );
-	// work line y_lower
-	bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
-			scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
-			dst_width,
-			vf_x_scale,
-			vf_curr_NSweight_y_lower,
-			src_linestride_y );
-	// work line v
-	bilinear_scale_line_w8( v_plane[curr_src_idx]+curr_lsoff_v,
-			scaled_v_plane[curr_src_idx],
-			dst_width>>1,
-			vf_x_scale,
-			vf_curr_NSweight_vu,
-			src_linestride_vu );
-	// work line u
-	bilinear_scale_line_w8( u_plane[curr_src_idx]+curr_lsoff_u,
-			scaled_u_plane[curr_src_idx],
-			dst_width>>1,
-			vf_x_scale,
-			vf_curr_NSweight_vu,
-			src_linestride_vu );
-
-	//---------------------------------------------------------------------------------------------
-	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-
-	// Perform three DMA transfers to 3 different locations in the main memory!
-	// dst_width:	Pixel width of destination image
-	// dst_addr:	Destination address in main memory
-	// dst_vu:	Counter which is incremented one by one
-	// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
-
-	mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int)  dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
-			dst_dbl_linestride_y,								// Two Y lines (2*dst_width bytes)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-			dst_dbl_linestride_vu,								// One V line (dst_width/2 bytes)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int)  dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-			dst_dbl_linestride_vu,								// One U line (dst_width/2 bytes)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	// wait for completion
-	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-	//---------------------------------------------------------------------------------------------
-}
-
-
-/*
- * scale_srcw32_dstw16()
- *
- * processes an input image of width 32
- * scaling is done to a width 16
- * yuv2rgb conversion on a width of 16
- * result stored in RAM
- */
-void scale_srcw32_dstw16() {
-	// extract parameters
-	unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
-
-	unsigned int src_width = parms.src_pixel_width;
-	unsigned int src_height = parms.src_pixel_height;
-	unsigned int dst_width = parms.dst_pixel_width;
-	unsigned int dst_height = parms.dst_pixel_height;
-
-	// YVU
-	unsigned int src_linestride_y = src_width;
-	unsigned int src_dbl_linestride_y = src_width<<1;
-	unsigned int src_linestride_vu = src_width>>1;
-	unsigned int src_dbl_linestride_vu = src_width;
-	// scaled YVU
-	unsigned int scaled_src_linestride_y = dst_width;
-
-	// ram addresses
-	unsigned char* src_addr_y = parms.y_plane;
-	unsigned char* src_addr_v = parms.v_plane;
-	unsigned char* src_addr_u = parms.u_plane;
-
-	unsigned int dst_picture_size = dst_width*dst_height;
-
-	// Sizes for destination
-	unsigned int dst_dbl_linestride_y = dst_width<<1;
-	unsigned int dst_dbl_linestride_vu = dst_width>>1;
-
-	// Perform address calculation for Y, V and U in main memory with dst_addr as base
-	unsigned char* dst_addr_main_memory_y = dst_addr;
-	unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
-	unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
-
-	// calculate scale factors
-	vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
-	float y_scale = (float)src_height/(float)dst_height;
-
-	// double buffered processing
-	// buffer switching
-	unsigned int curr_src_idx = 0;
-	unsigned int curr_dst_idx = 0;
-	unsigned int next_src_idx, next_dst_idx;
-
-	// 2 lines y as output, upper and lowerline
-	unsigned int curr_interpl_y_upper = 0;
-	unsigned int next_interpl_y_upper;
-	unsigned int curr_interpl_y_lower, next_interpl_y_lower;
-	// only 1 line v/u output, both planes have the same dimension
-	unsigned int curr_interpl_vu = 0;
-	unsigned int next_interpl_vu;
-
-	// weights, calculated in every loop iteration
-	vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
-	vector float vf_next_NSweight_y_upper;
-	vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
-	vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
-	vector float vf_next_NSweight_vu;
-
-	// line indices for the src picture
-	float curr_src_y_upper = 0.0f, next_src_y_upper;
-	float curr_src_y_lower, next_src_y_lower;
-	float curr_src_vu = 0.0f, next_src_vu;
-
-	// line indices for the dst picture
-	unsigned int dst_y=0, dst_vu=0;
-
-	// calculate lower line idices
-	curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
-	curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
-	// lower line weight
-	vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
-
-
-	// start partially double buffered processing
-	// get initial data, 2 sets of y, 1 set v, 1 set u
-	mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
-	mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
-			(unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
-			src_dbl_linestride_y,
-			RETR_BUF,
-			0, 0 );
-	mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
-	mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
-
-	// iteration loop
-	// within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
-	// the scaled output is 2 lines y, 1 line v, 1 line u
-	// the yuv2rgb-converted output is stored to RAM
-	for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
-		dst_y = dst_vu<<1;
-
-		// calculate next indices
-		next_src_vu = ((float)dst_vu+1)*y_scale;
-		next_src_y_upper = ((float)dst_y+2)*y_scale;
-		next_src_y_lower = ((float)dst_y+3)*y_scale;
-
-		next_interpl_vu = (unsigned int) next_src_vu;
-		next_interpl_y_upper = (unsigned int) next_src_y_upper;
-		next_interpl_y_lower = (unsigned int) next_src_y_lower;
-
-		// calculate weight NORTH-SOUTH
-		vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
-		vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
-		vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
-
-		// get next lines
-		next_src_idx = curr_src_idx^1;
-		next_dst_idx = curr_dst_idx^1;
-
-		// 4 lines y
-		mfc_get( y_plane[next_src_idx],
-				(unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
-				src_dbl_linestride_y,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-		mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
-				(unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
-				src_dbl_linestride_y,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-
-		// 2 lines v
-		mfc_get( v_plane[next_src_idx],
-				(unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu),
-				src_dbl_linestride_vu,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-		// 2 lines u
-		mfc_get( u_plane[next_src_idx],
-				(unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu),
-				src_dbl_linestride_vu,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-
-		DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
-
-		// scaling
-		// work line y_upper
-		bilinear_scale_line_w16( y_plane[curr_src_idx],
-				scaled_y_plane[curr_src_idx],
-				dst_width,
-				vf_x_scale,
-				vf_curr_NSweight_y_upper,
-				src_linestride_y );
-		// work line y_lower
-		bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
-				scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
-				dst_width,
-				vf_x_scale,
-				vf_curr_NSweight_y_lower,
-				src_linestride_y );
-		// work line v
-		bilinear_scale_line_w16( v_plane[curr_src_idx],
-				scaled_v_plane[curr_src_idx],
-				dst_width>>1,
-				vf_x_scale,
-				vf_curr_NSweight_vu,
-				src_linestride_vu );
-		// work line u
-		bilinear_scale_line_w16( u_plane[curr_src_idx],
-				scaled_u_plane[curr_src_idx],
-				dst_width>>1,
-				vf_x_scale,
-				vf_curr_NSweight_vu,
-				src_linestride_vu );
-
-		//---------------------------------------------------------------------------------------------
-		DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-
-		// Perform three DMA transfers to 3 different locations in the main memory!
-		// dst_width:	Pixel width of destination image
-		// dst_addr:	Destination address in main memory
-		// dst_vu:	Counter which is incremented one by one
-		// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
-
-		mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int)  dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
-				dst_dbl_linestride_y,								// Two Y lines (depending on the widht of the destination resolution)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-
-		mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-				dst_dbl_linestride_vu,								// Two V lines (depending on the widht of the destination resolution)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-
-		mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int)  dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
-				dst_dbl_linestride_vu,								// Two U lines (depending on the widht of the destination resolution)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-		//---------------------------------------------------------------------------------------------
-
-
-		// update for next cycle
-		curr_src_idx = next_src_idx;
-		curr_dst_idx = next_dst_idx;
-
-		curr_interpl_y_upper = next_interpl_y_upper;
-		curr_interpl_y_lower = next_interpl_y_lower;
-		curr_interpl_vu = next_interpl_vu;
-
-		vf_curr_NSweight_y_upper = vf_curr_NSweight_y_upper;
-		vf_curr_NSweight_y_lower = vf_curr_NSweight_y_lower;
-		vf_curr_NSweight_vu = vf_next_NSweight_vu;
-
-		curr_src_y_upper = next_src_y_upper;
-		curr_src_y_lower = next_src_y_lower;
-		curr_src_vu = next_src_vu;
-	}
-
-
-
-	DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
-
-	// scaling
-	// work line y_upper
-	bilinear_scale_line_w16( y_plane[curr_src_idx],
-			scaled_y_plane[curr_src_idx],
-			dst_width,
-			vf_x_scale,
-			vf_curr_NSweight_y_upper,
-			src_linestride_y );
-	// work line y_lower
-	bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
-			scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
-			dst_width,
-			vf_x_scale,
-			vf_curr_NSweight_y_lower,
-			src_linestride_y );
-	// work line v
-	bilinear_scale_line_w16( v_plane[curr_src_idx],
-			scaled_v_plane[curr_src_idx],
-			dst_width>>1,
-			vf_x_scale,
-			vf_curr_NSweight_vu,
-			src_linestride_vu );
-	// work line u
-	bilinear_scale_line_w16( u_plane[curr_src_idx],
-			scaled_u_plane[curr_src_idx],
-			dst_width>>1,
-			vf_x_scale,
-			vf_curr_NSweight_vu,
-			src_linestride_vu );
-
-
-	//---------------------------------------------------------------------------------------------
-	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-
-	// Perform three DMA transfers to 3 different locations in the main memory!
-	// dst_width:	Pixel width of destination image
-	// dst_addr:	Destination address in main memory
-	// dst_vu:	Counter which is incremented one by one
-	// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
-
-	mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int)  dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
-			dst_dbl_linestride_y,								// Two Y lines (depending on the widht of the destination resolution)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-			dst_dbl_linestride_vu,								// Two V lines (depending on the widht of the destination resolution)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int)  dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
-			dst_dbl_linestride_vu,								// Two U lines (depending on the widht of the destination resolution)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	// wait for completion
-	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-	//---------------------------------------------------------------------------------------------
-}
-
-
-/**
- * scale_srcw32_dstw32()
- *
- * Bilinear scaler for planar YVU input, intended for a source width of 32
- * and a destination width of 32 (the widths are read from parms and are
- * not checked here).
- *
- * Source lines are fetched from main memory by DMA into y_plane/v_plane/
- * u_plane, scaled with bilinear_scale_line_w16(), and the scaled Y, V and U
- * lines are written back to main memory at parms.dstBuffer. No colour
- * conversion is done in this function; the output stays planar YVU
- * (Y plane, then V plane, then U plane).
- *
- * Each step produces 2 Y lines, 1 V line and 1 U line. While one buffer
- * set is being scaled, the source lines for the next step are already
- * being requested into the other buffer set.
- *
- * Nothing is done if the destination is less than 2 lines high.
- */
-void scale_srcw32_dstw32() {
-	// extract parameters
-	unsigned char* dst_addr = (unsigned char *)parms.dstBuffer;
-
-	unsigned int src_width = parms.src_pixel_width;
-	unsigned int src_height = parms.src_pixel_height;
-	unsigned int dst_width = parms.dst_pixel_width;
-	unsigned int dst_height = parms.dst_pixel_height;
-
-	// Every step emits a pair of Y lines, so at least 2 destination lines
-	// are needed. This also keeps the unsigned loop bound
-	// (dst_height>>1)-1 below from wrapping around to UINT_MAX.
-	if( dst_height < 2 ) {
-		return;
-	}
-
-	// YVU
-	unsigned int src_linestride_y = src_width;
-	unsigned int src_dbl_linestride_y = src_width<<1;
-	unsigned int src_linestride_vu = src_width>>1;
-	unsigned int src_dbl_linestride_vu = src_width;
-
-	// scaled YVU
-	unsigned int scaled_src_linestride_y = dst_width;
-
-	// ram addresses
-	unsigned char* src_addr_y = parms.y_plane;
-	unsigned char* src_addr_v = parms.v_plane;
-	unsigned char* src_addr_u = parms.u_plane;
-
-	unsigned int dst_picture_size = dst_width*dst_height;
-
-	// Sizes for destination
-	// dst_dbl_linestride_y covers two Y lines, dst_dbl_linestride_vu covers
-	// a single V or U line (chroma lines are dst_width/2 bytes wide)
-	unsigned int dst_dbl_linestride_y = dst_width<<1;
-	unsigned int dst_dbl_linestride_vu = dst_width>>1;
-
-	// Perform address calculation for Y, V and U in main memory with dst_addr as base
-	unsigned char* dst_addr_main_memory_y = dst_addr;
-	unsigned char* dst_addr_main_memory_v = dst_addr + dst_picture_size;
-	unsigned char* dst_addr_main_memory_u = dst_addr_main_memory_v +(dst_picture_size>>2);
-
-	// calculate scale factors
-	vector float vf_x_scale = spu_splats( (float)src_width/(float)dst_width );
-	float y_scale = (float)src_height/(float)dst_height;
-
-	// double buffered processing
-	// buffer switching
-	unsigned int curr_src_idx = 0;
-	unsigned int curr_dst_idx = 0;
-	unsigned int next_src_idx, next_dst_idx;
-
-	// 2 lines y as output, upper and lowerline
-	unsigned int curr_interpl_y_upper = 0;
-	unsigned int next_interpl_y_upper;
-	unsigned int curr_interpl_y_lower, next_interpl_y_lower;
-	// only 1 line v/u output, both planes have the same dimension
-	unsigned int curr_interpl_vu = 0;
-	unsigned int next_interpl_vu;
-
-	// weights, calculated in every loop iteration
-	vector float vf_curr_NSweight_y_upper = { 0.0f, 0.0f, 0.0f, 0.0f };
-	vector float vf_next_NSweight_y_upper;
-	vector float vf_curr_NSweight_y_lower, vf_next_NSweight_y_lower;
-	vector float vf_curr_NSweight_vu = { 0.0f, 0.0f, 0.0f, 0.0f };
-	vector float vf_next_NSweight_vu;
-
-	// line indices for the src picture
-	float curr_src_y_upper = 0.0f, next_src_y_upper;
-	float curr_src_y_lower, next_src_y_lower;
-	float curr_src_vu = 0.0f, next_src_vu;
-
-	// line indices for the dst picture
-	unsigned int dst_y=0, dst_vu=0;
-
-	// calculate lower line indices
-	curr_src_y_lower = ((float)curr_interpl_y_upper+1)*y_scale;
-	curr_interpl_y_lower = (unsigned int)curr_src_y_lower;
-	// lower line weight
-	vf_curr_NSweight_y_lower = spu_splats( curr_src_y_lower-(float)curr_interpl_y_lower );
-
-
-	// start partially double buffered processing
-	// get initial data, 2 sets of y, 1 set v, 1 set u
-	mfc_get( y_plane[curr_src_idx], (unsigned int) src_addr_y, src_dbl_linestride_y, RETR_BUF, 0, 0 );
-	mfc_get( y_plane[curr_src_idx]+src_dbl_linestride_y,
-			(unsigned int) src_addr_y+(curr_interpl_y_lower*src_linestride_y),
-			src_dbl_linestride_y,
-			RETR_BUF,
-			0, 0 );
-	mfc_get( v_plane[curr_src_idx], (unsigned int) src_addr_v, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
-	mfc_get( u_plane[curr_src_idx], (unsigned int) src_addr_u, src_dbl_linestride_vu, RETR_BUF, 0, 0 );
-
-	// iteration loop
-	// within each iteration 4 lines y, 2 lines v, 2 lines u are retrieved
-	// the scaled output is 2 lines y, 1 line v, 1 line u
-	// the scaled output is stored to RAM
-	// the last pair of lines is handled after the loop, because there is
-	// no further source data to request for it
-	for( dst_vu=0; dst_vu<(dst_height>>1)-1; dst_vu++ ) {
-		dst_y = dst_vu<<1;
-
-		// calculate next indices
-		next_src_vu = ((float)dst_vu+1)*y_scale;
-		next_src_y_upper = ((float)dst_y+2)*y_scale;
-		next_src_y_lower = ((float)dst_y+3)*y_scale;
-
-		next_interpl_vu = (unsigned int) next_src_vu;
-		next_interpl_y_upper = (unsigned int) next_src_y_upper;
-		next_interpl_y_lower = (unsigned int) next_src_y_lower;
-
-		// calculate weight NORTH-SOUTH
-		vf_next_NSweight_vu = spu_splats( next_src_vu-(float)next_interpl_vu );
-		vf_next_NSweight_y_upper = spu_splats( next_src_y_upper-(float)next_interpl_y_upper );
-		vf_next_NSweight_y_lower = spu_splats( next_src_y_lower-(float)next_interpl_y_lower );
-
-		// get next lines
-		next_src_idx = curr_src_idx^1;
-		next_dst_idx = curr_dst_idx^1;
-
-		// 4 lines y
-		mfc_get( y_plane[next_src_idx],
-				(unsigned int) src_addr_y+(next_interpl_y_upper*src_linestride_y),
-				src_dbl_linestride_y,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-		mfc_get( y_plane[next_src_idx]+src_dbl_linestride_y,
-				(unsigned int) src_addr_y+(next_interpl_y_lower*src_linestride_y),
-				src_dbl_linestride_y,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-
-		// 2 lines v
-		mfc_get( v_plane[next_src_idx],
-				(unsigned int) src_addr_v+(next_interpl_vu*src_linestride_vu),
-				src_dbl_linestride_vu,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-		// 2 lines u
-		mfc_get( u_plane[next_src_idx],
-				(unsigned int) src_addr_u+(next_interpl_vu*src_linestride_vu),
-				src_dbl_linestride_vu,
-				RETR_BUF+next_src_idx,
-				0, 0 );
-
-		// the data for the current step has to be present before scaling
-		DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
-
-		// scaling
-		// work line y_upper
-		bilinear_scale_line_w16( y_plane[curr_src_idx],
-				scaled_y_plane[curr_src_idx],
-				dst_width,
-				vf_x_scale,
-				vf_curr_NSweight_y_upper,
-				src_linestride_y );
-		// work line y_lower
-		bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
-				scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
-				dst_width,
-				vf_x_scale,
-				vf_curr_NSweight_y_lower,
-				src_linestride_y );
-		// work line v
-		bilinear_scale_line_w16( v_plane[curr_src_idx],
-				scaled_v_plane[curr_src_idx],
-				dst_width>>1,
-				vf_x_scale,
-				vf_curr_NSweight_vu,
-				src_linestride_vu );
-		// work line u
-		bilinear_scale_line_w16( u_plane[curr_src_idx],
-				scaled_u_plane[curr_src_idx],
-				dst_width>>1,
-				vf_x_scale,
-				vf_curr_NSweight_vu,
-				src_linestride_vu );
-
-
-
-		// Store the result back to main memory into a destination buffer in YUV format
-		//---------------------------------------------------------------------------------------------
-		DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-
-		// Perform three DMA transfers to 3 different locations in the main memory!
-		// dst_width:	Pixel width of destination image
-		// dst_addr:	Destination address in main memory
-		// dst_vu:	Counter which is incremented one by one
-		// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
-
-		mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int) dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
-				dst_dbl_linestride_y,								// Two Y lines (depending on the width of the destination resolution)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-
-		mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-				dst_dbl_linestride_vu,								// One V line (dst_width/2 bytes)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-
-		mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
-				(unsigned int) dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-				dst_dbl_linestride_vu,								// One U line (dst_width/2 bytes)
-				STR_BUF+curr_dst_idx,								// Tag
-				0, 0 );
-		//---------------------------------------------------------------------------------------------
-
-
-		// update for next cycle
-		curr_src_idx = next_src_idx;
-		curr_dst_idx = next_dst_idx;
-
-		curr_interpl_y_upper = next_interpl_y_upper;
-		curr_interpl_y_lower = next_interpl_y_lower;
-		curr_interpl_vu = next_interpl_vu;
-
-		// the weights calculated for the lines requested above become the
-		// current weights of the next cycle
-		vf_curr_NSweight_y_upper = vf_next_NSweight_y_upper;
-		vf_curr_NSweight_y_lower = vf_next_NSweight_y_lower;
-		vf_curr_NSweight_vu = vf_next_NSweight_vu;
-
-		curr_src_y_upper = next_src_y_upper;
-		curr_src_y_lower = next_src_y_lower;
-		curr_src_vu = next_src_vu;
-	}
-
-
-
-	// last step: the data was already requested, either by the initial
-	// transfers or by the final loop iteration
-	DMA_WAIT_TAG( (RETR_BUF+curr_src_idx) );
-
-	// scaling
-	// work line y_upper
-	bilinear_scale_line_w16( y_plane[curr_src_idx],
-			scaled_y_plane[curr_src_idx],
-			dst_width,
-			vf_x_scale,
-			vf_curr_NSweight_y_upper,
-			src_linestride_y );
-	// work line y_lower
-	bilinear_scale_line_w16( y_plane[curr_src_idx]+src_dbl_linestride_y,
-			scaled_y_plane[curr_src_idx]+scaled_src_linestride_y,
-			dst_width,
-			vf_x_scale,
-			vf_curr_NSweight_y_lower,
-			src_linestride_y );
-	// work line v
-	bilinear_scale_line_w16( v_plane[curr_src_idx],
-			scaled_v_plane[curr_src_idx],
-			dst_width>>1,
-			vf_x_scale,
-			vf_curr_NSweight_vu,
-			src_linestride_vu );
-	// work line u
-	bilinear_scale_line_w16( u_plane[curr_src_idx],
-			scaled_u_plane[curr_src_idx],
-			dst_width>>1,
-			vf_x_scale,
-			vf_curr_NSweight_vu,
-			src_linestride_vu );
-
-
-	// Store the result back to main memory into a destination buffer in YUV format
-	//---------------------------------------------------------------------------------------------
-	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-
-	// Perform three DMA transfers to 3 different locations in the main memory!
-	// dst_width:	Pixel width of destination image
-	// dst_addr:	Destination address in main memory
-	// dst_vu:	Counter which is incremented one by one
-	// dst_y:	Counter which is twice larger than dst_vu (dst_y = 2*dst_vu)
-
-	mfc_put(	scaled_y_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int)  dst_addr_main_memory_y + (dst_vu*dst_dbl_linestride_y),	// Destination in main memory (addr)
-			dst_dbl_linestride_y,								// Two Y lines (depending on the width of the destination resolution)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	mfc_put(	scaled_v_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int) dst_addr_main_memory_v + (dst_vu*dst_dbl_linestride_vu),	// Destination in main memory (addr)
-			dst_dbl_linestride_vu,								// One V line (dst_width/2 bytes)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	mfc_put(	scaled_u_plane[curr_src_idx],							// What from local store (addr)
-			(unsigned int)  dst_addr_main_memory_u + (dst_vu*dst_dbl_linestride_vu), // Destination in main memory (addr)
-			dst_dbl_linestride_vu,								// One U line (dst_width/2 bytes)
-			STR_BUF+curr_dst_idx,								// Tag
-			0, 0 );
-
-	// wait for completion
-	DMA_WAIT_TAG( (STR_BUF+curr_dst_idx) );
-	//---------------------------------------------------------------------------------------------
-}
-
-
-/*
- * bilinear_scale_line_w8()
- *
- * Produces one bilinearly scaled output line from two input lines, 8
- * destination pixels per loop pass; dst_width has to be a multiple of 8.
- * The scaled output is written to the buffer at dst_.
- *
- * For every destination pixel x the source position is x*x_scale. Its
- * integer part selects the NW pixel, NE is the byte right after it, and
- * SW/SE are the same two positions src_linestride bytes further on. The
- * fractional part is the EAST-WEST weight. The result is
- *   EWtop    = NW + EWweight*(NE-NW)
- *   EWbottom = SW + EWweight*(SE-SW)
- *   result   = EWtop + NSweight*(EWbottom-EWtop)
- *
- * NOTE(review): NE/SE are read at offset+1 without any bounds check, so for
- * the rightmost pixels this presumably relies on readable bytes following
- * the line in the caller's buffer - confirm.
- * NOTE(review): the 8 result bytes are merged into the 16-byte vector read
- * through dst; this assumes the vector access addresses the quadword that
- * contains dst even when dst is only 8-byte aligned - confirm.
- * NOTE(review): the 8 filler bytes of the merged result are OR-ed onto the
- * preserved bytes, so they have to be zero; presumably guaranteed because
- * vfloat_to_vuint (defined outside this block) limits values to 0..255 -
- * confirm.
- *
- * @param src buffer holding the 2 input lines (the second line is read at src+src_linestride)
- * @param dst_ buffer for 1 line output
- * @param dst_width the width of the destination line in pixels
- * @param vf_x_scale a float vector holding the x scale factor in every element
- * @param vf_NSweight a float vector holding the NORTH/SOUTH weight of the current line in every element
- * @param src_linestride the stride of a source line in bytes
- */
-void bilinear_scale_line_w8( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) {
-
-	unsigned char* dst = dst_;	// write cursor, advanced by 8 bytes per loop pass
-
-	unsigned int dst_x;
-	for( dst_x=0; dst_x<dst_width; dst_x+=8) {
-		// address calculation for loading the 4 surrounding pixels of each calculated
-		// destination pixel
-		vector unsigned int vui_dst_x_tmp = spu_splats( dst_x );
-		// lower range: destination pixels dst_x+0 .. dst_x+3
-		// upper range: destination pixels dst_x+4 .. dst_x+7
-		vector unsigned int vui_inc_dst_x_lower_range = { 0, 1, 2, 3 };
-		vector unsigned int vui_inc_dst_x_upper_range = { 4, 5, 6, 7 };
-		vector unsigned int vui_dst_x_lower_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_lower_range );
-		vector unsigned int vui_dst_x_upper_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_upper_range );
-
-		// calculate weight EAST-WEST: source x = dst x * x_scale, weight = source x minus its integer part
-		vector float vf_dst_x_lower_range = spu_convtf( vui_dst_x_lower_range, 0 );
-		vector float vf_dst_x_upper_range = spu_convtf( vui_dst_x_upper_range, 0 );
-		vector float vf_src_x_lower_range = spu_mul( vf_dst_x_lower_range, vf_x_scale );
-		vector float vf_src_x_upper_range = spu_mul( vf_dst_x_upper_range, vf_x_scale );
-		vector unsigned int vui_interpl_x_lower_range = spu_convtu( vf_src_x_lower_range, 0 );	// integer source x (float -> unsigned conversion)
-		vector unsigned int vui_interpl_x_upper_range = spu_convtu( vf_src_x_upper_range, 0 );
-		vector float vf_interpl_x_lower_range = spu_convtf( vui_interpl_x_lower_range, 0 );
-		vector float vf_interpl_x_upper_range = spu_convtf( vui_interpl_x_upper_range, 0 );
-		vector float vf_EWweight_lower_range = spu_sub( vf_src_x_lower_range, vf_interpl_x_lower_range );
-		vector float vf_EWweight_upper_range = spu_sub( vf_src_x_upper_range, vf_interpl_x_upper_range );
-
-		// calculate address offset (in bytes, relative to src)
-		//
-		// pixel NORTH WEST
-		vector unsigned int vui_off_pixelNW_lower_range = vui_interpl_x_lower_range;
-		vector unsigned int vui_off_pixelNW_upper_range = vui_interpl_x_upper_range;
-
-		// pixel NORTH EAST-->(offpixelNW+1)
-		vector unsigned int vui_add_1 = { 1, 1, 1, 1 };
-		vector unsigned int vui_off_pixelNE_lower_range = spu_add( vui_off_pixelNW_lower_range, vui_add_1 );
-		vector unsigned int vui_off_pixelNE_upper_range = spu_add( vui_off_pixelNW_upper_range, vui_add_1 );
-
-		// SOUTH-WEST-->(offpixelNW+src_linestride)
-		vector unsigned int vui_srclinestride = spu_splats( src_linestride );
-		vector unsigned int vui_off_pixelSW_lower_range = spu_add( vui_srclinestride, vui_off_pixelNW_lower_range );
-		vector unsigned int vui_off_pixelSW_upper_range = spu_add( vui_srclinestride, vui_off_pixelNW_upper_range );
-
-		// SOUTH-EAST-->(offpixelNW+src_linestride+1)
-		vector unsigned int vui_off_pixelSE_lower_range = spu_add( vui_srclinestride, vui_off_pixelNE_lower_range );
-		vector unsigned int vui_off_pixelSE_upper_range = spu_add( vui_srclinestride, vui_off_pixelNE_upper_range );
-
-		// calculate each address: the src pointer value plus the offsets from above
-		vector unsigned int vui_src_ls = spu_splats( (unsigned int) src );
-		vector unsigned int vui_addr_pixelNW_lower_range = spu_add( vui_src_ls, vui_off_pixelNW_lower_range );
-		vector unsigned int vui_addr_pixelNW_upper_range = spu_add( vui_src_ls, vui_off_pixelNW_upper_range );
-		vector unsigned int vui_addr_pixelNE_lower_range = spu_add( vui_src_ls, vui_off_pixelNE_lower_range );
-		vector unsigned int vui_addr_pixelNE_upper_range = spu_add( vui_src_ls, vui_off_pixelNE_upper_range );
-
-		vector unsigned int vui_addr_pixelSW_lower_range = spu_add( vui_src_ls, vui_off_pixelSW_lower_range );
-		vector unsigned int vui_addr_pixelSW_upper_range = spu_add( vui_src_ls, vui_off_pixelSW_upper_range );
-		vector unsigned int vui_addr_pixelSE_lower_range = spu_add( vui_src_ls, vui_off_pixelSE_lower_range );
-		vector unsigned int vui_addr_pixelSE_upper_range = spu_add( vui_src_ls, vui_off_pixelSE_upper_range );
-
-		// get each pixel
-		//
-		// scalar load of each byte, then insertion into byte slots 3/7/11/15 of a zeroed vector,
-		// i.e. the last byte of each 4-byte element, so the vector can be converted as four unsigned ints below
-		// NORTH WEST
-		vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-		vector unsigned char vuc_pixel_NW_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 0 )), null_vector, 3 );
-		vuc_pixel_NW_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 1 )),
-				vuc_pixel_NW_lower_range, 7 );
-		vuc_pixel_NW_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 2 )),
-				vuc_pixel_NW_lower_range, 11 );
-		vuc_pixel_NW_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_lower_range, 3 )),
-				vuc_pixel_NW_lower_range, 15 );
-
-		vector unsigned char vuc_pixel_NW_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 0 )), null_vector, 3 );
-		vuc_pixel_NW_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 1 )),
-				vuc_pixel_NW_upper_range, 7 );
-		vuc_pixel_NW_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 2 )),
-				vuc_pixel_NW_upper_range, 11 );
-		vuc_pixel_NW_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_upper_range, 3 )),
-				vuc_pixel_NW_upper_range, 15 );
-
-		// NORTH EAST
-		vector unsigned char vuc_pixel_NE_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 0 )), null_vector, 3 );
-		vuc_pixel_NE_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 1 )),
-				vuc_pixel_NE_lower_range, 7 );
-		vuc_pixel_NE_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 2 )),
-				vuc_pixel_NE_lower_range, 11 );
-		vuc_pixel_NE_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_lower_range, 3 )),
-				vuc_pixel_NE_lower_range, 15 );
-
-		vector unsigned char vuc_pixel_NE_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 0 )), null_vector, 3 );
-		vuc_pixel_NE_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 1 )),
-				vuc_pixel_NE_upper_range, 7 );
-		vuc_pixel_NE_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 2 )),
-				vuc_pixel_NE_upper_range, 11 );
-		vuc_pixel_NE_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_upper_range, 3 )),
-				vuc_pixel_NE_upper_range, 15 );
-
-
-		// SOUTH WEST
-		vector unsigned char vuc_pixel_SW_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 0 )), null_vector, 3 );
-		vuc_pixel_SW_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 1 )),
-				vuc_pixel_SW_lower_range, 7 );
-		vuc_pixel_SW_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 2 )),
-				vuc_pixel_SW_lower_range, 11 );
-		vuc_pixel_SW_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_lower_range, 3 )),
-				vuc_pixel_SW_lower_range, 15 );
-
-		vector unsigned char vuc_pixel_SW_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 0 )), null_vector, 3 );
-		vuc_pixel_SW_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 1 )),
-				vuc_pixel_SW_upper_range, 7 );
-		vuc_pixel_SW_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 2 )),
-				vuc_pixel_SW_upper_range, 11 );
-		vuc_pixel_SW_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_upper_range, 3 )),
-				vuc_pixel_SW_upper_range, 15 );
-
-		// SOUTH EAST
-		vector unsigned char vuc_pixel_SE_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 0 )), null_vector, 3 );
-		vuc_pixel_SE_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 1 )),
-				vuc_pixel_SE_lower_range, 7 );
-		vuc_pixel_SE_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 2 )),
-				vuc_pixel_SE_lower_range, 11 );
-		vuc_pixel_SE_lower_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_lower_range, 3 )),
-				vuc_pixel_SE_lower_range, 15 );
-
-		vector unsigned char vuc_pixel_SE_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 0 )), null_vector, 3 );
-		vuc_pixel_SE_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 1 )),
-				vuc_pixel_SE_upper_range, 7 );
-		vuc_pixel_SE_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 2 )),
-				vuc_pixel_SE_upper_range, 11 );
-		vuc_pixel_SE_upper_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_upper_range, 3 )),
-				vuc_pixel_SE_upper_range, 15 );
-
-
-		// convert to float: each byte vector is reinterpreted as four unsigned ints first
-		vector float vf_pixel_NW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_lower_range, 0 );
-		vector float vf_pixel_NW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_upper_range, 0 );
-
-		vector float vf_pixel_SW_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_lower_range, 0 );
-		vector float vf_pixel_SW_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_upper_range, 0 );
-
-		vector float vf_pixel_NE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_lower_range, 0 );
-		vector float vf_pixel_NE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_upper_range, 0 );
-
-		vector float vf_pixel_SE_lower_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_lower_range, 0 );
-		vector float vf_pixel_SE_upper_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_upper_range, 0 );
-
-
-
-		// first linear interpolation: EWtop
-		// EWtop = NW + EWweight*(NE-NW)
-		//
-		// lower range
-		vector float vf_EWtop_lower_range_tmp = spu_sub( vf_pixel_NE_lower_range, vf_pixel_NW_lower_range );
-		vector float vf_EWtop_lower_range = spu_madd( vf_EWweight_lower_range,
-								vf_EWtop_lower_range_tmp,
-								vf_pixel_NW_lower_range );
-
-		// upper range
-		vector float vf_EWtop_upper_range_tmp = spu_sub( vf_pixel_NE_upper_range, vf_pixel_NW_upper_range );
-		vector float vf_EWtop_upper_range = spu_madd( vf_EWweight_upper_range,
-								vf_EWtop_upper_range_tmp,
-								vf_pixel_NW_upper_range );
-
-
-
-		// second linear interpolation: EWbottom
-		// EWbottom = SW + EWweight*(SE-SW)
-		//
-		// lower range
-		vector float vf_EWbottom_lower_range_tmp = spu_sub( vf_pixel_SE_lower_range, vf_pixel_SW_lower_range );
-		vector float vf_EWbottom_lower_range = spu_madd( vf_EWweight_lower_range,
-								vf_EWbottom_lower_range_tmp,
-								vf_pixel_SW_lower_range );
-
-		// upper range
-		vector float vf_EWbottom_upper_range_tmp = spu_sub( vf_pixel_SE_upper_range, vf_pixel_SW_upper_range );
-		vector float vf_EWbottom_upper_range = spu_madd( vf_EWweight_upper_range,
-								vf_EWbottom_upper_range_tmp,
-								vf_pixel_SW_upper_range );
-
-
-
-		// third linear interpolation: the bilinear interpolated value
-		// result = EWtop + NSweight*(EWbottom-EWtop);
-		//
-		// lower range
-		vector float vf_result_lower_range_tmp = spu_sub( vf_EWbottom_lower_range, vf_EWtop_lower_range );
-		vector float vf_result_lower_range = spu_madd( vf_NSweight,
-								vf_result_lower_range_tmp,
-								vf_EWtop_lower_range );
-
-		// upper range
-		vector float vf_result_upper_range_tmp = spu_sub( vf_EWbottom_upper_range, vf_EWtop_upper_range );
-		vector float vf_result_upper_range = spu_madd( vf_NSweight,
-								vf_result_upper_range_tmp,
-								vf_EWtop_upper_range );
-
-
-		// convert back: using saturated arithmetic (vfloat_to_vuint is defined outside this block)
-		vector unsigned int vui_result_lower_range = vfloat_to_vuint( vf_result_lower_range );
-		vector unsigned int vui_result_upper_range = vfloat_to_vuint( vf_result_upper_range );
-
-		// merge results->lower,upper: pack the last byte of each of the 8 result elements into bytes 0-7
-		vector unsigned char vuc_mask_merge_result = { 0x03, 0x07, 0x0B, 0x0F,	// from the first shuffle operand (lower range)
-							       0x13, 0x17, 0x1B, 0x1F,	// from the second shuffle operand (upper range)
-							       0x00, 0x00, 0x00, 0x00,	// filler: byte 0 of the first operand
-							       0x00, 0x00, 0x00, 0x00 };
-
-		vector unsigned char vuc_result = spu_shuffle( (vector unsigned char) vui_result_lower_range,
-								(vector unsigned char) vui_result_upper_range,
-								vuc_mask_merge_result );
-
-		// partial storing: only 8 bytes of the 16-byte vector at dst are replaced
-		vector unsigned char vuc_mask_out = { 0x00, 0x00, 0x00, 0x00,	// bytes 0-7: cleared, taken from the result
-						      0x00, 0x00, 0x00, 0x00,
-						      0xFF, 0xFF, 0xFF, 0xFF,	// bytes 8-15: kept from the stored data
-						      0xFF, 0xFF, 0xFF, 0xFF };
-
-
-		// get currently stored data
-		vector unsigned char vuc_orig = *((vector unsigned char*)dst);
-
-		// clear the bytes of the stored data that are replaced; the mask is rotated by the low 4 bits of the dst address
-		vuc_orig = spu_and( vuc_orig,
-				spu_rlqwbyte( vuc_mask_out, ((unsigned int)dst)&0x0F) );
-
-		// rotate result according to storing address (same rotation as applied to the mask)
-		vuc_result = spu_rlqwbyte( vuc_result, ((unsigned int)dst)&0x0F );
-
-		// store result: merged with the preserved bytes
-		*((vector unsigned char*)dst) = spu_or( vuc_result,
-							vuc_orig );
-		dst += 8;
-	}
-}
-
-
-/*
- * bilinear_scale_line_w16()
- *
- * processes a line of yuv-input, width has to be a multiple of 16
- * scaled yuv-output is written to local store buffer
- *
- * @param src buffer for 2 lines input
- * @param dst_ buffer for 1 line output
- * @param dst_width the width of the destination line
- * @param vf_x_scale a float vector, at each entry is the x_scale-factor
- * @param vf_NSweight a float vector, at each position is the weight NORTH/SOUTH for the current line
- * @param src_linestride the stride of the srcline
- */
-void bilinear_scale_line_w16( unsigned char* src, unsigned char* dst_, unsigned int dst_width, vector float vf_x_scale, vector float vf_NSweight, unsigned int src_linestride ) {
-
-	unsigned char* dst = dst_;
-
-	unsigned int dst_x;
-	for( dst_x=0; dst_x<dst_width; dst_x+=16) {
-		// address calculation for loading the 4 surrounding pixel of each calculated
-		// destination pixel
-		vector unsigned int vui_dst_x_tmp = spu_splats( dst_x );
-		// parallelised processing
-		// first range->pixel 1 2 3 4
-		// second range->pixel 5 6 7 8
-		// third range->pixel 9 10 11 12
-		// fourth range->pixel 13 14 15 16
-		vector unsigned int vui_inc_dst_x_first_range = { 0, 1, 2, 3 };
-		vector unsigned int vui_inc_dst_x_second_range = { 4, 5, 6, 7 };
-		vector unsigned int vui_inc_dst_x_third_range = { 8, 9, 10, 11 };
-		vector unsigned int vui_inc_dst_x_fourth_range = { 12, 13, 14, 15 };
-		vector unsigned int vui_dst_x_first_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_first_range );
-		vector unsigned int vui_dst_x_second_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_second_range );
-		vector unsigned int vui_dst_x_third_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_third_range );
-		vector unsigned int vui_dst_x_fourth_range = spu_add( vui_dst_x_tmp, vui_inc_dst_x_fourth_range );
-
-		// calculate weight EAST-WEST
-		vector float vf_dst_x_first_range = spu_convtf( vui_dst_x_first_range, 0 );
-		vector float vf_dst_x_second_range = spu_convtf( vui_dst_x_second_range, 0 );
-		vector float vf_dst_x_third_range = spu_convtf( vui_dst_x_third_range, 0 );
-		vector float vf_dst_x_fourth_range = spu_convtf( vui_dst_x_fourth_range, 0 );
-		vector float vf_src_x_first_range = spu_mul( vf_dst_x_first_range, vf_x_scale );
-		vector float vf_src_x_second_range = spu_mul( vf_dst_x_second_range, vf_x_scale );
-		vector float vf_src_x_third_range = spu_mul( vf_dst_x_third_range, vf_x_scale );
-		vector float vf_src_x_fourth_range = spu_mul( vf_dst_x_fourth_range, vf_x_scale );
-		vector unsigned int vui_interpl_x_first_range = spu_convtu( vf_src_x_first_range, 0 );
-		vector unsigned int vui_interpl_x_second_range = spu_convtu( vf_src_x_second_range, 0 );
-		vector unsigned int vui_interpl_x_third_range = spu_convtu( vf_src_x_third_range, 0 );
-		vector unsigned int vui_interpl_x_fourth_range = spu_convtu( vf_src_x_fourth_range, 0 );
-		vector float vf_interpl_x_first_range = spu_convtf( vui_interpl_x_first_range, 0 );
-		vector float vf_interpl_x_second_range = spu_convtf( vui_interpl_x_second_range, 0 );
-		vector float vf_interpl_x_third_range = spu_convtf( vui_interpl_x_third_range, 0 );
-		vector float vf_interpl_x_fourth_range = spu_convtf( vui_interpl_x_fourth_range, 0 );
-		vector float vf_EWweight_first_range = spu_sub( vf_src_x_first_range, vf_interpl_x_first_range );
-		vector float vf_EWweight_second_range = spu_sub( vf_src_x_second_range, vf_interpl_x_second_range );
-		vector float vf_EWweight_third_range = spu_sub( vf_src_x_third_range, vf_interpl_x_third_range );
-		vector float vf_EWweight_fourth_range = spu_sub( vf_src_x_fourth_range, vf_interpl_x_fourth_range );
-
-		// calculate address offset
-		//
-		// pixel NORTH WEST
-		vector unsigned int vui_off_pixelNW_first_range = vui_interpl_x_first_range;
-		vector unsigned int vui_off_pixelNW_second_range = vui_interpl_x_second_range;
-		vector unsigned int vui_off_pixelNW_third_range = vui_interpl_x_third_range;
-		vector unsigned int vui_off_pixelNW_fourth_range = vui_interpl_x_fourth_range;
-
-		// pixel NORTH EAST-->(offpixelNW+1)
-		vector unsigned int vui_add_1 = { 1, 1, 1, 1 };
-		vector unsigned int vui_off_pixelNE_first_range = spu_add( vui_off_pixelNW_first_range, vui_add_1 );
-		vector unsigned int vui_off_pixelNE_second_range = spu_add( vui_off_pixelNW_second_range, vui_add_1 );
-		vector unsigned int vui_off_pixelNE_third_range = spu_add( vui_off_pixelNW_third_range, vui_add_1 );
-		vector unsigned int vui_off_pixelNE_fourth_range = spu_add( vui_off_pixelNW_fourth_range, vui_add_1 );
-
-		// SOUTH-WEST-->(offpixelNW+src_linestride)
-		vector unsigned int vui_srclinestride = spu_splats( src_linestride );
-		vector unsigned int vui_off_pixelSW_first_range = spu_add( vui_srclinestride, vui_off_pixelNW_first_range );
-		vector unsigned int vui_off_pixelSW_second_range = spu_add( vui_srclinestride, vui_off_pixelNW_second_range );
-		vector unsigned int vui_off_pixelSW_third_range = spu_add( vui_srclinestride, vui_off_pixelNW_third_range );
-		vector unsigned int vui_off_pixelSW_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNW_fourth_range );
-
-		// SOUTH-EAST-->(offpixelNW+src_linestride+1)
-		vector unsigned int vui_off_pixelSE_first_range = spu_add( vui_srclinestride, vui_off_pixelNE_first_range );
-		vector unsigned int vui_off_pixelSE_second_range = spu_add( vui_srclinestride, vui_off_pixelNE_second_range );
-		vector unsigned int vui_off_pixelSE_third_range = spu_add( vui_srclinestride, vui_off_pixelNE_third_range );
-		vector unsigned int vui_off_pixelSE_fourth_range = spu_add( vui_srclinestride, vui_off_pixelNE_fourth_range );
-
-		// calculate each address
-		vector unsigned int vui_src_ls = spu_splats( (unsigned int) src );
-		vector unsigned int vui_addr_pixelNW_first_range = spu_add( vui_src_ls, vui_off_pixelNW_first_range );
-		vector unsigned int vui_addr_pixelNW_second_range = spu_add( vui_src_ls, vui_off_pixelNW_second_range );
-		vector unsigned int vui_addr_pixelNW_third_range = spu_add( vui_src_ls, vui_off_pixelNW_third_range );
-		vector unsigned int vui_addr_pixelNW_fourth_range = spu_add( vui_src_ls, vui_off_pixelNW_fourth_range );
-
-		vector unsigned int vui_addr_pixelNE_first_range = spu_add( vui_src_ls, vui_off_pixelNE_first_range );
-		vector unsigned int vui_addr_pixelNE_second_range = spu_add( vui_src_ls, vui_off_pixelNE_second_range );
-		vector unsigned int vui_addr_pixelNE_third_range = spu_add( vui_src_ls, vui_off_pixelNE_third_range );
-		vector unsigned int vui_addr_pixelNE_fourth_range = spu_add( vui_src_ls, vui_off_pixelNE_fourth_range );
-
-		vector unsigned int vui_addr_pixelSW_first_range = spu_add( vui_src_ls, vui_off_pixelSW_first_range );
-		vector unsigned int vui_addr_pixelSW_second_range = spu_add( vui_src_ls, vui_off_pixelSW_second_range );
-		vector unsigned int vui_addr_pixelSW_third_range = spu_add( vui_src_ls, vui_off_pixelSW_third_range );
-		vector unsigned int vui_addr_pixelSW_fourth_range = spu_add( vui_src_ls, vui_off_pixelSW_fourth_range );
-
-		vector unsigned int vui_addr_pixelSE_first_range = spu_add( vui_src_ls, vui_off_pixelSE_first_range );
-		vector unsigned int vui_addr_pixelSE_second_range = spu_add( vui_src_ls, vui_off_pixelSE_second_range );
-		vector unsigned int vui_addr_pixelSE_third_range = spu_add( vui_src_ls, vui_off_pixelSE_third_range );
-		vector unsigned int vui_addr_pixelSE_fourth_range = spu_add( vui_src_ls, vui_off_pixelSE_fourth_range );
-
-
-		// get each pixel
-		//
-		// scalar load, afterwards insertion into the right position
-		// NORTH WEST
-		// first range
-		vector unsigned char null_vector = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-		vector unsigned char vuc_pixel_NW_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 0 )), null_vector, 3 );
-		vuc_pixel_NW_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 1 )),
-				vuc_pixel_NW_first_range, 7 );
-		vuc_pixel_NW_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 2 )),
-				vuc_pixel_NW_first_range, 11 );
-		vuc_pixel_NW_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_first_range, 3 )),
-				vuc_pixel_NW_first_range, 15 );
-		// second range
-		vector unsigned char vuc_pixel_NW_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 0 )), null_vector, 3 );
-		vuc_pixel_NW_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 1 )),
-				vuc_pixel_NW_second_range, 7 );
-		vuc_pixel_NW_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 2 )),
-				vuc_pixel_NW_second_range, 11 );
-		vuc_pixel_NW_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_second_range, 3 )),
-				vuc_pixel_NW_second_range, 15 );
-		// third range
-		vector unsigned char vuc_pixel_NW_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 0 )), null_vector, 3 );
-		vuc_pixel_NW_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 1 )),
-				vuc_pixel_NW_third_range, 7 );
-		vuc_pixel_NW_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 2 )),
-				vuc_pixel_NW_third_range, 11 );
-		vuc_pixel_NW_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_third_range, 3 )),
-				vuc_pixel_NW_third_range, 15 );
-		// fourth range
-		vector unsigned char vuc_pixel_NW_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 0 )), null_vector, 3 );
-		vuc_pixel_NW_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 1 )),
-				vuc_pixel_NW_fourth_range, 7 );
-		vuc_pixel_NW_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 2 )),
-				vuc_pixel_NW_fourth_range, 11 );
-		vuc_pixel_NW_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNW_fourth_range, 3 )),
-				vuc_pixel_NW_fourth_range, 15 );
-
-		// NORTH EAST
-		// first range
-		vector unsigned char vuc_pixel_NE_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 0 )), null_vector, 3 );
-		vuc_pixel_NE_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 1 )),
-				vuc_pixel_NE_first_range, 7 );
-		vuc_pixel_NE_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 2 )),
-				vuc_pixel_NE_first_range, 11 );
-		vuc_pixel_NE_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_first_range, 3 )),
-				vuc_pixel_NE_first_range, 15 );
-		// second range
-		vector unsigned char vuc_pixel_NE_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 0 )), null_vector, 3 );
-		vuc_pixel_NE_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 1 )),
-				vuc_pixel_NE_second_range, 7 );
-		vuc_pixel_NE_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 2 )),
-				vuc_pixel_NE_second_range, 11 );
-		vuc_pixel_NE_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_second_range, 3 )),
-				vuc_pixel_NE_second_range, 15 );
-		// third range
-		vector unsigned char vuc_pixel_NE_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 0 )), null_vector, 3 );
-		vuc_pixel_NE_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 1 )),
-				vuc_pixel_NE_third_range, 7 );
-		vuc_pixel_NE_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 2 )),
-				vuc_pixel_NE_third_range, 11 );
-		vuc_pixel_NE_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_third_range, 3 )),
-				vuc_pixel_NE_third_range, 15 );
-		// fourth range
-		vector unsigned char vuc_pixel_NE_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 0 )), null_vector, 3 );
-		vuc_pixel_NE_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 1 )),
-				vuc_pixel_NE_fourth_range, 7 );
-		vuc_pixel_NE_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 2 )),
-				vuc_pixel_NE_fourth_range, 11 );
-		vuc_pixel_NE_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelNE_fourth_range, 3 )),
-				vuc_pixel_NE_fourth_range, 15 );
-
-		// SOUTH WEST
-		// first range
-		vector unsigned char vuc_pixel_SW_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 0 )), null_vector, 3 );
-		vuc_pixel_SW_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 1 )),
-				vuc_pixel_SW_first_range, 7 );
-		vuc_pixel_SW_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 2 )),
-				vuc_pixel_SW_first_range, 11 );
-		vuc_pixel_SW_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_first_range, 3 )),
-				vuc_pixel_SW_first_range, 15 );
-		// second range
-		vector unsigned char vuc_pixel_SW_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 0 )), null_vector, 3 );
-		vuc_pixel_SW_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 1 )),
-				vuc_pixel_SW_second_range, 7 );
-		vuc_pixel_SW_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 2 )),
-				vuc_pixel_SW_second_range, 11 );
-		vuc_pixel_SW_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_second_range, 3 )),
-				vuc_pixel_SW_second_range, 15 );
-		// third range
-		vector unsigned char vuc_pixel_SW_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 0 )), null_vector, 3 );
-		vuc_pixel_SW_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 1 )),
-				vuc_pixel_SW_third_range, 7 );
-		vuc_pixel_SW_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 2 )),
-				vuc_pixel_SW_third_range, 11 );
-		vuc_pixel_SW_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_third_range, 3 )),
-				vuc_pixel_SW_third_range, 15 );
-		// fourth range
-		vector unsigned char vuc_pixel_SW_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 0 )), null_vector, 3 );
-		vuc_pixel_SW_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 1 )),
-				vuc_pixel_SW_fourth_range, 7 );
-		vuc_pixel_SW_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 2 )),
-				vuc_pixel_SW_fourth_range, 11 );
-		vuc_pixel_SW_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSW_fourth_range, 3 )),
-				vuc_pixel_SW_fourth_range, 15 );
-
-		// NORTH EAST
-		// first range
-		vector unsigned char vuc_pixel_SE_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 0 )), null_vector, 3 );
-		vuc_pixel_SE_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 1 )),
-				vuc_pixel_SE_first_range, 7 );
-		vuc_pixel_SE_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 2 )),
-				vuc_pixel_SE_first_range, 11 );
-		vuc_pixel_SE_first_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_first_range, 3 )),
-				vuc_pixel_SE_first_range, 15 );
-		// second range
-		vector unsigned char vuc_pixel_SE_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 0 )), null_vector, 3 );
-		vuc_pixel_SE_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 1 )),
-				vuc_pixel_SE_second_range, 7 );
-		vuc_pixel_SE_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 2 )),
-				vuc_pixel_SE_second_range, 11 );
-		vuc_pixel_SE_second_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_second_range, 3 )),
-				vuc_pixel_SE_second_range, 15 );
-		// third range
-		vector unsigned char vuc_pixel_SE_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 0 )), null_vector, 3 );
-		vuc_pixel_SE_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 1 )),
-				vuc_pixel_SE_third_range, 7 );
-		vuc_pixel_SE_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 2 )),
-				vuc_pixel_SE_third_range, 11 );
-		vuc_pixel_SE_third_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_third_range, 3 )),
-				vuc_pixel_SE_third_range, 15 );
-		// fourth range
-		vector unsigned char vuc_pixel_SE_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 0 )), null_vector, 3 );
-		vuc_pixel_SE_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 1 )),
-				vuc_pixel_SE_fourth_range, 7 );
-		vuc_pixel_SE_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 2 )),
-				vuc_pixel_SE_fourth_range, 11 );
-		vuc_pixel_SE_fourth_range = spu_insert(
-				*((unsigned char*) spu_extract( vui_addr_pixelSE_fourth_range, 3 )),
-				vuc_pixel_SE_fourth_range, 15 );
-
-
-
-		// convert to float
-		vector float vf_pixel_NW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_first_range, 0 );
-		vector float vf_pixel_NW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_second_range, 0 );
-		vector float vf_pixel_NW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_third_range, 0 );
-		vector float vf_pixel_NW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NW_fourth_range, 0 );
-
-		vector float vf_pixel_NE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_first_range, 0 );
-		vector float vf_pixel_NE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_second_range, 0 );
-		vector float vf_pixel_NE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_third_range, 0 );
-		vector float vf_pixel_NE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_NE_fourth_range, 0 );
-
-		vector float vf_pixel_SW_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_first_range, 0 );
-		vector float vf_pixel_SW_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_second_range, 0 );
-		vector float vf_pixel_SW_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_third_range, 0 );
-		vector float vf_pixel_SW_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SW_fourth_range, 0 );
-
-		vector float vf_pixel_SE_first_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_first_range, 0 );
-		vector float vf_pixel_SE_second_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_second_range, 0 );
-		vector float vf_pixel_SE_third_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_third_range, 0 );
-		vector float vf_pixel_SE_fourth_range = spu_convtf( (vector unsigned int) vuc_pixel_SE_fourth_range, 0 );
-
-		// first linear interpolation: EWtop
-		// EWtop = NW + EWweight*(NE-NW)
-		//
-		// first range
-		vector float vf_EWtop_first_range_tmp = spu_sub( vf_pixel_NE_first_range, vf_pixel_NW_first_range );
-		vector float vf_EWtop_first_range = spu_madd( vf_EWweight_first_range,
-								vf_EWtop_first_range_tmp,
-								vf_pixel_NW_first_range );
-
-		// second range
-		vector float vf_EWtop_second_range_tmp = spu_sub( vf_pixel_NE_second_range, vf_pixel_NW_second_range );
-		vector float vf_EWtop_second_range = spu_madd( vf_EWweight_second_range,
-								vf_EWtop_second_range_tmp,
-								vf_pixel_NW_second_range );
-
-		// third range
-		vector float vf_EWtop_third_range_tmp = spu_sub( vf_pixel_NE_third_range, vf_pixel_NW_third_range );
-		vector float vf_EWtop_third_range = spu_madd( vf_EWweight_third_range,
-								vf_EWtop_third_range_tmp,
-								vf_pixel_NW_third_range );
-
-		// fourth range
-		vector float vf_EWtop_fourth_range_tmp = spu_sub( vf_pixel_NE_fourth_range, vf_pixel_NW_fourth_range );
-		vector float vf_EWtop_fourth_range = spu_madd( vf_EWweight_fourth_range,
-								vf_EWtop_fourth_range_tmp,
-								vf_pixel_NW_fourth_range );
-
-
-
-		// second linear interpolation: EWbottom
-		// EWbottom = SW + EWweight*(SE-SW)
-		//
-		// first range
-		vector float vf_EWbottom_first_range_tmp = spu_sub( vf_pixel_SE_first_range, vf_pixel_SW_first_range );
-		vector float vf_EWbottom_first_range = spu_madd( vf_EWweight_first_range,
-								vf_EWbottom_first_range_tmp,
-								vf_pixel_SW_first_range );
-
-		// second range
-		vector float vf_EWbottom_second_range_tmp = spu_sub( vf_pixel_SE_second_range, vf_pixel_SW_second_range );
-		vector float vf_EWbottom_second_range = spu_madd( vf_EWweight_second_range,
-								vf_EWbottom_second_range_tmp,
-								vf_pixel_SW_second_range );
-		// first range
-		vector float vf_EWbottom_third_range_tmp = spu_sub( vf_pixel_SE_third_range, vf_pixel_SW_third_range );
-		vector float vf_EWbottom_third_range = spu_madd( vf_EWweight_third_range,
-								vf_EWbottom_third_range_tmp,
-								vf_pixel_SW_third_range );
-
-		// first range
-		vector float vf_EWbottom_fourth_range_tmp = spu_sub( vf_pixel_SE_fourth_range, vf_pixel_SW_fourth_range );
-		vector float vf_EWbottom_fourth_range = spu_madd( vf_EWweight_fourth_range,
-								vf_EWbottom_fourth_range_tmp,
-								vf_pixel_SW_fourth_range );
-
-
-
-		// third linear interpolation: the bilinear interpolated value
-		// result = EWtop + NSweight*(EWbottom-EWtop);
-		//
-		// first range
-		vector float vf_result_first_range_tmp = spu_sub( vf_EWbottom_first_range, vf_EWtop_first_range );
-		vector float vf_result_first_range = spu_madd( vf_NSweight,
-								vf_result_first_range_tmp,
-								vf_EWtop_first_range );
-
-		// second range
-		vector float vf_result_second_range_tmp = spu_sub( vf_EWbottom_second_range, vf_EWtop_second_range );
-		vector float vf_result_second_range = spu_madd( vf_NSweight,
-								vf_result_second_range_tmp,
-								vf_EWtop_second_range );
-
-		// third range
-		vector float vf_result_third_range_tmp = spu_sub( vf_EWbottom_third_range, vf_EWtop_third_range );
-		vector float vf_result_third_range = spu_madd( vf_NSweight,
-								vf_result_third_range_tmp,
-								vf_EWtop_third_range );
-
-		// fourth range
-		vector float vf_result_fourth_range_tmp = spu_sub( vf_EWbottom_fourth_range, vf_EWtop_fourth_range );
-		vector float vf_result_fourth_range = spu_madd( vf_NSweight,
-								vf_result_fourth_range_tmp,
-								vf_EWtop_fourth_range );
-
-
-
-		// convert back: using saturated arithmetic
-		vector unsigned int vui_result_first_range = vfloat_to_vuint( vf_result_first_range );
-		vector unsigned int vui_result_second_range = vfloat_to_vuint( vf_result_second_range );
-		vector unsigned int vui_result_third_range = vfloat_to_vuint( vf_result_third_range );
-		vector unsigned int vui_result_fourth_range = vfloat_to_vuint( vf_result_fourth_range );
-
-		// merge results->lower,upper
-		vector unsigned char vuc_mask_merge_result_first_second = { 0x03, 0x07, 0x0B, 0x0F,
-							       		    0x13, 0x17, 0x1B, 0x1F,
-							       		    0x00, 0x00, 0x00, 0x00,
-							       		    0x00, 0x00, 0x00, 0x00 };
-
-		vector unsigned char vuc_mask_merge_result_third_fourth = { 0x00, 0x00, 0x00, 0x00,
-							       		    0x00, 0x00, 0x00, 0x00,
-									    0x03, 0x07, 0x0B, 0x0F,
-							       		    0x13, 0x17, 0x1B, 0x1F };
-
-		vector unsigned char vuc_result_first_second =
-						spu_shuffle( (vector unsigned char) vui_result_first_range,
-								 (vector unsigned char) vui_result_second_range,
-								vuc_mask_merge_result_first_second );
-
-		vector unsigned char vuc_result_third_fourth =
-						spu_shuffle( (vector unsigned char) vui_result_third_range,
-								 (vector unsigned char) vui_result_fourth_range,
-								vuc_mask_merge_result_third_fourth );
-
-		// store result
-		*((vector unsigned char*)dst) = spu_or( vuc_result_first_second,
-							vuc_result_third_fourth );
-		dst += 16;
-	}
-}
-
--- a/src/video/ps3/spulibs/fb_writer.c	Wed Jan 19 22:21:31 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,193 +0,0 @@
-/*
- * SDL - Simple DirectMedia Layer
- * CELL BE Support for PS3 Framebuffer
- * Copyright (C) 2008, 2009 International Business Machines Corporation
- *
- * This library is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
- * USA
- *
- *  Martin Lowinski  <lowinski [at] de [dot] ibm [ibm] com>
- *  Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
- *  SPE code based on research by:
- *  Rene Becker
- *  Thimo Emmerich
- */
-
-#include "spu_common.h"
-
-#include <spu_intrinsics.h>
-#include <spu_mfcio.h>
-#include <stdio.h>
-#include <string.h>
-
-// Debugging
-//#define DEBUG
-
-#ifdef DEBUG
-#define deprintf(fmt, args... ) \
-	fprintf( stdout, fmt, ##args ); \
-	fflush( stdout );
-#else
-#define deprintf( fmt, args... )
-#endif
-
-void cpy_to_fb(unsigned int);
-
-/* fb_writer_spu parms */
-static volatile struct fb_writer_parms_t parms __attribute__ ((aligned(128)));
-
-/* Code running on SPU */
-int main(unsigned long long spe_id __attribute__ ((unused)), unsigned long long argp __attribute__ ((unused)))
-{
-	deprintf("[SPU] fb_writer_spu is up... (on SPE #%llu)\n", spe_id);
-	uint32_t ea_mfc, mbox;
-	// send ready message
-	spu_write_out_mbox(SPU_READY);
-
-	while (1) {
-		/* Check mailbox */
-		mbox = spu_read_in_mbox();
-		deprintf("[SPU] Message is %u\n", mbox);
-		switch (mbox) {
-			case SPU_EXIT:
-				deprintf("[SPU] fb_writer goes down...\n");
-				return 0;
-			case SPU_START:
-				break;
-			default:
-				deprintf("[SPU] Cannot handle message\n");
-				continue;
-		}
-
-		/* Tag Manager setup */
-		unsigned int tags;
-		tags = mfc_multi_tag_reserve(5);
-		if (tags == MFC_TAG_INVALID) {
-			deprintf("[SPU] Failed to reserve mfc tags on fb_writer\n");
-			return 0;
-		}
-
-		/* Framebuffer parms */
-		ea_mfc = spu_read_in_mbox();
-		deprintf("[SPU] Message on fb_writer is %u\n", ea_mfc);
-		spu_mfcdma32(&parms, (unsigned int)ea_mfc,
-				sizeof(struct fb_writer_parms_t), tags,
-				MFC_GET_CMD);
-		deprintf("[SPU] argp = %u\n", (unsigned int)argp);
-		DMA_WAIT_TAG(tags);
-
-		/* Copy parms->data to framebuffer */
-		deprintf("[SPU] Copying to framebuffer started\n");
-		cpy_to_fb(tags);
-		deprintf("[SPU] Copying to framebuffer done!\n");
-
-		mfc_multi_tag_release(tags, 5);
-		deprintf("[SPU] fb_writer_spu... done!\n");
-		/* Send FIN msg */
-		spu_write_out_mbox(SPU_FIN);
-	}
-
-	return 0;
-}
-
-void cpy_to_fb(unsigned int tag_id_base)
-{
-	unsigned int i;
-	unsigned char current_buf;
-	uint8_t *in = parms.data;
-
-	/* Align fb pointer which was centered before */
-	uint8_t *fb =
-	    (unsigned char *)((unsigned int)parms.center & 0xFFFFFFF0);
-
-	uint32_t bounded_input_height = parms.bounded_input_height;
-	uint32_t bounded_input_width = parms.bounded_input_width;
-	uint32_t fb_pixel_size = parms.fb_pixel_size;
-
-	uint32_t out_line_stride = parms.out_line_stride;
-	uint32_t in_line_stride = parms.in_line_stride;
-	uint32_t in_line_size = bounded_input_width * fb_pixel_size;
-
-	current_buf = 0;
-
-	/* Local store buffer */
-	static volatile uint8_t buf[4][BUFFER_SIZE]
-	    __attribute__ ((aligned(128)));
-	/* do 4-times multibuffering using DMA list, process in two steps */
-	for (i = 0; i < bounded_input_height >> 2; i++) {
-		/* first buffer */
-		DMA_WAIT_TAG(tag_id_base + 1);
-		// retrieve buffer
-		spu_mfcdma32(buf[0], (unsigned int)in, in_line_size,
-			     tag_id_base + 1, MFC_GETB_CMD);
-		DMA_WAIT_TAG(tag_id_base + 1);
-		// store buffer
-		spu_mfcdma32(buf[0], (unsigned int)fb, in_line_size,
-			     tag_id_base + 1, MFC_PUTB_CMD);
-		in += in_line_stride;
-		fb += out_line_stride;
-		deprintf("[SPU] 1st buffer copied in=0x%x, fb=0x%x\n", in,
-		       fb);
-
-		/* second buffer */
-		DMA_WAIT_TAG(tag_id_base + 2);
-		// retrieve buffer
-		spu_mfcdma32(buf[1], (unsigned int)in, in_line_size,
-			     tag_id_base + 2, MFC_GETB_CMD);
-		DMA_WAIT_TAG(tag_id_base + 2);
-		// store buffer
-		spu_mfcdma32(buf[1], (unsigned int)fb, in_line_size,
-			     tag_id_base + 2, MFC_PUTB_CMD);
-		in += in_line_stride;
-		fb += out_line_stride;
-		deprintf("[SPU] 2nd buffer copied in=0x%x, fb=0x%x\n", in,
-		       fb);
-
-		/* third buffer */
-		DMA_WAIT_TAG(tag_id_base + 3);
-		// retrieve buffer
-		spu_mfcdma32(buf[2], (unsigned int)in, in_line_size,
-			     tag_id_base + 3, MFC_GETB_CMD);
-		DMA_WAIT_TAG(tag_id_base + 3);
-		// store buffer
-		spu_mfcdma32(buf[2], (unsigned int)fb, in_line_size,
-			     tag_id_base + 3, MFC_PUTB_CMD);
-		in += in_line_stride;
-		fb += out_line_stride;
-		deprintf("[SPU] 3rd buffer copied in=0x%x, fb=0x%x\n", in,
-		       fb);
-
-		/* fourth buffer */
-		DMA_WAIT_TAG(tag_id_base + 4);
-		// retrieve buffer
-		spu_mfcdma32(buf[3], (unsigned int)in, in_line_size,
-			     tag_id_base + 4, MFC_GETB_CMD);
-		DMA_WAIT_TAG(tag_id_base + 4);
-		// store buffer
-		spu_mfcdma32(buf[3], (unsigned int)fb, in_line_size,
-			     tag_id_base + 4, MFC_PUTB_CMD);
-		in += in_line_stride;
-		fb += out_line_stride;
-		deprintf("[SPU] 4th buffer copied in=0x%x, fb=0x%x\n", in,
-		       fb);
-		deprintf("[SPU] Loop #%i, bounded_input_height=%i\n", i,
-		       bounded_input_height >> 2);
-	}
-	DMA_WAIT_TAG(tag_id_base + 2);
-	DMA_WAIT_TAG(tag_id_base + 3);
-	DMA_WAIT_TAG(tag_id_base + 4);
-}
-
-
--- a/src/video/ps3/spulibs/spu_common.h	Wed Jan 19 22:21:31 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,108 +0,0 @@
-/*
- * SDL - Simple DirectMedia Layer
- * CELL BE Support for PS3 Framebuffer
- * Copyright (C) 2008, 2009 International Business Machines Corporation
- *
- * This library is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
- * USA
- *
- *  Martin Lowinski  <lowinski [at] de [dot] ibm [ibm] com>
- *  Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
- *  SPE code based on research by:
- *  Rene Becker
- *  Thimo Emmerich
- */
-
-/* Common definitions/makros for SPUs */
-
-#ifndef _SPU_COMMON_H
-#define _SPU_COMMON_H
-
-#include <stdio.h>
-#include <stdint.h>
-#include <string.h>
-
-/* Tag management */
-#define DMA_WAIT_TAG(_tag)     \
-    mfc_write_tag_mask(1<<(_tag)); \
-    mfc_read_tag_status_all();
-
-/* SPU mailbox messages */
-#define SPU_READY	0
-#define SPU_START	1
-#define SPU_FIN		2
-#define SPU_EXIT	3
-
-/* Tags */
-#define RETR_BUF	0
-#define STR_BUF		1
-#define TAG_INIT	2
-
-/* Buffersizes */
-#define MAX_HDTV_WIDTH 1920
-#define MAX_HDTV_HEIGHT 1080
-/* One stride of HDTV */
-#define BUFFER_SIZE 7680
-
-/* fb_writer ppu/spu exchange parms */
-struct fb_writer_parms_t {
-	uint8_t *data;
-	uint8_t *center;
-	uint32_t out_line_stride;
-	uint32_t in_line_stride;
-	uint32_t bounded_input_height;
-	uint32_t bounded_input_width;
-	uint32_t fb_pixel_size;
-
-	/* This padding is to fulfill the need for 16 byte alignment. On parm change, update! */
-	char padding[4];
-} __attribute__((aligned(128)));
-
-/* yuv2rgb ppu/spu exchange parms */
-struct yuv2rgb_parms_t {
-	uint8_t* y_plane;
-	uint8_t* v_plane;
-	uint8_t* u_plane;
-
-	uint8_t* dstBuffer;
-
-	unsigned int src_pixel_width;
-	unsigned int src_pixel_height;
-
-	/* This padding is to fulfill the need for 16 byte alignment. On parm change, update! */
-	char padding[128 - ((4 * sizeof(uint8_t *) + 2 * sizeof(unsigned int)) & 0x7F)];
-} __attribute__((aligned(128)));
-
-/* bilin_scaler ppu/spu exchange parms */
-struct scale_parms_t {
-	uint8_t* y_plane;
-	uint8_t* v_plane;
-	uint8_t* u_plane;
-
-	uint8_t* dstBuffer;
-
-	unsigned int src_pixel_width;
-	unsigned int src_pixel_height;
-
-	unsigned int dst_pixel_width;
-	unsigned int dst_pixel_height;
-
-	/* This padding is to fulfill the need for 16 byte alignment. On parm change, update! */
-	char padding[128 - ((4 * sizeof(uint8_t *) + 4 * sizeof(unsigned int)) & 0x7F)];
-} __attribute__((aligned(128)));
-
-#endif /* _SPU_COMMON_H */
-
-
--- a/src/video/ps3/spulibs/yuv2rgb.c	Wed Jan 19 22:21:31 2011 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,662 +0,0 @@
-/*
- * SDL - Simple DirectMedia Layer
- * CELL BE Support for PS3 Framebuffer
- * Copyright (C) 2008, 2009 International Business Machines Corporation
- *
- * This library is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as published
- * by the Free Software Foundation; either version 2.1 of the License, or
- * (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
- * USA
- *
- *  Martin Lowinski  <lowinski [at] de [dot] ibm [dot] com>
- *  Dirk Herrendoerfer <d.herrendoerfer [at] de [dot] ibm [dot] com>
- *  SPE code based on research by:
- *  Rene Becker
- *  Thimo Emmerich
- */
-
-#include "spu_common.h"
-
-#include <spu_intrinsics.h>
-#include <spu_mfcio.h>
-
-// Debugging: uncomment to make deprintf() print to stdout; otherwise deprintf() expands to nothing
-//#define DEBUG
-
-// Test environment for /2 resolutions: uncomment to build yuv_to_rgb_w2_line() and use it inside yuv_to_rgb_w16()
-//#define TESTING
-
-#ifdef DEBUG	// debug build: deprintf() expands to two statements (fprintf + fflush), so brace any if/else body that uses it
-#define deprintf(fmt, args... ) \
-	fprintf( stdout, fmt, ##args ); \
-	fflush( stdout );
-#else
-#define deprintf( fmt, args... )
-#endif
-
-struct yuv2rgb_parms_t parms_converter __attribute__((aligned(128)));	// job parameters, filled by DMA in main()
-
-/* Two buffers per plane (indexed by buf_idx): up to 8 lines Y, and
- * therefore 4 lines V and 4 lines U, are held in total. The extra 128
- * bytes per line are slack for retrieving misaligned data.
- */
-unsigned char y_plane[2][(MAX_HDTV_WIDTH + 128) * 4] __attribute__((aligned(128)));
-unsigned char v_plane[2][(MAX_HDTV_WIDTH + 128) * 2] __attribute__((aligned(128)));
-unsigned char u_plane[2][(MAX_HDTV_WIDTH + 128) * 2] __attribute__((aligned(128)));
-
-/* Output staging buffer: a maximum of 4 lines BGRA are stored, 4 bytes per pixel */
-unsigned char bgra[4 * MAX_HDTV_WIDTH * 4] __attribute__((aligned(128)));
-
-/* Clamp bounds used by the float to int conversion: upper limit 255, lower limit 0.1 */
-static const vector float vec_255 = { 255.0f, 255.0f, 255.0f, 255.0f };
-static const vector float vec_0_1 = { 0.1f, 0.1f, 0.1f, 0.1f };
-
-void yuv_to_rgb_w16();	// frame driver chosen by main() when the width is a multiple of 32
-void yuv_to_rgb_w32();	// frame driver chosen by main() for every other width
-// NOTE(review): yuv_to_rgb_w16() calls yuv_to_rgb_w16_line() unless TESTING is defined, but this file neither declares nor defines it.
-void yuv_to_rgb_w2_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr, unsigned int width);
-void yuv_to_rgb_w32_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width);
-
-// SPU entry point: reports readiness, then serves conversion jobs received through the inbound mailbox until SPU_EXIT arrives.
-int main(unsigned long long spe_id __attribute__((unused)), unsigned long long argp __attribute__ ((unused)))
-{
-	deprintf("[SPU] yuv2rgb_spu is up... (on SPE #%llu)\n", spe_id);
-	uint32_t ea_mfc, mbox;
-	// Send the ready message through the outbound mailbox
-	spu_write_out_mbox(SPU_READY);
-
-	while (1) {
-		/* Check mailbox: the first word of every job is the command */
-		mbox = spu_read_in_mbox();
-		deprintf("[SPU] Message is %u\n", mbox);
-		switch (mbox) {
-			case SPU_EXIT:
-				deprintf("[SPU] yuv2rgb_converter goes down...\n");
-				return 0;
-			case SPU_START:
-				break;	// leave the switch and run one job
-			default:
-				deprintf("[SPU] Cannot handle message\n");
-				continue;	// unknown messages are ignored
-		}
-
-		/* Tag Manager setup: reserve one tag for the parameter transfer */
-		unsigned int tag_id;
-		tag_id = mfc_multi_tag_reserve(1);
-		if (tag_id == MFC_TAG_INVALID) {
-			deprintf("[SPU] Failed to reserve mfc tags on yuv2rgb_converter\n");
-			return 0;	// the program ends here without writing SPU_FIN
-		}
-
-		/* DMA transfer for the input parameters: the second mailbox word is their 32-bit address */
-		ea_mfc = spu_read_in_mbox();
-		deprintf("[SPU] Message on yuv2rgb_converter is %u\n", ea_mfc);
-		spu_mfcdma32(&parms_converter, (unsigned int)ea_mfc, sizeof(struct yuv2rgb_parms_t), tag_id, MFC_GET_CMD);
-		DMA_WAIT_TAG(tag_id);
-
-		/* Widths that are a multiple of 32 (low five bits clear) go to
-		 * yuv_to_rgb_w16(): a width of 32 results in a width of 16 in the
-		 * chrominance. Every other width goes to yuv_to_rgb_w32().
-		 */
-		deprintf("[SPU] Convert %ix%i from YUV to RGB\n", parms_converter.src_pixel_width, parms_converter.src_pixel_height);
-		if (!(parms_converter.src_pixel_width & 0x1f)) {
-			deprintf("[SPU] Using yuv_to_rgb_w16\n");
-			yuv_to_rgb_w16();
-		} else {
-			deprintf("[SPU] Using yuv_to_rgb_w32\n");
-			yuv_to_rgb_w32();
-		}
-
-		mfc_multi_tag_release(tag_id, 1);	// the drivers use the fixed RETR_BUF/STR_BUF tags, not tag_id
-		deprintf("[SPU] yuv2rgb_spu... done!\n");
-		/* Send FIN message through the outbound mailbox */
-		spu_write_out_mbox(SPU_FIN);
-	}
-
-	return 0;	// not reached: the loop is only left through the returns above
-}
-
-
-/*
- * float_to_char()
- *
- * Converts a float to an unsigned char with saturation: the value is
- * clamped to the range [0.1, 255] and then truncated by the cast.
- *
- * @param s float for conversion
- * @returns clamped value as unsigned char (inputs below 0.1 yield 0)
- */
-inline static unsigned char float_to_char(float s) {
-	vector float vec_s = spu_splats(s);	// replicate s into all four lanes
-	vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s);	// lanes where 0.1 > s
-	vec_s = spu_sel(vec_s, vec_0_1, select_1);	// raise those lanes to 0.1
-
-	vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255);	// lanes where s > 255
-	vec_s = spu_sel(vec_s, vec_255, select_2);	// lower those lanes to 255
-	return (unsigned char) spu_extract(vec_s,0);	// only lane 0 is used
-}
-
-
-/*
- * vfloat_to_vuint()
- *
- * Converts a float vector to an unsigned int vector with saturation:
- * every lane is clamped to the range [0.1, 255] before the conversion.
- *
- * @param vec_s float vector for conversion
- * @returns converted unsigned int vector
- */
-inline static vector unsigned int vfloat_to_vuint(vector float vec_s) {
-	vector unsigned int select_1 = spu_cmpgt(vec_0_1, vec_s);	// lanes where 0.1 > value
-	vec_s = spu_sel(vec_s, vec_0_1, select_1);	// raise those lanes to 0.1
-
-	vector unsigned int select_2 = spu_cmpgt(vec_s, vec_255);	// lanes where value > 255
-	vec_s = spu_sel(vec_s, vec_255, select_2);	// lower those lanes to 255
-	return spu_convtu(vec_s,0);	// float -> unsigned int with scale argument 0
-}
-
-// Frame driver: converts the picture described by parms_converter to BGRA in groups of 4 lines, (height>>2) groups in total.
-void yuv_to_rgb_w16() {
-	// Pixel dimensions of the picture
-	uint32_t width, height;
-
-	// Extract parameters
-	width = parms_converter.src_pixel_width;
-	height = parms_converter.src_pixel_height;
-
-	// Plane data management: source addresses, advanced by one 4-line group per loop pass
-	// Y
-	unsigned char* ram_addr_y = parms_converter.y_plane;
-	// V
-	unsigned char* ram_addr_v = parms_converter.v_plane;
-	// U
-	unsigned char* ram_addr_u = parms_converter.u_plane;
-
-	// BGRA: destination address, advanced after every store
-	unsigned char* ram_addr_bgra = parms_converter.dstBuffer;
-
-	// Strides in bytes: one byte per Y sample, half as many V/U samples per line
-	unsigned int stride_y = width;
-	unsigned int stride_vu = width>>1;
-
-	// Buffer management: buf_idx selects which of the two local-store input buffers is in use
-	unsigned int buf_idx = 0;
-	unsigned int size_4lines_y = stride_y<<2;
-	unsigned int size_2lines_y = stride_y<<1;
-	unsigned int size_2lines_vu = stride_vu<<1;
-
-	// 2*width*4byte_per_pixel
-	unsigned int size_2lines_bgra = width<<3;
-
-
-	// start double-buffered processing: fetch the first group into buffer 0
-	// 4 lines y
-	spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y, size_4lines_y, RETR_BUF+buf_idx, MFC_GET_CMD);
-
-	// 2 lines v
-	spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD);
-
-	// 2 lines u
-	spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD);
-
-	// Wait for these transfers to be completed
-	DMA_WAIT_TAG((RETR_BUF + buf_idx));
-
-	unsigned int i;
-	for(i=0; i<(height>>2)-1; i++) {
-		// NOTE(review): height is unsigned, so for height < 4 the bound (height>>2)-1 wraps around instead of going negative.
-		buf_idx^=1;	// switch to the other buffer for the fetch of the NEXT group
-
-		// 4 lines y (the cast applies to the pointer only; the offset is added to the resulting integer)
-		spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y+size_4lines_y, size_4lines_y, RETR_BUF+buf_idx, MFC_GET_CMD);
-
-		// 2 lines v
-		spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v+size_2lines_vu, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD);
-
-		// 2 lines u
-		spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u+size_2lines_vu, size_2lines_vu, RETR_BUF+buf_idx, MFC_GET_CMD);
-
-		DMA_WAIT_TAG((RETR_BUF + buf_idx));	// the fetch of the next group is awaited before the current group is converted
-
-		buf_idx^=1;	// back to the buffer that holds the CURRENT group
-
-		// NOTE(review): bgra is rewritten below before DMA_WAIT_TAG(STR_BUF), possibly while the previous pass's PUTs from bgra are still outstanding - confirm.
-		// Convert YUV to BGRA, store it back (first two lines)
-#ifndef TESTING
-		yuv_to_rgb_w16_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
-
-		// Next two lines
-		yuv_to_rgb_w16_line(y_plane[buf_idx] + size_2lines_y,
-				v_plane[buf_idx] + stride_vu,
-				u_plane[buf_idx] + stride_vu,
-				bgra + size_2lines_bgra,
-				width);
-#else
-		yuv_to_rgb_w2_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
-
-		// Next two lines
-		yuv_to_rgb_w2_line(y_plane[buf_idx] + size_2lines_y,
-				v_plane[buf_idx] + stride_vu,
-				u_plane[buf_idx] + stride_vu,
-				bgra + size_2lines_bgra,
-				width);
-#endif
-
-		// Wait for previous storing transfer to be completed
-		DMA_WAIT_TAG(STR_BUF);
-
-		// Store converted lines in two steps->max transfer size 16384
-		spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
-		ram_addr_bgra += size_2lines_bgra;
-		spu_mfcdma32(bgra+size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
-		ram_addr_bgra += size_2lines_bgra;
-
-		// Move 4 lines
-		ram_addr_y += size_4lines_y;
-		ram_addr_v += size_2lines_vu;
-		ram_addr_u += size_2lines_vu;
-
-		buf_idx^=1;	// the buffer fetched in this pass becomes the current one
-	}
-
-#ifndef TESTING
-	// Last group, already fetched: convert YUV to BGRA, store it back (first two lines)
-	yuv_to_rgb_w16_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
-
-	// Next two lines
-	yuv_to_rgb_w16_line(y_plane[buf_idx] + size_2lines_y,
-			v_plane[buf_idx] + stride_vu,
-			u_plane[buf_idx] + stride_vu,
-			bgra + size_2lines_bgra,
-			width);
-#else
-	// Last group, already fetched: convert YUV to BGRA, store it back (first two lines)
-	yuv_to_rgb_w2_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
-
-	// Next two lines
-	yuv_to_rgb_w2_line(y_plane[buf_idx] + size_2lines_y,
-			v_plane[buf_idx] + stride_vu,
-			u_plane[buf_idx] + stride_vu,
-			bgra + size_2lines_bgra,
-			width);
-#endif
-
-	// Wait for previous storing transfer to be completed
-	DMA_WAIT_TAG(STR_BUF);
-	spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
-	ram_addr_bgra += size_2lines_bgra;
-	spu_mfcdma32(bgra+size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
-
-	// Wait for the final stores to be completed before returning
-	DMA_WAIT_TAG(STR_BUF);
-
-}
-
-// Frame driver: same 4-lines-per-step scheme as yuv_to_rgb_w16(), but every line pair is converted by yuv_to_rgb_w32_line().
-void yuv_to_rgb_w32() {
-	// Pixel dimensions of the picture
-	uint32_t width, height;
-
-	// Extract parameters
-	width = parms_converter.src_pixel_width;
-	height = parms_converter.src_pixel_height;
-
-	// Plane data management: source addresses, advanced by one 4-line group per loop pass
-	// Y
-	unsigned char* ram_addr_y = parms_converter.y_plane;
-	// V
-	unsigned char* ram_addr_v = parms_converter.v_plane;
-	// U
-	unsigned char* ram_addr_u = parms_converter.u_plane;
-
-	// BGRA: destination address, advanced after every store
-	unsigned char* ram_addr_bgra = parms_converter.dstBuffer;
-
-	// Strides in bytes: one byte per Y sample, half as many V/U samples per line
-	unsigned int stride_y = width;
-	unsigned int stride_vu = width>>1;
-
-	// Buffer management: buf_idx selects which of the two local-store input buffers is in use
-	unsigned int buf_idx = 0;
-	unsigned int size_4lines_y = stride_y<<2;
-	unsigned int size_2lines_y = stride_y<<1;
-	unsigned int size_2lines_vu = stride_vu<<1;
-
-	// 2*width*4byte_per_pixel
-	unsigned int size_2lines_bgra = width<<3;
-
-	// start double-buffered processing: fetch the first group into buffer 0
-	// 4 lines y
-	spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y, size_4lines_y, RETR_BUF + buf_idx, MFC_GET_CMD);
-	// 2 lines v
-	spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD);
-	// 2 lines u
-	spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD);
-
-	// Wait for these transfers to be completed
-	DMA_WAIT_TAG((RETR_BUF + buf_idx));
-
-	unsigned int i;
-	for(i=0; i < (height>>2)-1; i++) {	// NOTE(review): height is unsigned, so for height < 4 this bound wraps around
-		buf_idx^=1;	// switch to the other buffer for the fetch of the NEXT group
-		// 4 lines y (the cast applies to the pointer only; the offset is added to the resulting integer)
-		spu_mfcdma32(y_plane[buf_idx], (unsigned int) ram_addr_y+size_4lines_y, size_4lines_y, RETR_BUF + buf_idx, MFC_GET_CMD);
-		deprintf("4lines = %d\n", size_4lines_y);
-		// 2 lines v
-		spu_mfcdma32(v_plane[buf_idx], (unsigned int) ram_addr_v+size_2lines_vu, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD);
-		deprintf("2lines = %d\n", size_2lines_vu);
-		// 2 lines u
-		spu_mfcdma32(u_plane[buf_idx], (unsigned int) ram_addr_u+size_2lines_vu, size_2lines_vu, RETR_BUF + buf_idx, MFC_GET_CMD);
-		deprintf("2lines = %d\n", size_2lines_vu);
-
-		DMA_WAIT_TAG((RETR_BUF + buf_idx));	// the fetch of the next group is awaited before the current group is converted
-
-		buf_idx^=1;	// back to the buffer that holds the CURRENT group
-
-		// Convert YUV to BGRA, store it back (first two lines). NOTE(review): bgra is rewritten before DMA_WAIT_TAG(STR_BUF) - confirm the previous PUTs are done.
-		yuv_to_rgb_w32_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
-
-		// Next two lines
-		yuv_to_rgb_w32_line(y_plane[buf_idx] + size_2lines_y,
-				v_plane[buf_idx] + stride_vu,
-				u_plane[buf_idx] + stride_vu,
-				bgra + size_2lines_bgra,
-				width);
-
-		// Wait for previous storing transfer to be completed
-		DMA_WAIT_TAG(STR_BUF);
-
-		// Store converted lines in two steps->max transfer size 16384
-		spu_mfcdma32(bgra, (unsigned int)ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
-		ram_addr_bgra += size_2lines_bgra;
-		spu_mfcdma32(bgra + size_2lines_bgra, (unsigned int)ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
-		ram_addr_bgra += size_2lines_bgra;
-
-		// Move 4 lines
-		ram_addr_y += size_4lines_y;
-		ram_addr_v += size_2lines_vu;
-		ram_addr_u += size_2lines_vu;
-
-		buf_idx^=1;	// the buffer fetched in this pass becomes the current one
-	}
-
-	// Last group, already fetched: convert YUV to BGRA, store it back (first two lines)
-	yuv_to_rgb_w32_line(y_plane[buf_idx], v_plane[buf_idx], u_plane[buf_idx], bgra, width);
-
-	// Next two lines
-	yuv_to_rgb_w32_line(y_plane[buf_idx] + size_2lines_y,
-			v_plane[buf_idx] + stride_vu,
-			u_plane[buf_idx] + stride_vu,
-			bgra + size_2lines_bgra,
-			width);
-
-	// Wait for previous storing transfer to be completed
-	DMA_WAIT_TAG(STR_BUF);
-	spu_mfcdma32(bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
-	ram_addr_bgra += size_2lines_bgra;
-	spu_mfcdma32(bgra + size_2lines_bgra, (unsigned int) ram_addr_bgra, size_2lines_bgra, STR_BUF, MFC_PUT_CMD);
-
-	// Wait for the final stores to be completed before returning
-	DMA_WAIT_TAG(STR_BUF);
-}
-
-
-/* Some vectors needed by the yuv 2 rgb conversion algorithm. The vec_char2int_* shuffle patterns zero-extend bytes 0-3, 4-7, 8-11 and 12-15 of the second shuffle operand into four 32-bit lanes (the first operand, vec_null, supplies the zero bytes). */
-const vector float vec_minus_128 = { -128.0f, -128.0f, -128.0f, -128.0f };
-const vector unsigned char vec_null = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
-const vector unsigned char vec_char2int_first = { 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00, 0x11, 0x00, 0x00, 0x00, 0x12, 0x00, 0x00, 0x00, 0x13 };
-const vector unsigned char vec_char2int_second = { 0x00, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x15, 0x00, 0x00, 0x00, 0x16, 0x00, 0x00, 0x00, 0x17 };
-const vector unsigned char vec_char2int_third = { 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x19, 0x00, 0x00, 0x00, 0x1A, 0x00, 0x00, 0x00, 0x1B };
-const vector unsigned char vec_char2int_fourth = { 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x1D, 0x00, 0x00, 0x00, 0x1E, 0x00, 0x00, 0x00, 0x1F };
-/* Chroma weights: R = Y + 1.403*(V-128), G = Y - 0.344*(U-128) - 0.714*(V-128), B = Y + 1.773*(U-128) */
-const vector float vec_R_precalc_coeff = {1.403f, 1.403f, 1.403f, 1.403f};
-const vector float vec_Gu_precalc_coeff = {-0.344f, -0.344f, -0.344f, -0.344f};
-const vector float vec_Gv_precalc_coeff = {-0.714f, -0.714f, -0.714f, -0.714f};
-const vector float vec_B_precalc_coeff = {1.773f, 1.773f, 1.773f, 1.773f};
-/* Opaque alpha (255), already positioned in the top byte of each 32-bit pixel */
-const vector unsigned int vec_alpha =  { 255 << 24, 255 << 24, 255 << 24, 255 << 24 };
-/* Duplicate each float for two neighbouring pixels: "upper" yields lanes {0,0,1,1}, "lower" yields lanes {2,2,3,3} */
-const vector unsigned char vec_select_floats_upper = { 0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x04, 0x05, 0x06, 0x07 };
-const vector unsigned char vec_select_floats_lower = { 0x08, 0x09, 0x0A, 0x0B, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x0C, 0x0D, 0x0E, 0x0F };
-
-
-#ifdef TESTING
-/*
- * yuv_to_rgb_w2_line()
- *
- * - scalar reference path (built only with TESTING): converts a 2x2 block of 4 pixels per loop pass from YUV to BGRA
- * - two lines of YUV are taken as input; the second Y line starts width bytes after the first
- * - width has to be a multiple of 2 (= 4 pixel)
- *
- * @param y_addr address of the y plane (local store)
- * @param v_addr address of the v plane (local store)
- * @param u_addr address of the u plane (local store)
- * @param bgra_addr_char address of the bgra output buffer (local store)
- * @param width the width of a line in pixel
- */
-void yuv_to_rgb_w2_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_char, unsigned int width) {
-	// each pixel is stored as an integer
-	unsigned int* bgra_addr = (unsigned int*) bgra_addr_char;
-
-	unsigned int x;
-	// Go through each line in steps of 2, because every U and V value is connected to 4 pixels Y (YUV 4:2:0)
-	for(x = 0; x < width; x+=2) {
-		// Get the 4 Y, 1 U and 1 V values (Y_1/Y_2 from the first line, Y_3/Y_4 from the second)
-		const unsigned char Y_1 = *(y_addr + x);
-		const unsigned char Y_2 = *(y_addr + x + 1);
-		const unsigned char Y_3 = *(y_addr + x + width);
-		const unsigned char Y_4 = *(y_addr + x + width + 1);
-		const unsigned char U = *(u_addr + (x >> 1));
-		const unsigned char V = *(v_addr + (x >> 1));
-
-		// Start converting: centre the chroma samples around zero
-		float V_minus_128 = (float)((float)V - 128.0f);
-		float U_minus_128 = (float)((float)U - 128.0f);
-
-		float R_precalculate = 1.403f * V_minus_128;
-		float G_precalculate = -(0.344f * U_minus_128 + 0.714f * V_minus_128);
-		float B_precalculate = 1.773f * U_minus_128;
-
-		// Add the shared chroma terms to each luma value, then clamp and cast the results
-		const unsigned char R_1 = float_to_char((Y_1 + R_precalculate));
-		const unsigned char R_2 = float_to_char((Y_2 + R_precalculate));
-		const unsigned char R_3 = float_to_char((Y_3 + R_precalculate));
-		const unsigned char R_4 = float_to_char((Y_4 + R_precalculate));
-		const unsigned char G_1 = float_to_char((Y_1 + G_precalculate));
-		const unsigned char G_2 = float_to_char((Y_2 + G_precalculate));
-		const unsigned char G_3 = float_to_char((Y_3 + G_precalculate));
-		const unsigned char G_4 = float_to_char((Y_4 + G_precalculate));
-		const unsigned char B_1 = float_to_char((Y_1 + B_precalculate));
-		const unsigned char B_2 = float_to_char((Y_2 + B_precalculate));
-		const unsigned char B_3 = float_to_char((Y_3 + B_precalculate));
-		const unsigned char B_4 = float_to_char((Y_4 + B_precalculate));
-
-		// Write back: blue in bits 0-7, green in bits 8-15, red in bits 16-23, alpha 255 in bits 24-31
-		*(bgra_addr + x) = (B_1 << 0)| (G_1 << 8) | (R_1 << 16) | (255 << 24);
-		*(bgra_addr + x + 1) = (B_2 << 0)| (G_2 << 8) | (R_2 << 16) | (255 << 24);
-		*(bgra_addr + x + width) = (B_3 << 0)| (G_3 << 8) | (R_3 << 16) | (255 << 24);
-		*(bgra_addr + x + width + 1) = (B_4 << 0)| (G_4 << 8) | (R_4 << 16) | (255 << 24);
-	}
-}
-#endif
-
-
-/*
- * yuv_to_rgb_w32_line()
- *
- * Converts two lines of YUV input to BGRA, 32 pixels of each line per loop pass; width has to be a multiple of 32.
- * NOTE(review): main() selects the w32 path precisely when the width is NOT a multiple of 32 - confirm the intent.
- *
- * @param y_addr address of the y plane in local store (two lines, the second one width bytes after the first)
- * @param v_addr address of the v plane in local store
- * @param u_addr address of the u plane in local store
- * @param bgra_addr_ address of the bgra output buffer (two lines, the second one width pixels after the first)
- * @param width the width in pixel
- */
-void yuv_to_rgb_w32_line(unsigned char* y_addr, unsigned char* v_addr, unsigned char* u_addr, unsigned char* bgra_addr_, unsigned int width) {
-	// each pixel is stored as an integer
-	unsigned int* bgra_addr = (unsigned int*) bgra_addr_;
-
-	unsigned int x;
-	for(x = 0; x < width; x+=32) {
-		// Each U and V value applies to 4 pixels (two high, two wide), so both lines of a pair are handled together
-		// Load 2 x 16 Y bytes from each of the two lines, plus the matching 16 U and 16 V bytes
-		const vector unsigned char vchar_Y_1 = *((vector unsigned char*)(y_addr + x));
-		const vector unsigned char vchar_Y_2 = *((vector unsigned char*)(y_addr + x + 16));
-		const vector unsigned char vchar_Y_3 = *((vector unsigned char*)(y_addr + x + width));
-		const vector unsigned char vchar_Y_4 = *((vector unsigned char*)(y_addr + x + width + 16));
-		const vector unsigned char vchar_U = *((vector unsigned char*)(u_addr + (x >> 1)));
-		const vector unsigned char vchar_V = *((vector unsigned char*)(v_addr + (x >> 1)));
-		// Widen the chroma bytes to floats, four at a time, and subtract 128
-		const vector float vfloat_U_1 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_first), 0),vec_minus_128);
-		const vector float vfloat_U_2 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_second), 0),vec_minus_128);
-		const vector float vfloat_U_3 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_third), 0),vec_minus_128);
-		const vector float vfloat_U_4 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_U, vec_char2int_fourth), 0),vec_minus_128);
-
-		const vector float vfloat_V_1 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_first), 0),vec_minus_128);
-		const vector float vfloat_V_2 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_second), 0),vec_minus_128);
-		const vector float vfloat_V_3 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_third), 0),vec_minus_128);
-		const vector float vfloat_V_4 = spu_add(spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_V, vec_char2int_fourth), 0),vec_minus_128);
-		// Widen the 64 luma bytes to floats: Y_1..Y_8 belong to the first line, Y_9..Y_16 to the second
-		vector float Y_1 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_first), 0);
-		vector float Y_2 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_second), 0);
-		vector float Y_3 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_third), 0);
-		vector float Y_4 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_1, vec_char2int_fourth), 0);
-		vector float Y_5 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_first), 0);
-		vector float Y_6 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_second), 0);
-		vector float Y_7 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_third), 0);
-		vector float Y_8 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_2, vec_char2int_fourth), 0);
-		vector float Y_9 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_first), 0);
-		vector float Y_10 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_second), 0);
-		vector float Y_11 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_third), 0);
-		vector float Y_12 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_3, vec_char2int_fourth), 0);
-		vector float Y_13 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_first), 0);
-		vector float Y_14 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_second), 0);
-		vector float Y_15 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_third), 0);
-		vector float Y_16 = spu_convtf((vector unsigned int)spu_shuffle(vec_null, vchar_Y_4, vec_char2int_fourth), 0);
-		// Red term 1.403*(V-128); each value is then duplicated for two horizontally adjacent pixels
-		const vector float R1a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_1);
-		const vector float R2a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_2);
-		const vector float R3a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_3);
-		const vector float R4a_precalculate = spu_mul(vec_R_precalc_coeff, vfloat_V_4);
-
-		const vector float R1_precalculate = spu_shuffle(R1a_precalculate,  R1a_precalculate, vec_select_floats_upper);
-		const vector float R2_precalculate = spu_shuffle(R1a_precalculate,  R1a_precalculate, vec_select_floats_lower);
-		const vector float R3_precalculate = spu_shuffle(R2a_precalculate,  R2a_precalculate, vec_select_floats_upper);
-		const vector float R4_precalculate = spu_shuffle(R2a_precalculate,  R2a_precalculate, vec_select_floats_lower);
-		const vector float R5_precalculate = spu_shuffle(R3a_precalculate,  R3a_precalculate, vec_select_floats_upper);
-		const vector float R6_precalculate = spu_shuffle(R3a_precalculate,  R3a_precalculate, vec_select_floats_lower);
-		const vector float R7_precalculate = spu_shuffle(R4a_precalculate,  R4a_precalculate, vec_select_floats_upper);
-		const vector float R8_precalculate = spu_shuffle(R4a_precalculate,  R4a_precalculate, vec_select_floats_lower);
-
-		// Green term -0.344*(U-128) - 0.714*(V-128), duplicated the same way
-		const vector float G1a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_1, spu_mul(vfloat_V_1, vec_Gv_precalc_coeff));
-		const vector float G2a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_2, spu_mul(vfloat_V_2, vec_Gv_precalc_coeff));
-		const vector float G3a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_3, spu_mul(vfloat_V_3, vec_Gv_precalc_coeff));
-		const vector float G4a_precalculate = spu_madd(vec_Gu_precalc_coeff, vfloat_U_4, spu_mul(vfloat_V_4, vec_Gv_precalc_coeff));
-
-		const vector float G1_precalculate = spu_shuffle(G1a_precalculate,  G1a_precalculate, vec_select_floats_upper);
-		const vector float G2_precalculate = spu_shuffle(G1a_precalculate,  G1a_precalculate, vec_select_floats_lower);
-		const vector float G3_precalculate = spu_shuffle(G2a_precalculate,  G2a_precalculate, vec_select_floats_upper);
-		const vector float G4_precalculate = spu_shuffle(G2a_precalculate,  G2a_precalculate, vec_select_floats_lower);
-		const vector float G5_precalculate = spu_shuffle(G3a_precalculate,  G3a_precalculate, vec_select_floats_upper);
-		const vector float G6_precalculate = spu_shuffle(G3a_precalculate,  G3a_precalculate, vec_select_floats_lower);
-		const vector float G7_precalculate = spu_shuffle(G4a_precalculate,  G4a_precalculate, vec_select_floats_upper);
-		const vector float G8_precalculate = spu_shuffle(G4a_precalculate,  G4a_precalculate, vec_select_floats_lower);
-
-		// Blue term 1.773*(U-128), duplicated the same way
-		const vector float B1a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_1);
-		const vector float B2a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_2);
-		const vector float B3a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_3);
-		const vector float B4a_precalculate = spu_mul(vec_B_precalc_coeff, vfloat_U_4);
-
-		const vector float B1_precalculate = spu_shuffle(B1a_precalculate,  B1a_precalculate, vec_select_floats_upper);
-		const vector float B2_precalculate = spu_shuffle(B1a_precalculate,  B1a_precalculate, vec_select_floats_lower);
-		const vector float B3_precalculate = spu_shuffle(B2a_precalculate,  B2a_precalculate, vec_select_floats_upper);
-		const vector float B4_precalculate = spu_shuffle(B2a_precalculate,  B2a_precalculate, vec_select_floats_lower);
-		const vector float B5_precalculate = spu_shuffle(B3a_precalculate,  B3a_precalculate, vec_select_floats_upper);
-		const vector float B6_precalculate = spu_shuffle(B3a_precalculate,  B3a_precalculate, vec_select_floats_lower);
-		const vector float B7_precalculate = spu_shuffle(B4a_precalculate,  B4a_precalculate, vec_select_floats_upper);
-		const vector float B8_precalculate = spu_shuffle(B4a_precalculate,  B4a_precalculate, vec_select_floats_lower);
-
-		// Add luma, clamp and convert; the second line (index 9..16) reuses the chroma terms of the first (1..8)
-		const vector unsigned int  R_1 = vfloat_to_vuint(spu_add( Y_1, R1_precalculate));
-		const vector unsigned int  R_2 = vfloat_to_vuint(spu_add( Y_2, R2_precalculate));
-		const vector unsigned int  R_3 = vfloat_to_vuint(spu_add( Y_3, R3_precalculate));
-		const vector unsigned int  R_4 = vfloat_to_vuint(spu_add( Y_4, R4_precalculate));
-		const vector unsigned int  R_5 = vfloat_to_vuint(spu_add( Y_5, R5_precalculate));
-		const vector unsigned int  R_6 = vfloat_to_vuint(spu_add( Y_6, R6_precalculate));
-		const vector unsigned int  R_7 = vfloat_to_vuint(spu_add( Y_7, R7_precalculate));
-		const vector unsigned int  R_8 = vfloat_to_vuint(spu_add( Y_8, R8_precalculate));
-		const vector unsigned int  R_9 = vfloat_to_vuint(spu_add( Y_9, R1_precalculate));
-		const vector unsigned int R_10 = vfloat_to_vuint(spu_add(Y_10, R2_precalculate));
-		const vector unsigned int R_11 = vfloat_to_vuint(spu_add(Y_11, R3_precalculate));
-		const vector unsigned int R_12 = vfloat_to_vuint(spu_add(Y_12, R4_precalculate));
-		const vector unsigned int R_13 = vfloat_to_vuint(spu_add(Y_13, R5_precalculate));
-		const vector unsigned int R_14 = vfloat_to_vuint(spu_add(Y_14, R6_precalculate));
-		const vector unsigned int R_15 = vfloat_to_vuint(spu_add(Y_15, R7_precalculate));
-		const vector unsigned int R_16 = vfloat_to_vuint(spu_add(Y_16, R8_precalculate));
-
-		const vector unsigned int  G_1 = vfloat_to_vuint(spu_add( Y_1, G1_precalculate));
-		const vector unsigned int  G_2 = vfloat_to_vuint(spu_add( Y_2, G2_precalculate));
-		const vector unsigned int  G_3 = vfloat_to_vuint(spu_add( Y_3, G3_precalculate));
-		const vector unsigned int  G_4 = vfloat_to_vuint(spu_add( Y_4, G4_precalculate));
-		const vector unsigned int  G_5 = vfloat_to_vuint(spu_add( Y_5, G5_precalculate));
-		const vector unsigned int  G_6 = vfloat_to_vuint(spu_add( Y_6, G6_precalculate));
-		const vector unsigned int  G_7 = vfloat_to_vuint(spu_add( Y_7, G7_precalculate));
-		const vector unsigned int  G_8 = vfloat_to_vuint(spu_add( Y_8, G8_precalculate));
-		const vector unsigned int  G_9 = vfloat_to_vuint(spu_add( Y_9, G1_precalculate));
-		const vector unsigned int G_10 = vfloat_to_vuint(spu_add(Y_10, G2_precalculate));
-		const vector unsigned int G_11 = vfloat_to_vuint(spu_add(Y_11, G3_precalculate));
-		const vector unsigned int G_12 = vfloat_to_vuint(spu_add(Y_12, G4_precalculate));
-		const vector unsigned int G_13 = vfloat_to_vuint(spu_add(Y_13, G5_precalculate));
-		const vector unsigned int G_14 = vfloat_to_vuint(spu_add(Y_14, G6_precalculate));
-		const vector unsigned int G_15 = vfloat_to_vuint(spu_add(Y_15, G7_precalculate));
-		const vector unsigned int G_16 = vfloat_to_vuint(spu_add(Y_16, G8_precalculate));
-
-		const vector unsigned int  B_1 = vfloat_to_vuint(spu_add( Y_1, B1_precalculate));
-		const vector unsigned int  B_2 = vfloat_to_vuint(spu_add( Y_2, B2_precalculate));
-		const vector unsigned int  B_3 = vfloat_to_vuint(spu_add( Y_3, B3_precalculate));
-		const vector unsigned int  B_4 = vfloat_to_vuint(spu_add( Y_4, B4_precalculate));
-		const vector unsigned int  B_5 = vfloat_to_vuint(spu_add( Y_5, B5_precalculate));
-		const vector unsigned int  B_6 = vfloat_to_vuint(spu_add( Y_6, B6_precalculate));
-		const vector unsigned int  B_7 = vfloat_to_vuint(spu_add( Y_7, B7_precalculate));
-		const vector unsigned int  B_8 = vfloat_to_vuint(spu_add( Y_8, B8_precalculate));
-		const vector unsigned int  B_9 = vfloat_to_vuint(spu_add( Y_9, B1_precalculate));
-		const vector unsigned int B_10 = vfloat_to_vuint(spu_add(Y_10, B2_precalculate));
-		const vector unsigned int B_11 = vfloat_to_vuint(spu_add(Y_11, B3_precalculate));
-		const vector unsigned int B_12 = vfloat_to_vuint(spu_add(Y_12, B4_precalculate));
-		const vector unsigned int B_13 = vfloat_to_vuint(spu_add(Y_13, B5_precalculate));
-		const vector unsigned int B_14 = vfloat_to_vuint(spu_add(Y_14, B6_precalculate));
-		const vector unsigned int B_15 = vfloat_to_vuint(spu_add(Y_15, B7_precalculate));
-		const vector unsigned int B_16 = vfloat_to_vuint(spu_add(Y_16, B8_precalculate));
-		// Pack four pixels per store: alpha | B | (G moved up one byte) | (R moved up two bytes); first line at x, second line at x + width
-		*((vector unsigned int*)(bgra_addr + x)) = spu_or(spu_or(vec_alpha,  B_1), spu_or(spu_slqwbyte( R_1, 2),spu_slqwbyte(G_1, 1)));
-		*((vector unsigned int*)(bgra_addr + x + 4)) = spu_or(spu_or(vec_alpha,  B_2), spu_or(spu_slqwbyte( R_2, 2),spu_slqwbyte(G_2, 1)));
-		*((vector unsigned int*)(bgra_addr + x + 8)) = spu_or(spu_or(vec_alpha,  B_3), spu_or(spu_slqwbyte( R_3, 2),spu_slqwbyte(G_3, 1)));
-		*((vector unsigned int*)(bgra_addr + x + 12)) = spu_or(spu_or(vec_alpha,  B_4), spu_or(spu_slqwbyte( R_4, 2),spu_slqwbyte(G_4, 1)));
-		*((vector unsigned int*)(bgra_addr + x + 16)) = spu_or(spu_or(vec_alpha,  B_5), spu_or(spu_slqwbyte( R_5, 2),spu_slqwbyte(G_5, 1)));
-		*((vector unsigned int*)(bgra_addr + x + 20)) = spu_or(spu_or(vec_alpha,  B_6), spu_or(spu_slqwbyte( R_6, 2),spu_slqwbyte(G_6, 1)));
-		*((vector unsigned int*)(bgra_addr + x + 24)) = spu_or(spu_or(vec_alpha,  B_7), spu_or(spu_slqwbyte( R_7, 2),spu_slqwbyte(G_7, 1)));
-		*((vector unsigned int*)(bgra_addr + x + 28)) = spu_or(spu_or(vec_alpha,  B_8), spu_or(spu_slqwbyte( R_8, 2),spu_slqwbyte(G_8, 1)));
-		*((vector unsigned int*)(bgra_addr + x + width)) = spu_or(spu_or(vec_alpha,  B_9), spu_or(spu_slqwbyte( R_9, 2),spu_slqwbyte(G_9, 1)));
-		*((vector unsigned int*)(bgra_addr + x + width + 4)) = spu_or(spu_or(vec_alpha, B_10), spu_or(spu_slqwbyte(R_10, 2),spu_slqwbyte(G_10, 1)));
-		*((vector unsigned int*)(bgra_addr + x + width + 8)) = spu_or(spu_or(vec_alpha, B_11), spu_or(spu_slqwbyte(R_11, 2),spu_slqwbyte(G_11, 1)));
-		*((vector unsigned int*)(bgra_addr + x + width + 12)) = spu_or(spu_or(vec_alpha, B_12), spu_or(spu_slqwbyte(R_12, 2),spu_slqwbyte(G_12, 1)));
-		*((vector unsigned int*)(bgra_addr + x + width + 16)) = spu_or(spu_or(vec_alpha, B_13), spu_or(spu_slqwbyte(R_13, 2),spu_slqwbyte(G_13, 1)));
-		*((vector unsigned int*)(bgra_addr + x + width + 20)) = spu_or(spu_or(vec_alpha, B_14), spu_or(spu_slqwbyte(R_14, 2),spu_slqwbyte(G_14, 1)));
-		*((vector unsigned int*)(bgra_addr + x + width + 24)) = spu_or(spu_or(vec_alpha, B_15), spu_or(spu_slqwbyte(R_15, 2),spu_slqwbyte(G_15, 1)));
-		*((vector unsigned int*)(bgra_addr + x + width + 28)) = spu_or(spu_or(vec_alpha, B_16), spu_or(spu_slqwbyte(R_16, 2),spu_slqwbyte(G_16, 1)));
-	}
-}
-