Altivec blitter for 555 -> 8888 surface, written by me.
authorRyan C. Gordon <icculus@icculus.org>
Thu, 08 Sep 2005 07:20:59 +0000
changeset 1139 d0ae4dff7208
parent 1138 fcfb783a3ca2
child 1140 af8b0f9ac2f4
Altivec blitter for 555 -> 8888 surface, written by me. --ryan.
src/video/SDL_blit_N.c
--- a/src/video/SDL_blit_N.c	Thu Sep 08 07:15:44 2005 +0000
+++ b/src/video/SDL_blit_N.c	Thu Sep 08 07:20:59 2005 +0000
@@ -382,6 +382,151 @@
 
 }
 
+
+static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) {
+    int height = info->d_height;
+    Uint8 *src = (Uint8 *) info->s_pixels;
+    int srcskip = info->s_skip;
+    Uint8 *dst = (Uint8 *) info->d_pixels;
+    int dstskip = info->d_skip;
+    SDL_PixelFormat *srcfmt = info->src;
+    SDL_PixelFormat *dstfmt = info->dst;
+    unsigned alpha;
+    vector unsigned char valpha;
+    vector unsigned char vpermute;
+    vector unsigned short vf800;
+    vector unsigned int v8 = vec_splat_u32(8);
+    vector unsigned int v16 = vec_add(v8, v8);
+    vector unsigned short v1 = vec_splat_u16(1);
+    vector unsigned short v3 = vec_splat_u16(3);
+    /* 
+        0x10 - 0x1f is the alpha
+        0x00 - 0x0e evens are the red
+        0x01 - 0x0f odds are zero
+    */
+    vector unsigned char vredalpha1 = (vector unsigned char)(
+        0x10, 0x00, 0x01, 0x01,
+        0x10, 0x02, 0x01, 0x01,
+        0x10, 0x04, 0x01, 0x01,
+        0x10, 0x06, 0x01, 0x01
+    );
+    vector unsigned char vredalpha2 = (vector unsigned char)(
+        vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
+    );
+    /*
+        0x00 - 0x0f is ARxx ARxx ARxx ARxx
+        0x11 - 0x0f odds are blue
+    */
+    vector unsigned char vblue1 = (vector unsigned char)(
+        0x00, 0x01, 0x02, 0x11,
+        0x04, 0x05, 0x06, 0x13,
+        0x08, 0x09, 0x0a, 0x15,
+        0x0c, 0x0d, 0x0e, 0x17
+    );
+    vector unsigned char vblue2 = (vector unsigned char)(
+        vec_add((vector unsigned int)vblue1, v8)
+    );
+    /*
+        0x00 - 0x0f is ARxB ARxB ARxB ARxB
+        0x10 - 0x0e evens are green
+    */
+    vector unsigned char vgreen1 = (vector unsigned char)(
+        0x00, 0x01, 0x10, 0x03,
+        0x04, 0x05, 0x12, 0x07,
+        0x08, 0x09, 0x14, 0x0b,
+        0x0c, 0x0d, 0x16, 0x0f
+    );
+    vector unsigned char vgreen2 = (vector unsigned char)(
+        vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
+    );
+    
+
+    assert(srcfmt->BytesPerPixel == 2);
+    assert(dstfmt->BytesPerPixel == 4);
+
+    vf800 = (vector unsigned short)vec_splat_u8(-7);
+    vf800 = vec_sl(vf800, vec_splat_u16(8));
+
+    if (dstfmt->Amask && srcfmt->alpha) {
+        ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
+        valpha = vec_splat(valpha, 0);
+    } else {
+        alpha = 0;
+        valpha = vec_splat_u8(0);
+    }
+
+    vpermute = calc_swizzle32(NULL, dstfmt);
+    while (height--) {
+        vector unsigned char valigner;
+        vector unsigned char voverflow;
+        vector unsigned char vsrc;
+
+        int width = info->d_width;
+        int extrawidth;
+
+        /* do scalar until we can align... */
+#define ONE_PIXEL_BLEND(condition, widthvar) \
+        while (condition) { \
+            unsigned sR, sG, sB; \
+            unsigned short pixel = *((unsigned short *)src); \
+            sR = (pixel >> 7) & 0xf8; \
+            sG = (pixel >> 2) & 0xf8; \
+            sB = (pixel << 3) & 0xf8; \
+            ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
+            src += 2; \
+            dst += 4; \
+            widthvar--; \
+        }
+        ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
+
+        /* After all that work, here's the vector part! */
+        extrawidth = (width % 8);  /* trailing unaligned stores */
+        width -= extrawidth;
+        vsrc = vec_ld(0, src);
+        valigner = VEC_ALIGNER(src);
+
+        while (width) {
+            vector unsigned short vR, vG, vB;
+            vector unsigned char vdst1, vdst2;
+
+            voverflow = vec_ld(15, src);
+            vsrc = vec_perm(vsrc, voverflow, valigner);
+
+            vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800);
+            vB = vec_sl((vector unsigned short)vsrc, v3);
+            vG = vec_sl(vB, v3);
+
+            vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
+            vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
+            vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
+            vdst1 = vec_perm(vdst1, valpha, vpermute);
+            vec_st(vdst1, 0, dst);
+
+            vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
+            vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
+            vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
+            vdst2 = vec_perm(vdst2, valpha, vpermute);
+            vec_st(vdst2, 16, dst);
+            
+            width -= 8;
+            dst += 32;
+            src += 16;
+            vsrc = voverflow;
+        }
+
+        assert(width == 0);
+
+
+        /* do scalar until we can align... */
+        ONE_PIXEL_BLEND((extrawidth), extrawidth);
+#undef ONE_PIXEL_BLEND
+
+        src += srcskip;  /* move to next row, accounting for pitch. */
+        dst += dstskip;
+    }
+
+}
+
 static void BlitNtoNKey(SDL_BlitInfo *info);
 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
 static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
@@ -2090,6 +2235,8 @@
     /* has-altivec */
     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
       2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
+    { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
+      2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
 #endif
     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
       0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },