Skip to content

Commit

Permalink
1-channel SIMD code can coerce alignment in one special case, too.
Browse files: browse the repository at this point in the history
  • Loading branch information
icculus committed Apr 25, 2018
1 parent d110ee9 commit a3bb60f
Showing 1 changed file with 22 additions and 10 deletions.
32 changes: 22 additions & 10 deletions mojoal.c
Expand Up @@ -734,7 +734,16 @@ static void mix_float32_c1_sse(const ALfloat * restrict panning, const float * r
const int leftover = mixframes % 8;
ALsizei i;

if ( (((size_t)stream) % 16) || (((size_t)data) % 16) ) {
/* We can align this to 16 in one special case. */
if ( ((((size_t)data) % 16) == 8) && ((((size_t)stream) % 16) == 0) && (mixframes >= 2) ) {
stream[0] += data[0] * left;
stream[1] += data[0] * right;
stream[2] += data[1] * left;
stream[3] += data[1] * right;
stream += 4;
data += 2;
mix_float32_c1_sse(panning, data + 2, stream + 2, mixframes - 2);
} else if ( (((size_t)stream) % 16) || (((size_t)data) % 16) ) {
/* unaligned, do scalar version. */
mix_float32_c1_scalar(panning, data, stream, mixframes);
} else if ((left == 1.0f) && (right == 1.0f)) {
Expand Down Expand Up @@ -796,10 +805,7 @@ static void mix_float32_c2_sse(const ALfloat * restrict panning, const float * r
stream += 2;
data += 2;
mix_float32_c2_sse(panning, data + 2, stream + 2, mixframes - 1);
return;
}

if ( (((size_t)stream) % 16) || (((size_t)data) % 16) ) {
} else if ( (((size_t)stream) % 16) || (((size_t)data) % 16) ) {
/* unaligned, do scalar version. */
mix_float32_c2_scalar(panning, data, stream, mixframes);
} else if ((left == 1.0f) && (right == 1.0f)) {
Expand Down Expand Up @@ -843,7 +849,16 @@ static void mix_float32_c1_neon(const ALfloat * restrict panning, const float *
const int leftover = mixframes % 8;
ALsizei i;

if ( (((size_t)stream) % 16) || (((size_t)data) % 16) ) {
/* We can align this to 16 in one special case. */
if ( ((((size_t)data) % 16) == 8) && ((((size_t)stream) % 16) == 0) && (mixframes >= 2) ) {
stream[0] += data[0] * left;
stream[1] += data[0] * right;
stream[2] += data[1] * left;
stream[3] += data[1] * right;
stream += 4;
data += 2;
mix_float32_c1_neon(panning, data + 2, stream + 2, mixframes - 2);
} else if ( (((size_t)stream) % 16) || (((size_t)data) % 16) ) {
/* unaligned, do scalar version. */
mix_float32_c1_scalar(panning, data, stream, mixframes);
} else if ((left == 1.0f) && (right == 1.0f)) {
Expand Down Expand Up @@ -905,10 +920,7 @@ static void mix_float32_c2_neon(const ALfloat * restrict panning, const float *
stream += 2;
data += 2;
mix_float32_c2_neon(panning, data + 2, stream + 2, mixframes - 1);
return;
}

if ( (((size_t)stream) % 16) || (((size_t)data) % 16) ) {
} else if ( (((size_t)stream) % 16) || (((size_t)data) % 16) ) {
/* unaligned, do scalar version. */
mix_float32_c2_scalar(panning, data, stream, mixframes);
} else if ((left == 1.0f) && (right == 1.0f)) {
Expand Down

0 comments on commit a3bb60f

Please sign in to comment.