Skip to content

Commit

Permalink
bug fix for blend4 function
Browse files: browse the repository at this point in the history
  • Loading branch information
AgnerF committed Sep 14, 2019
1 parent 6ae7284 commit 0643c72
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 7 deletions.
6 changes: 3 additions & 3 deletions vectorclass.h
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
/**************************** vectorclass.h ********************************
* Author: Agner Fog
* Date created: 2012-05-30
- * Last modified: 2019-08-01
- * Version: 2.00.00
+ * Last modified: 2019-09-14
+ * Version: 2.00.01
* Project: vector class library
* Home: https://github.com/vectorclass
* Description:
Expand All @@ -26,7 +26,7 @@
* Apache License version 2.0 or later.
******************************************************************************/
#ifndef VECTORCLASS_H
#define VECTORCLASS_H 20000
#define VECTORCLASS_H 20001

// Maximum vector size, bits. Allowed values are 128, 256, 512
#ifndef MAX_VECTOR_SIZE
Expand Down
16 changes: 12 additions & 4 deletions vectorf128.h
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,8 @@
/**************************** vectorf128.h *******************************
* Author: Agner Fog
* Date created: 2012-05-30
- * Last modified: 2019-08-01
- * Version: 2.00.00
+ * Last modified: 2019-09-14
+ * Version: 2.00.01
* Project: vector class library
* Description:
* Header file defining 128-bit floating point vector classes
Expand Down Expand Up @@ -2644,6 +2644,14 @@ static inline Vec4f blend4(Vec4f const a, Vec4f const b) {
else if constexpr ((flags & blend_shufba) != 0 && !blendonly) { // use floating point instruction shufps
y = _mm_shuffle_ps(b, a, flags >> blend_shufpattern);
}
#if INSTRSET >= 4 // SSSE3
else if constexpr ((flags & blend_rotateab) != 0) {
y = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(a), _mm_castps_si128(b), flags >> blend_rotpattern));
}
else if constexpr ((flags & blend_rotateba) != 0) {
y = _mm_castsi128_ps(_mm_alignr_epi8(_mm_castps_si128(b), _mm_castps_si128(a), flags >> blend_rotpattern));
}
#endif
else { // No special cases. permute a and b separately, then blend.
#if INSTRSET >= 5 // SSE4.1
constexpr bool dozero = false;
Expand All @@ -2662,8 +2670,8 @@ static inline Vec4f blend4(Vec4f const a, Vec4f const b) {
y = _mm_mask_mov_ps (ya, (uint8_t)make_bit_mask<4, 0x302>(indexs), yb);
#elif INSTRSET >= 5 // SSE4.1
constexpr uint8_t mm = ((i0 & 4) ? 0x01 : 0) | ((i1 & 4) ? 0x02 : 0) | ((i2 & 4) ? 0x04 : 0) | ((i3 & 4) ? 0x08 : 0);
if constexpr (mm == 0x01) y = _mm_move_ss(a, b);
else if constexpr (mm == 0x0E) y = _mm_move_ss(b, a);
if constexpr (mm == 0x01) y = _mm_move_ss(ya, yb);
else if constexpr (mm == 0x0E) y = _mm_move_ss(yb, ya);
else {
y = _mm_blend_ps (ya, yb, mm);
}
Expand Down

0 comments on commit 0643c72

Please sign in to comment.