15 #ifndef AVIR_FLOAT4_SSE_INCLUDED
16 #define AVIR_FLOAT4_SSE_INCLUDED
18 #include <xmmintrin.h>
19 #include <emmintrin.h>
50 :
value( _mm_set1_ps( s ))
60 float4& operator = (
const __m128 s )
// Assignment from a single scalar float `s`.
66 float4& operator = (
const float s )
// _mm_set1_ps broadcasts `s` into all four packed elements of `value`.
68 value = _mm_set1_ps( s );
// Conversion to a scalar float; declared const, so the object is not
// modified.
72 operator float ()
const
// _mm_cvtss_f32 extracts the lowest (index 0) element of the packed value.
74 return( _mm_cvtss_f32(
value ));
// Loads four consecutive floats starting at `p`. _mm_load_ps requires `p`
// to be aligned on a 16-byte boundary.
85 return( _mm_load_ps( p ));
// Loads four consecutive floats starting at `p`; _mm_loadu_ps has no
// alignment requirement.
96 return( _mm_loadu_ps( p ));
// NOTE(review): the branches selecting between the four returns below are
// on lines not shown here - presumably chosen by the `lim` argument; confirm
// the exact thresholds against the full file.
// Full unaligned load of p[0..3].
113 return( _mm_loadu_ps( p ));
// Three elements: p[0], p[1], p[2] in elements 0..2, element 3 set to 0.
// (_mm_set_ps takes its arguments from the highest element to the lowest.)
117 return( _mm_set_ps( 0.0f, p[ 2 ], p[ 1 ], p[ 0 ]));
// Two elements: p[0], p[1] in elements 0..1, elements 2..3 set to 0.
124 return( _mm_set_ps( 0.0f, 0.0f, p[ 1 ], p[ 0 ]));
// One element: _mm_load_ss puts p[0] in element 0 and zeroes the upper
// three elements.
128 return( _mm_load_ss( p ));
// Writes all four packed elements to p[0..3]. _mm_store_ps requires `p` to
// be aligned on a 16-byte boundary.
141 _mm_store_ps( p,
value );
// Writes all four packed elements to p[0..3]; _mm_storeu_ps has no
// alignment requirement.
152 _mm_storeu_ps( p,
value );
// Unaligned store variant taking an extra `lim` argument; declared const,
// so the object is not modified.
// NOTE(review): `lim` presumably limits how many elements are written. The
// branches that choose between the stores below are on lines not shown
// here, so the exact `lim` thresholds must be confirmed against the full
// file.
163 void storeu(
float*
const p,
int lim )
const
// All four elements -> p[0..3].
169 _mm_storeu_ps( p,
value );
// Lower two elements -> p[0..1] (_mm_storel_pi writes 64 bits) ...
173 _mm_storel_pi( (__m64*) p,
value );
// ... followed by element 2 -> p[2]: _mm_movehl_ps moves the upper pair of
// elements into the lower pair, and _mm_store_ss writes only the lowest
// element of that result.
174 _mm_store_ss( p + 2, _mm_movehl_ps(
value,
value ));
// Lower two elements only -> p[0..1].
181 _mm_storel_pi( (__m64*) p,
value );
// Lowest element only -> p[0].
185 _mm_store_ss( p,
value );
196 float4& operator -= (
const float4& s )
202 float4& operator *= (
const float4& s )
208 float4& operator /= (
const float4& s )
// Element-wise addition. Returns a new value holding value[i] + s.value[i]
// for each of the four elements; neither operand is modified.
214 float4 operator + (
const float4& s )
const
216 return( _mm_add_ps(
value, s.value ));
// Element-wise subtraction. Returns a new value holding
// value[i] - s.value[i] for each of the four elements; neither operand is
// modified.
219 float4 operator - (
const float4& s )
const
221 return( _mm_sub_ps(
value, s.value ));
// Element-wise multiplication. Returns a new value holding
// value[i] * s.value[i] for each of the four elements; neither operand is
// modified.
224 float4 operator * (
const float4& s )
const
226 return( _mm_mul_ps(
value, s.value ));
// Element-wise division. Returns a new value holding
// value[i] / s.value[i] for each of the four elements; neither operand is
// modified.
229 float4 operator / (
const float4& s )
const
231 return( _mm_div_ps(
value, s.value ));
// _mm_shuffle_ps( v, v, 1 ) places element 1 of `v` in the lowest position;
// _mm_add_ss then adds only the lowest elements, so res[0] = v[0] + v[1].
// NOTE(review): `v` is defined on a line not shown here - presumably a
// partial sum of `value`; confirm against the full file.
241 const __m128 res = _mm_add_ss( v, _mm_shuffle_ps( v, v, 1 ));
// Return the lowest element of `res` as a scalar.
242 return( _mm_cvtss_f32( res ));
267 static void addu(
float*
const p,
const float4& v,
const int lim )
// Save the current SSE rounding mode so it can be restored afterwards.
285 unsigned int prevrm = _MM_GET_ROUNDING_MODE();
// Force round-to-nearest for the conversion below.
286 _MM_SET_ROUNDING_MODE( _MM_ROUND_NEAREST );
// Round each element to an integer value: _mm_cvtps_epi32 converts floats to
// 32-bit integers using the current rounding mode, and _mm_cvtepi32_ps
// converts the integers back to floats.
288 const __m128 res = _mm_cvtepi32_ps( _mm_cvtps_epi32( v.
value ));
// Restore the rounding mode that was active on entry.
290 _MM_SET_ROUNDING_MODE( prevrm );
// Element-wise clamp of `Value`.
// NOTE(review): the declaration of `maxv`, used in the return statement
// below, is on a line not shown here.
305 inline float4 clamp(
const float4& Value,
const float4& minv,
// First raise each element to at least the matching element of `minv`
// (_mm_max_ps), then lower it to at most the matching element of `maxv`
// (_mm_min_ps).
308 return( _mm_min_ps( _mm_max_ps( Value.value, minv.value ), maxv.value ));
// Alias naming the `fpclass_def` template instantiated with avir::float4 and
// float. `fpclass_def` is declared outside this listing; the meaning of its
// two template arguments should be checked there.
311 typedef fpclass_def< avir :: float4, float > fpclass_float4;
319 #endif // AVIR_FLOAT4_SSE_INCLUDED
static float4 loadu(const float *const p, int lim)
Definition: avir_float4_sse.h:107
static float4 loadu(const float *const p)
Definition: avir_float4_sse.h:94
SIMD packed 4-float type.
Definition: avir_float4_sse.h:32
void store(float *const p) const
Definition: avir_float4_sse.h:139
float hadd() const
Definition: avir_float4_sse.h:238
static float4 load(const float *const p)
Definition: avir_float4_sse.h:83
void storeu(float *const p) const
Definition: avir_float4_sse.h:150
static void addu(float *const p, const float4 &v, const int lim)
Definition: avir_float4_sse.h:267
static void addu(float *const p, const float4 &v)
Definition: avir_float4_sse.h:253
void storeu(float *const p, int lim) const
Definition: avir_float4_sse.h:163
__m128 value
Definition: avir_float4_sse.h:272