16#ifndef AVIR_FLOAT4_SSE_INCLUDED
17#define AVIR_FLOAT4_SSE_INCLUDED
19#if defined( _MSC_VER )
22 #include <emmintrin.h>
50 float4(
const __m128 s )
56 :
value( _mm_set1_ps( (float) s ))
60 float4(
const float s )
61 :
value( _mm_set1_ps( s ))
65 float4(
const double s )
66 :
value( _mm_set1_ps( (float) s ))
70 float4& operator = (
const float4& s )
76 float4& operator = (
const __m128 s )
82 float4& operator = (
const float s )
84 value = _mm_set1_ps( s );
88 operator float ()
const
90 return( _mm_cvtss_f32(
value ));
101 static float4
load(
const float*
const p )
103 return( _mm_load_ps( p ));
114 static float4
loadu(
const float*
const p )
116 return( _mm_loadu_ps( p ));
129 static float4
loadu(
const float*
const p,
int lim )
135 return( _mm_loadu_ps( p ));
139 return( _mm_set_ps( 0.0f, p[ 2 ], p[ 1 ], p[ 0 ]));
146 return( _mm_set_ps( 0.0f, 0.0f, p[ 1 ], p[ 0 ]));
150 return( _mm_load_ss( p ));
163 _mm_store_ps( p,
value );
174 _mm_storeu_ps( p,
value );
185 void storeu(
float*
const p,
int lim )
const
191 _mm_storeu_ps( p,
value );
195 _mm_storel_pi( (__m64*) p,
value );
196 _mm_store_ss( p + 2, _mm_movehl_ps(
value,
value ));
203 _mm_storel_pi( (__m64*) p,
value );
207 _mm_store_ss( p,
value );
218 float4& operator -= (
const float4& s )
224 float4& operator *= (
const float4& s )
230 float4& operator /= (
const float4& s )
236 float4 operator + (
const float4& s )
const
238 return( _mm_add_ps(
value, s.value ));
241 float4 operator - (
const float4& s )
const
243 return( _mm_sub_ps(
value, s.value ));
246 float4 operator * (
const float4& s )
const
248 return( _mm_mul_ps(
value, s.value ));
251 float4 operator / (
const float4& s )
const
253 return( _mm_div_ps(
value, s.value ));
263 const __m128 res = _mm_add_ss( v, _mm_shuffle_ps( v, v, 1 ));
265 return( _mm_cvtss_f32( res ));
276 static void addu(
float*
const p,
const float4& v )
290 static void addu(
float*
const p,
const float4& v,
const int lim )
305 unsigned int prevrm = _MM_GET_ROUNDING_MODE();
306 _MM_SET_ROUNDING_MODE( _MM_ROUND_NEAREST );
308 const __m128 res = _mm_cvtepi32_ps( _mm_cvtps_epi32( v.
value ));
310 _MM_SET_ROUNDING_MODE( prevrm );
328 return( _mm_min_ps( _mm_max_ps( Value.
value, minv.
value ), maxv.
value ));
T clamp(const T &Value, const T minv, const T maxv)
"Clamps" (clips) the specified value so that it is not less than minv and not greater than maxv.
Definition avir.h:149
T round(const T d)
Rounding function, based on the (int) typecast. Biased result. Not suitable for numbers greater than ...
Definition avir.h:131
fpclass_def< avir::float4, float > fpclass_float4
Class that can be used as the "fpclass" template parameter of the avir::CImageResizer class to perfor...
Definition avir_float4_sse.h:331
Floating-point processing definition and abstraction class.
Definition avir.h:4572
SIMD packed 4-float type.
Definition avir_float4_sse.h:37
static void addu(float *const p, const float4 &v, const int lim)
Performs in-place addition of a value located in memory, and the specified value. Limited to the spec...
Definition avir_float4_sse.h:290
float hadd() const
Returns horizontal sum of elements.
Definition avir_float4_sse.h:260
void storeu(float *const p, int lim) const
Stores "lim" lower elements of this value to the specified memory location.
Definition avir_float4_sse.h:185
void storeu(float *const p) const
Stores this value to the specified memory location.
Definition avir_float4_sse.h:172
static void addu(float *const p, const float4 &v)
Performs in-place addition of a value located in memory, and the specified value.
Definition avir_float4_sse.h:276
__m128 value
Packed value of 4 floats.
Definition avir_float4_sse.h:39
static float4 loadu(const float *const p, int lim)
Returns float4 value loaded from the specified memory location, with elements beyond "lim" set to 0.
Definition avir_float4_sse.h:129
static float4 load(const float *const p)
Returns float4 value loaded from the specified memory location.
Definition avir_float4_sse.h:101
void store(float *const p) const
Stores this value to the specified memory location.
Definition avir_float4_sse.h:161
static float4 loadu(const float *const p)
Returns float4 value loaded from the specified memory location.
Definition avir_float4_sse.h:114