15#ifndef AVIR_FLOAT4_SSE_INCLUDED
16#define AVIR_FLOAT4_SSE_INCLUDED
39 float4(
const float4& s )
44 float4(
const __m128 s )
49 float4(
const float s )
50 :
value( _mm_set1_ps( s ))
54 float4& operator = (
const float4& s )
60 float4& operator = (
const __m128 s )
66 float4& operator = (
const float s )
68 value = _mm_set1_ps( s );
72 operator float ()
const
74 return( _mm_cvtss_f32(
value ));
83 static float4
load(
const float*
const p )
85 return( _mm_load_ps( p ));
83 static float4
load(
const float*
const p ) {
…}
94 static float4
loadu(
const float*
const p )
96 return( _mm_loadu_ps( p ));
94 static float4
loadu(
const float*
const p ) {
…}
107 static float4
loadu(
const float*
const p,
int lim )
113 return( _mm_loadu_ps( p ));
117 return( _mm_set_ps( 0.0f, p[ 2 ], p[ 1 ], p[ 0 ]));
124 return( _mm_set_ps( 0.0f, 0.0f, p[ 1 ], p[ 0 ]));
128 return( _mm_load_ss( p ));
107 static float4
loadu(
const float*
const p,
int lim ) {
…}
141 _mm_store_ps( p,
value );
152 _mm_storeu_ps( p,
value );
163 void storeu(
float*
const p,
int lim )
const
169 _mm_storeu_ps( p,
value );
173 _mm_storel_pi( (__m64*) p,
value );
174 _mm_store_ss( p + 2, _mm_movehl_ps(
value,
value ));
181 _mm_storel_pi( (__m64*) p,
value );
185 _mm_store_ss( p,
value );
163 void storeu(
float*
const p,
int lim )
const {
…}
196 float4& operator -= (
const float4& s )
202 float4& operator *= (
const float4& s )
208 float4& operator /= (
const float4& s )
214 float4 operator + (
const float4& s )
const
216 return( _mm_add_ps(
value, s.value ));
219 float4 operator - (
const float4& s )
const
221 return( _mm_sub_ps(
value, s.value ));
224 float4 operator * (
const float4& s )
const
226 return( _mm_mul_ps(
value, s.value ));
229 float4 operator / (
const float4& s )
const
231 return( _mm_div_ps(
value, s.value ));
241 const __m128 res = _mm_add_ss( v, _mm_shuffle_ps( v, v, 1 ));
242 return( _mm_cvtss_f32( res ));
253 static void addu(
float*
const p,
const float4& v )
253 static void addu(
float*
const p,
const float4& v ) {
…}
267 static void addu(
float*
const p,
const float4& v,
const int lim )
267 static void addu(
float*
const p,
const float4& v,
const int lim ) {
…}
285 unsigned int prevrm = _MM_GET_ROUNDING_MODE();
286 _MM_SET_ROUNDING_MODE( _MM_ROUND_NEAREST );
288 const __m128 res = _mm_cvtepi32_ps( _mm_cvtps_epi32( v.
value ));
290 _MM_SET_ROUNDING_MODE( prevrm );
308 return( _mm_min_ps( _mm_max_ps( Value.
value, minv.
value ), maxv.
value ));
T clamp(const T &Value, const T minv, const T maxv)
Definition avir.h:121
T round(const T d)
Definition avir.h:104
fpclass_def< avir ::float4, float > fpclass_float4
Class that can be used as the "fpclass" template parameter of the avir::CImageResizer class to perform calculations using the SIMD float4 type.
Definition avir_float4_sse.h:311
Floating-point processing definition and abstraction class.
Definition avir.h:4571
SIMD packed 4-float type.
Definition avir_float4_sse.h:33
static void addu(float *const p, const float4 &v, const int lim)
Definition avir_float4_sse.h:267
float hadd() const
Definition avir_float4_sse.h:238
void storeu(float *const p, int lim) const
Definition avir_float4_sse.h:163
void storeu(float *const p) const
Definition avir_float4_sse.h:150
static void addu(float *const p, const float4 &v)
Definition avir_float4_sse.h:253
__m128 value
Packed value of 4 floats.
Definition avir_float4_sse.h:272
static float4 loadu(const float *const p, int lim)
Definition avir_float4_sse.h:107
static float4 load(const float *const p)
Definition avir_float4_sse.h:83
void store(float *const p) const
Definition avir_float4_sse.h:139
static float4 loadu(const float *const p)
Definition avir_float4_sse.h:94