16#ifndef AVIR_FLOAT8_AVX_INCLUDED
17#define AVIR_FLOAT8_AVX_INCLUDED
47 float8(
const __m256 s )
53 :
value( _mm256_set1_ps( (float) s ))
57 float8(
const float s )
58 :
value( _mm256_set1_ps( s ))
62 float8(
const double s )
63 :
value( _mm256_set1_ps( (float) s ))
67 float8& operator = (
const float8& s )
73 float8& operator = (
const __m256 s )
79 float8& operator = (
const float s )
81 value = _mm256_set1_ps( s );
85 operator float ()
const
87 return( _mm_cvtss_f32( _mm256_extractf128_ps(
value, 0 )));
98 static float8
load(
const float*
const p )
100 return( _mm256_load_ps( p ));
111 static float8
loadu(
const float*
const p )
113 return( _mm256_loadu_ps( p ));
126 static float8
loadu(
const float*
const p,
const int lim )
133 lo = _mm_loadu_ps( p );
134 hi = loadu4( p + 4, lim - 4 );
138 lo = loadu4( p, lim );
139 hi = _mm_setzero_ps();
142 return( _mm256_insertf128_ps( _mm256_castps128_ps256( lo ), hi, 1 ));
153 _mm256_store_ps( p,
value );
164 _mm256_storeu_ps( p,
value );
181 _mm_storeu_ps( p, _mm256_extractf128_ps(
value, 0 ));
182 v = _mm256_extractf128_ps(
value, 1 );
188 v = _mm256_extractf128_ps(
value, 0 );
195 _mm_storeu_ps( p, v );
199 _mm_storel_pi( (__m64*) p, v );
200 _mm_store_ss( p + 2, _mm_movehl_ps( v, v ));
207 _mm_storel_pi( (__m64*) p, v );
211 _mm_store_ss( p, v );
222 float8& operator -= (
const float8& s )
228 float8& operator *= (
const float8& s )
234 float8& operator /= (
const float8& s )
240 float8 operator + (
const float8& s )
const
242 return( _mm256_add_ps(
value, s.value ));
245 float8 operator - (
const float8& s )
const
247 return( _mm256_sub_ps(
value, s.value ));
250 float8 operator * (
const float8& s )
const
252 return( _mm256_mul_ps(
value, s.value ));
255 float8 operator / (
const float8& s )
const
257 return( _mm256_div_ps(
value, s.value ));
266 __m128 v = _mm_add_ps( _mm256_extractf128_ps(
value, 0 ),
267 _mm256_extractf128_ps(
value, 1 ));
269 v = _mm_hadd_ps( v, v );
270 v = _mm_hadd_ps( v, v );
272 return( _mm_cvtss_f32( v ));
283 static void addu(
float*
const p,
const float8& v )
297 static void addu(
float*
const p,
const float8& v,
const int lim )
313 static __m128 loadu4(
const float*
const p,
const int lim )
319 return( _mm_loadu_ps( p ));
323 return( _mm_set_ps( 0.0f, p[ 2 ], p[ 1 ], p[ 0 ]));
330 return( _mm_set_ps( 0.0f, 0.0f, p[ 1 ], p[ 0 ]));
334 return( _mm_load_ss( p ));
349 return( _mm256_round_ps( v.
value,
350 ( _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC )));
366 return( _mm256_min_ps( _mm256_max_ps( Value.
value, minv.
value ),
T clamp(const T &Value, const T minv, const T maxv)
"Clamps" (clips) the specified value so that it is not less than minv and not greater than maxv.
Definition avir.h:149
T round(const T d)
Rounding function, based on the (int) typecast. Biased result. Not suitable for numbers greater than ...
Definition avir.h:131
Inclusion file for de-interleaved image resizing functions.
fpclass_def_dil< float, avir::float8 > fpclass_float8_dil
Class that can be used as the "fpclass" template parameter of the avir::CImageResizer class to perfor...
Definition avir_float8_avx.h:370
Floating-point processing definition and abstraction class for de-interleaved processing.
Definition avir_dil.h:1017
SIMD packed 8-float type.
Definition avir_float8_avx.h:34
void store(float *const p) const
Stores this value to the specified memory location.
Definition avir_float8_avx.h:151
static float8 loadu(const float *const p, const int lim)
Returns float8 value loaded from the specified memory location, with elements beyond "lim" set to 0.
Definition avir_float8_avx.h:126
float hadd() const
Returns horizontal sum of elements.
Definition avir_float8_avx.h:264
static float8 loadu(const float *const p)
Returns float8 value loaded from the specified memory location.
Definition avir_float8_avx.h:111
static void addu(float *const p, const float8 &v)
Performs in-place addition of a value located in memory and the specified value.
Definition avir_float8_avx.h:283
static void addu(float *const p, const float8 &v, const int lim)
Performs in-place addition of a value located in memory, and the specified value. Limited to the spec...
Definition avir_float8_avx.h:297
__m256 value
Packed value of 8 floats.
Definition avir_float8_avx.h:36
void storeu(float *p, int lim) const
Stores "lim" lower elements of this value to the specified memory location.
Definition avir_float8_avx.h:175
static float8 load(const float *const p)
Returns float8 value loaded from the specified memory location.
Definition avir_float8_avx.h:98
void storeu(float *const p) const
Stores this value to the specified memory location.
Definition avir_float8_avx.h:162