AVIR
High-quality pro image resizing library
 
Loading...
Searching...
No Matches
avir_float8_avx.h
Go to the documentation of this file.
1
15
16#ifndef AVIR_FLOAT8_AVX_INCLUDED
17#define AVIR_FLOAT8_AVX_INCLUDED
18
19#include <immintrin.h>
20#include "avir_dil.h"
21
22namespace avir {
23
32
33class float8
34{
35public:
36 __m256 value;
37
38 float8()
39 {
40 }
41
42 float8( const float8& s )
43 : value( s.value )
44 {
45 }
46
47 float8( const __m256 s )
48 : value( s )
49 {
50 }
51
52 float8( const int s )
53 : value( _mm256_set1_ps( (float) s ))
54 {
55 }
56
57 float8( const float s )
58 : value( _mm256_set1_ps( s ))
59 {
60 }
61
62 float8( const double s )
63 : value( _mm256_set1_ps( (float) s ))
64 {
65 }
66
67 float8& operator = ( const float8& s )
68 {
69 value = s.value;
70 return( *this );
71 }
72
73 float8& operator = ( const __m256 s )
74 {
75 value = s;
76 return( *this );
77 }
78
79 float8& operator = ( const float s )
80 {
81 value = _mm256_set1_ps( s );
82 return( *this );
83 }
84
85 operator float () const
86 {
87 return( _mm_cvtss_f32( _mm256_extractf128_ps( value, 0 )));
88 }
89
97
98 static float8 load( const float* const p )
99 {
100 return( _mm256_load_ps( p ));
101 }
102
110
111 static float8 loadu( const float* const p )
112 {
113 return( _mm256_loadu_ps( p ));
114 }
115
125
126 static float8 loadu( const float* const p, const int lim )
127 {
128 __m128 lo;
129 __m128 hi;
130
131 if( lim > 4 )
132 {
133 lo = _mm_loadu_ps( p );
134 hi = loadu4( p + 4, lim - 4 );
135 }
136 else
137 {
138 lo = loadu4( p, lim );
139 hi = _mm_setzero_ps();
140 }
141
142 return( _mm256_insertf128_ps( _mm256_castps128_ps256( lo ), hi, 1 ));
143 }
144
150
151 void store( float* const p ) const
152 {
153 _mm256_store_ps( p, value );
154 }
155
161
162 void storeu( float* const p ) const
163 {
164 _mm256_storeu_ps( p, value );
165 }
166
174
175 void storeu( float* p, int lim ) const
176 {
177 __m128 v;
178
179 if( lim > 4 )
180 {
181 _mm_storeu_ps( p, _mm256_extractf128_ps( value, 0 ));
182 v = _mm256_extractf128_ps( value, 1 );
183 p += 4;
184 lim -= 4;
185 }
186 else
187 {
188 v = _mm256_extractf128_ps( value, 0 );
189 }
190
191 if( lim > 2 )
192 {
193 if( lim > 3 )
194 {
195 _mm_storeu_ps( p, v );
196 }
197 else
198 {
199 _mm_storel_pi( (__m64*) p, v );
200 _mm_store_ss( p + 2, _mm_movehl_ps( v, v ));
201 }
202 }
203 else
204 {
205 if( lim == 2 )
206 {
207 _mm_storel_pi( (__m64*) p, v );
208 }
209 else
210 {
211 _mm_store_ss( p, v );
212 }
213 }
214 }
215
216 float8& operator += ( const float8& s )
217 {
218 value = _mm256_add_ps( value, s.value );
219 return( *this );
220 }
221
222 float8& operator -= ( const float8& s )
223 {
224 value = _mm256_sub_ps( value, s.value );
225 return( *this );
226 }
227
228 float8& operator *= ( const float8& s )
229 {
230 value = _mm256_mul_ps( value, s.value );
231 return( *this );
232 }
233
234 float8& operator /= ( const float8& s )
235 {
236 value = _mm256_div_ps( value, s.value );
237 return( *this );
238 }
239
240 float8 operator + ( const float8& s ) const
241 {
242 return( _mm256_add_ps( value, s.value ));
243 }
244
245 float8 operator - ( const float8& s ) const
246 {
247 return( _mm256_sub_ps( value, s.value ));
248 }
249
250 float8 operator * ( const float8& s ) const
251 {
252 return( _mm256_mul_ps( value, s.value ));
253 }
254
255 float8 operator / ( const float8& s ) const
256 {
257 return( _mm256_div_ps( value, s.value ));
258 }
259
263
264 float hadd() const
265 {
266 __m128 v = _mm_add_ps( _mm256_extractf128_ps( value, 0 ),
267 _mm256_extractf128_ps( value, 1 ));
268
269 v = _mm_hadd_ps( v, v );
270 v = _mm_hadd_ps( v, v );
271
272 return( _mm_cvtss_f32( v ));
273 }
274
282
283 static void addu( float* const p, const float8& v )
284 {
285 ( loadu( p ) + v ).storeu( p );
286 }
287
296
297 static void addu( float* const p, const float8& v, const int lim )
298 {
299 ( loadu( p, lim ) + v ).storeu( p, lim );
300 }
301
302private:
312
313 static __m128 loadu4( const float* const p, const int lim )
314 {
315 if( lim > 2 )
316 {
317 if( lim > 3 )
318 {
319 return( _mm_loadu_ps( p ));
320 }
321 else
322 {
323 return( _mm_set_ps( 0.0f, p[ 2 ], p[ 1 ], p[ 0 ]));
324 }
325 }
326 else
327 {
328 if( lim == 2 )
329 {
330 return( _mm_set_ps( 0.0f, 0.0f, p[ 1 ], p[ 0 ]));
331 }
332 else
333 {
334 return( _mm_load_ss( p ));
335 }
336 }
337 }
338};
339
346
347inline float8 round( const float8& v )
348{
349 return( _mm256_round_ps( v.value,
350 ( _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC )));
351}
352
362
363inline float8 clamp( const float8& Value, const float8& minv,
364 const float8& maxv )
365{
366 return( _mm256_min_ps( _mm256_max_ps( Value.value, minv.value ),
367 maxv.value ));
368}
369
374
375} // namespace avir
376
377#endif // AVIR_FLOAT8_AVX_INCLUDED
T clamp(const T &Value, const T minv, const T maxv)
"Clamps" (clips) the specified value so that it is not less than minv and not greater than maxv.
Definition avir.h:149
T round(const T d)
Rounding function, based on the (int) typecast. Biased result. Not suitable for numbers greater than ...
Definition avir.h:131
Inclusion file for de-interleaved image resizing functions.
fpclass_def_dil< float, avir ::float8 > fpclass_float8_dil
Class that can be used as the "fpclass" template parameter of the avir::CImageResizer class to perfor...
Definition avir_float8_avx.h:370
Floating-point processing definition and abstraction class for de-interleaved processing.
Definition avir_dil.h:1017
SIMD packed 8-float type.
Definition avir_float8_avx.h:34
void store(float *const p) const
Stores this value to the specified memory location.
Definition avir_float8_avx.h:151
static float8 loadu(const float *const p, const int lim)
Returns float8 value loaded from the specified memory location, with elements beyond "lim" set to 0.
Definition avir_float8_avx.h:126
float hadd() const
Returns horizontal sum of elements.
Definition avir_float8_avx.h:264
static float8 loadu(const float *const p)
Returns float8 value loaded from the specified memory location.
Definition avir_float8_avx.h:111
static void addu(float *const p, const float8 &v)
Performs in-place addition of a value located in memory, and the specified value.
Definition avir_float8_avx.h:283
static void addu(float *const p, const float8 &v, const int lim)
Performs in-place addition of a value located in memory, and the specified value. Limited to the spec...
Definition avir_float8_avx.h:297
__m256 value
Packed value of 8 floats.
Definition avir_float8_avx.h:36
void storeu(float *p, int lim) const
Stores "lim" lower elements of this value to the specified memory location.
Definition avir_float8_avx.h:175
static float8 load(const float *const p)
Returns float8 value loaded from the specified memory location.
Definition avir_float8_avx.h:98
void storeu(float *const p) const
Stores this value to the specified memory location.
Definition avir_float8_avx.h:162