AVIR
High-quality pro image resizing library
 
Loading...
Searching...
No Matches
avir_float4_sse.h
Go to the documentation of this file.
1
15
16#ifndef AVIR_FLOAT4_SSE_INCLUDED
17#define AVIR_FLOAT4_SSE_INCLUDED
18
19#if defined( _MSC_VER )
20 #include <intrin.h>
21#else // defined( _MSC_VER )
22 #include <emmintrin.h>
23#endif // defined( _MSC_VER )
24
25namespace avir {
26
35
/**
 * @brief SIMD packed 4-float type.
 *
 * Thin wrapper around the SSE "__m128" type that provides constructors,
 * assignment, element-wise arithmetic operators, aligned/unaligned/partial
 * load and store functions, and a horizontal sum.
 */

class float4
{
public:
	__m128 value; ///< Packed value of 4 floats.

	/**
	 * Default constructor, leaves "value" uninitialized.
	 */

	float4()
	{
	}

	/**
	 * Copy constructor.
	 *
	 * @param s Source object to copy the packed value from.
	 */

	float4( const float4& s )
		: value( s.value )
	{
	}

	/**
	 * Constructor, takes an existing packed value.
	 *
	 * @param s Packed value of 4 floats.
	 */

	float4( const __m128 s )
		: value( s )
	{
	}

	/**
	 * Constructor, broadcasts an integer (converted to "float") to all 4
	 * elements.
	 *
	 * @param s Scalar value.
	 */

	float4( const int s )
		: value( _mm_set1_ps( (float) s ))
	{
	}

	/**
	 * Constructor, broadcasts a "float" to all 4 elements.
	 *
	 * @param s Scalar value.
	 */

	float4( const float s )
		: value( _mm_set1_ps( s ))
	{
	}

	/**
	 * Constructor, broadcasts a "double" (converted to "float") to all 4
	 * elements.
	 *
	 * @param s Scalar value.
	 */

	float4( const double s )
		: value( _mm_set1_ps( (float) s ))
	{
	}

	/**
	 * Copy assignment.
	 *
	 * @param s Source object.
	 * @return Reference to *this.
	 */

	float4& operator = ( const float4& s )
	{
		value = s.value;
		return( *this );
	}

	/**
	 * Assignment from a packed value.
	 *
	 * @param s Packed value of 4 floats.
	 * @return Reference to *this.
	 */

	float4& operator = ( const __m128 s )
	{
		value = s;
		return( *this );
	}

	/**
	 * Assignment from a scalar, broadcast to all 4 elements.
	 *
	 * @param s Scalar value.
	 * @return Reference to *this.
	 */

	float4& operator = ( const float s )
	{
		value = _mm_set1_ps( s );
		return( *this );
	}

	/**
	 * @return The lowest (first) element of the packed value.
	 */

	operator float () const
	{
		return( _mm_cvtss_f32( value ));
	}

	/**
	 * @param p Pointer to memory from where the value should be loaded;
	 * "_mm_load_ps" requires it to be 16-byte aligned.
	 * @return float4 value loaded from the specified memory location.
	 */

	static float4 load( const float* const p )
	{
		return( _mm_load_ps( p ));
	}

	/**
	 * @param p Pointer to memory from where the value should be loaded,
	 * may have any alignment.
	 * @return float4 value loaded from the specified memory location.
	 */

	static float4 loadu( const float* const p )
	{
		return( _mm_loadu_ps( p ));
	}

	/**
	 * @param p Pointer to memory from where the value should be loaded,
	 * may have any alignment.
	 * @param lim The number of lower elements to load. Values above 3 load
	 * all 4 elements; values below 2 load a single element.
	 * @return float4 value loaded from the specified memory location, with
	 * elements beyond "lim" set to 0.
	 */

	static float4 loadu( const float* const p, int lim )
	{
		if( lim > 2 )
		{
			if( lim > 3 )
			{
				return( _mm_loadu_ps( p ));
			}
			else
			{
				// _mm_set_ps() takes elements from highest to lowest.

				return( _mm_set_ps( 0.0f, p[ 2 ], p[ 1 ], p[ 0 ]));
			}
		}
		else
		{
			if( lim == 2 )
			{
				return( _mm_set_ps( 0.0f, 0.0f, p[ 1 ], p[ 0 ]));
			}
			else
			{
				// Loads p[ 0 ] only, upper 3 elements are zeroed.

				return( _mm_load_ss( p ));
			}
		}
	}

	/**
	 * Stores this value to the specified memory location.
	 *
	 * @param[out] p Output memory location; "_mm_store_ps" requires it to be
	 * 16-byte aligned.
	 */

	void store( float* const p ) const
	{
		_mm_store_ps( p, value );
	}

	/**
	 * Stores this value to the specified memory location.
	 *
	 * @param[out] p Output memory location, may have any alignment.
	 */

	void storeu( float* const p ) const
	{
		_mm_storeu_ps( p, value );
	}

	/**
	 * Stores "lim" lower elements of this value to the specified memory
	 * location.
	 *
	 * @param[out] p Output memory location, may have any alignment.
	 * @param lim The number of lower elements to store. Values above 3 store
	 * all 4 elements; values below 2 store a single element.
	 */

	void storeu( float* const p, int lim ) const
	{
		if( lim > 2 )
		{
			if( lim > 3 )
			{
				_mm_storeu_ps( p, value );
			}
			else
			{
				// Elements 0 and 1 are stored as a pair, element 2 is moved
				// to the lowest position and stored as a scalar.

				_mm_storel_pi( (__m64*) p, value );
				_mm_store_ss( p + 2, _mm_movehl_ps( value, value ));
			}
		}
		else
		{
			if( lim == 2 )
			{
				_mm_storel_pi( (__m64*) p, value );
			}
			else
			{
				_mm_store_ss( p, value );
			}
		}
	}

	/**
	 * Element-wise in-place addition.
	 *
	 * @param s Value to add.
	 * @return Reference to *this.
	 */

	float4& operator += ( const float4& s )
	{
		value = _mm_add_ps( value, s.value );
		return( *this );
	}

	/**
	 * Element-wise in-place subtraction.
	 *
	 * @param s Value to subtract.
	 * @return Reference to *this.
	 */

	float4& operator -= ( const float4& s )
	{
		value = _mm_sub_ps( value, s.value );
		return( *this );
	}

	/**
	 * Element-wise in-place multiplication.
	 *
	 * @param s Value to multiply by.
	 * @return Reference to *this.
	 */

	float4& operator *= ( const float4& s )
	{
		value = _mm_mul_ps( value, s.value );
		return( *this );
	}

	/**
	 * Element-wise in-place division.
	 *
	 * @param s Value to divide by.
	 * @return Reference to *this.
	 */

	float4& operator /= ( const float4& s )
	{
		value = _mm_div_ps( value, s.value );
		return( *this );
	}

	/**
	 * @param s Value to add.
	 * @return Element-wise sum of *this and "s".
	 */

	float4 operator + ( const float4& s ) const
	{
		return( _mm_add_ps( value, s.value ));
	}

	/**
	 * @param s Value to subtract.
	 * @return Element-wise difference of *this and "s".
	 */

	float4 operator - ( const float4& s ) const
	{
		return( _mm_sub_ps( value, s.value ));
	}

	/**
	 * @param s Value to multiply by.
	 * @return Element-wise product of *this and "s".
	 */

	float4 operator * ( const float4& s ) const
	{
		return( _mm_mul_ps( value, s.value ));
	}

	/**
	 * @param s Value to divide by.
	 * @return Element-wise quotient of *this and "s".
	 */

	float4 operator / ( const float4& s ) const
	{
		return( _mm_div_ps( value, s.value ));
	}

	/**
	 * @return Horizontal sum of elements, evaluated as
	 * ( e0 + e2 ) + ( e1 + e3 ).
	 */

	float hadd() const
	{
		// Fold the upper pair onto the lower pair: v0 = e0 + e2,
		// v1 = e1 + e3.

		const __m128 v = _mm_add_ps( value, _mm_movehl_ps( value, value ));

		// Shuffle mask 1 brings v1 to the lowest position.

		const __m128 res = _mm_add_ss( v, _mm_shuffle_ps( v, v, 1 ));

		return( _mm_cvtss_f32( res ));
	}

	/**
	 * Performs in-place addition of a value located in memory, and the
	 * specified value.
	 *
	 * @param[in,out] p Pointer to value where addition happens, may have any
	 * alignment.
	 * @param v Value to add.
	 */

	static void addu( float* const p, const float4& v )
	{
		( loadu( p ) + v ).storeu( p );
	}

	/**
	 * Performs in-place addition of a value located in memory, and the
	 * specified value. Only the "lim" lower elements are loaded and stored,
	 * following the rules of the loadu() and storeu() overloads that take
	 * "lim".
	 *
	 * @param[in,out] p Pointer to value where addition happens, may have any
	 * alignment.
	 * @param v Value to add.
	 * @param lim The number of lower elements affected.
	 */

	static void addu( float* const p, const float4& v, const int lim )
	{
		( loadu( p, lim ) + v ).storeu( p, lim );
	}
};
295
302
303inline float4 round( const float4& v )
304{
305 unsigned int prevrm = _MM_GET_ROUNDING_MODE();
306 _MM_SET_ROUNDING_MODE( _MM_ROUND_NEAREST );
307
308 const __m128 res = _mm_cvtepi32_ps( _mm_cvtps_epi32( v.value ));
309
310 _MM_SET_ROUNDING_MODE( prevrm );
311
312 return( res );
313}
314
324
325inline float4 clamp( const float4& Value, const float4& minv,
326 const float4& maxv )
327{
328 return( _mm_min_ps( _mm_max_ps( Value.value, minv.value ), maxv.value ));
329}
330
335
336} // namespace avir
337
338#endif // AVIR_FLOAT4_SSE_INCLUDED
T clamp(const T &Value, const T minv, const T maxv)
"Clamps" (clips) the specified value so that it is not less than minv, and not greater than maxv.
Definition avir.h:149
T round(const T d)
Rounding function, based on the (int) typecast. Biased result. Not suitable for numbers greater than ...
Definition avir.h:131
fpclass_def< avir ::float4, float > fpclass_float4
Class that can be used as the "fpclass" template parameter of the avir::CImageResizer class to perfor...
Definition avir_float4_sse.h:331
Floating-point processing definition and abstraction class.
Definition avir.h:4572
SIMD packed 4-float type.
Definition avir_float4_sse.h:37
static void addu(float *const p, const float4 &v, const int lim)
Performs in-place addition of a value located in memory, and the specified value. Limited to the spec...
Definition avir_float4_sse.h:290
float hadd() const
Returns horizontal sum of elements.
Definition avir_float4_sse.h:260
void storeu(float *const p, int lim) const
Stores "lim" lower elements of this value to the specified memory location.
Definition avir_float4_sse.h:185
void storeu(float *const p) const
Stores this value to the specified memory location.
Definition avir_float4_sse.h:172
static void addu(float *const p, const float4 &v)
Performs in-place addition of a value located in memory, and the specified value.
Definition avir_float4_sse.h:276
__m128 value
Packed value of 4 floats.
Definition avir_float4_sse.h:39
static float4 loadu(const float *const p, int lim)
Returns float4 value loaded from the specified memory location, with elements beyond "lim" set to 0.
Definition avir_float4_sse.h:129
static float4 load(const float *const p)
Returns float4 value loaded from the specified memory location.
Definition avir_float4_sse.h:101
void store(float *const p) const
Stores this value to the specified memory location.
Definition avir_float4_sse.h:161
static float4 loadu(const float *const p)
Returns float4 value loaded from the specified memory location.
Definition avir_float4_sse.h:114