AVIR
High-quality pro image resizing library
 
All Classes Files Functions Variables Typedefs Macros Pages
Loading...
Searching...
No Matches
avir_float4_sse.h
Go to the documentation of this file.
1//$ nobt
2//$ nocpp
3
14
15#ifndef AVIR_FLOAT4_SSE_INCLUDED
16#define AVIR_FLOAT4_SSE_INCLUDED
17
18#include <xmmintrin.h>
19#include <emmintrin.h>
20
21namespace avir {
22
31
/**
 * @brief SIMD packed 4-float type.
 *
 * Thin wrapper around the SSE "__m128" register type that provides
 * element-wise arithmetic operators together with aligned, unaligned and
 * partial ("lim"-limited) load and store functions.
 */

class float4
{
public:
    /**
     * Default constructor. The packed value is left uninitialized.
     */

    float4()
    {
    }

    /**
     * Copy constructor.
     *
     * @param s Source object to copy the packed value from.
     */

    float4( const float4& s )
        : value( s.value )
    {
    }

    /**
     * Constructor that wraps a raw SSE register value.
     *
     * @param s Packed value of 4 floats.
     */

    float4( const __m128 s )
        : value( s )
    {
    }

    /**
     * Constructor that broadcasts a scalar to all 4 elements.
     *
     * @param s Scalar value to replicate.
     */

    float4( const float s )
        : value( _mm_set1_ps( s ))
    {
    }

    /**
     * Copy assignment.
     *
     * @param s Source object to copy the packed value from.
     * @return Reference to *this.
     */

    float4& operator = ( const float4& s )
    {
        value = s.value;
        return( *this );
    }

    /**
     * Assignment from a raw SSE register value.
     *
     * @param s Packed value of 4 floats.
     * @return Reference to *this.
     */

    float4& operator = ( const __m128 s )
    {
        value = s;
        return( *this );
    }

    /**
     * Assignment that broadcasts a scalar to all 4 elements.
     *
     * @param s Scalar value to replicate.
     * @return Reference to *this.
     */

    float4& operator = ( const float s )
    {
        value = _mm_set1_ps( s );
        return( *this );
    }

    /**
     * @return The first (lowest) element of the packed value.
     */

    operator float () const
    {
        return( _mm_cvtss_f32( value ));
    }

    /**
     * Loads 4 elements via _mm_load_ps().
     *
     * @param p Pointer to 4 floats; _mm_load_ps() requires it to be 16-byte
     * aligned.
     * @return Packed value holding p[ 0 ] to p[ 3 ].
     */

    static float4 load( const float* const p )
    {
        return( _mm_load_ps( p ));
    }

    /**
     * Loads 4 elements from memory of any alignment.
     *
     * @param p Pointer to 4 floats.
     * @return Packed value holding p[ 0 ] to p[ 3 ].
     */

    static float4 loadu( const float* const p )
    {
        return( _mm_loadu_ps( p ));
    }

    /**
     * Loads up to "lim" leading elements from memory of any alignment.
     * Elements that are not loaded are set to zero, and memory beyond the
     * first "lim" floats is never read.
     *
     * @param p Pointer to at least "lim" floats (at least 4 if "lim" is
     * above 3).
     * @param lim The number of elements to load. A value of 4 or more loads
     * all 4 elements. A value of 0 or less loads nothing and yields an
     * all-zero result.
     * @return Packed value with the loaded elements in the lowest positions.
     */

    static float4 loadu( const float* const p, int lim )
    {
        if( lim > 2 )
        {
            if( lim > 3 )
            {
                return( _mm_loadu_ps( p ));
            }
            else
            {
                return( _mm_set_ps( 0.0f, p[ 2 ], p[ 1 ], p[ 0 ]));
            }
        }
        else
        {
            if( lim == 2 )
            {
                return( _mm_set_ps( 0.0f, 0.0f, p[ 1 ], p[ 0 ]));
            }
            else
            if( lim == 1 )
            {
                // Loads p[ 0 ] and zeroes the upper three elements.

                return( _mm_load_ss( p ));
            }
            else
            {
                // Nothing was requested: p[ 0 ] must not be touched.

                return( _mm_setzero_ps() );
            }
        }
    }

    /**
     * Stores all 4 elements via _mm_store_ps().
     *
     * @param[out] p Destination of 4 floats; _mm_store_ps() requires it to
     * be 16-byte aligned.
     */

    void store( float* const p ) const
    {
        _mm_store_ps( p, value );
    }

    /**
     * Stores all 4 elements to memory of any alignment.
     *
     * @param[out] p Destination of 4 floats.
     */

    void storeu( float* const p ) const
    {
        _mm_storeu_ps( p, value );
    }

    /**
     * Stores up to "lim" leading elements to memory of any alignment.
     * Memory beyond the first "lim" floats is never written.
     *
     * @param[out] p Destination of at least "lim" floats (at least 4 if
     * "lim" is above 3).
     * @param lim The number of elements to store. A value of 4 or more
     * stores all 4 elements. A value of 0 or less stores nothing.
     */

    void storeu( float* const p, int lim ) const
    {
        if( lim > 2 )
        {
            if( lim > 3 )
            {
                _mm_storeu_ps( p, value );
            }
            else
            {
                // Elements 0 and 1 as a pair, then element 2 moved into the
                // lowest position and stored as a scalar.

                _mm_storel_pi( (__m64*) p, value );
                _mm_store_ss( p + 2, _mm_movehl_ps( value, value ));
            }
        }
        else
        {
            if( lim == 2 )
            {
                _mm_storel_pi( (__m64*) p, value );
            }
            else
            if( lim == 1 )
            {
                _mm_store_ss( p, value );
            }

            // lim <= 0: nothing was requested, p[ 0 ] is left untouched.
        }
    }

    /**
     * Element-wise addition in place.
     *
     * @param s Value to add.
     * @return Reference to *this.
     */

    float4& operator += ( const float4& s )
    {
        value = _mm_add_ps( value, s.value );
        return( *this );
    }

    /**
     * Element-wise subtraction in place.
     *
     * @param s Value to subtract.
     * @return Reference to *this.
     */

    float4& operator -= ( const float4& s )
    {
        value = _mm_sub_ps( value, s.value );
        return( *this );
    }

    /**
     * Element-wise multiplication in place.
     *
     * @param s Value to multiply by.
     * @return Reference to *this.
     */

    float4& operator *= ( const float4& s )
    {
        value = _mm_mul_ps( value, s.value );
        return( *this );
    }

    /**
     * Element-wise division in place.
     *
     * @param s Value to divide by.
     * @return Reference to *this.
     */

    float4& operator /= ( const float4& s )
    {
        value = _mm_div_ps( value, s.value );
        return( *this );
    }

    /**
     * @param s Value to add.
     * @return Element-wise sum of *this and "s".
     */

    float4 operator + ( const float4& s ) const
    {
        return( _mm_add_ps( value, s.value ));
    }

    /**
     * @param s Value to subtract.
     * @return Element-wise difference of *this and "s".
     */

    float4 operator - ( const float4& s ) const
    {
        return( _mm_sub_ps( value, s.value ));
    }

    /**
     * @param s Value to multiply by.
     * @return Element-wise product of *this and "s".
     */

    float4 operator * ( const float4& s ) const
    {
        return( _mm_mul_ps( value, s.value ));
    }

    /**
     * @param s Value to divide by.
     * @return Element-wise quotient of *this and "s".
     */

    float4 operator / ( const float4& s ) const
    {
        return( _mm_div_ps( value, s.value ));
    }

    /**
     * @return Horizontal sum of the 4 elements, evaluated as
     * ( e0 + e2 ) + ( e1 + e3 ).
     */

    float hadd() const
    {
        // v[ 0 ] = e0 + e2, v[ 1 ] = e1 + e3.

        const __m128 v = _mm_add_ps( value, _mm_movehl_ps( value, value ));

        // Shuffle mask 1 brings v[ 1 ] into the lowest position.

        const __m128 res = _mm_add_ss( v, _mm_shuffle_ps( v, v, 1 ));
        return( _mm_cvtss_f32( res ));
    }

    /**
     * Adds 4 elements of "v" to 4 floats in memory of any alignment
     * (load, add, store).
     *
     * @param[in,out] p Pointer to 4 floats to update.
     * @param v Value to add.
     */

    static void addu( float* const p, const float4& v )
    {
        ( loadu( p ) + v ).storeu( p );
    }

    /**
     * Adds up to "lim" leading elements of "v" to floats in memory of any
     * alignment. Memory beyond the first "lim" floats is neither read nor
     * written.
     *
     * @param[in,out] p Pointer to at least "lim" floats to update.
     * @param v Value to add.
     * @param lim The number of elements to update; 0 or less updates
     * nothing, 4 or more updates all 4 elements.
     */

    static void addu( float* const p, const float4& v, const int lim )
    {
        ( loadu( p, lim ) + v ).storeu( p, lim );
    }

    __m128 value; ///< Packed value of 4 floats.
};
275
282
283inline float4 round( const float4& v )
284{
285 unsigned int prevrm = _MM_GET_ROUNDING_MODE();
286 _MM_SET_ROUNDING_MODE( _MM_ROUND_NEAREST );
287
288 const __m128 res = _mm_cvtepi32_ps( _mm_cvtps_epi32( v.value ));
289
290 _MM_SET_ROUNDING_MODE( prevrm );
291
292 return( res );
293}
294
304
305inline float4 clamp( const float4& Value, const float4& minv,
306 const float4& maxv )
307{
308 return( _mm_min_ps( _mm_max_ps( Value.value, minv.value ), maxv.value ));
309}
310
316
317} // namespace avir
318
319#endif // AVIR_FLOAT4_SSE_INCLUDED
T clamp(const T &Value, const T minv, const T maxv)
Definition avir.h:121
T round(const T d)
Definition avir.h:104
fpclass_def< avir ::float4, float > fpclass_float4
Class that can be used as the "fpclass" template parameter of the avir::CImageResizer class to perform calculations using the SIMD float4 type.
Definition avir_float4_sse.h:311
Floating-point processing definition and abstraction class.
Definition avir.h:4571
SIMD packed 4-float type.
Definition avir_float4_sse.h:33
static void addu(float *const p, const float4 &v, const int lim)
Definition avir_float4_sse.h:267
float hadd() const
Definition avir_float4_sse.h:238
void storeu(float *const p, int lim) const
Definition avir_float4_sse.h:163
void storeu(float *const p) const
Definition avir_float4_sse.h:150
static void addu(float *const p, const float4 &v)
Definition avir_float4_sse.h:253
__m128 value
Packed value of 4 floats.
Definition avir_float4_sse.h:272
static float4 loadu(const float *const p, int lim)
Definition avir_float4_sse.h:107
static float4 load(const float *const p)
Definition avir_float4_sse.h:83
void store(float *const p) const
Definition avir_float4_sse.h:139
static float4 loadu(const float *const p)
Definition avir_float4_sse.h:94