avir/Documentation/a00011_source.html

#ifndef AVIR_FLOAT4_SSE_INCLUDED

#define AVIR_FLOAT4_SSE_INCLUDED


#if defined( _MSC_VER )

    #include <intrin.h>

#else // defined( _MSC_VER )

    #include <emmintrin.h>

#endif // defined( _MSC_VER )


namespace avir {


/**
 * SIMD packed 4-float type.
 *
 * Thin wrapper around a single SSE "__m128" value that provides
 * element-wise arithmetic operators, aligned and unaligned memory access,
 * "limited" memory access that touches only the first "lim" elements, and a
 * horizontal sum.
 */
class float4
{
public:
    __m128 value; ///< Packed value of 4 floats.

    /**
     * Default constructor. Performs no initialization: "value" is left
     * indeterminate until assigned.
     */
    float4()
    {
    }

    /**
     * Copy constructor.
     *
     * @param s Source object to copy the packed value from.
     */
    float4( const float4& s )
        : value( s.value )
    {
    }

    /**
     * Wraps a raw SSE register value.
     *
     * @param s Packed value to adopt as is.
     */
    float4( const __m128 s )
        : value( s )
    {
    }

    /**
     * Broadcasts an integer, converted to float, to all 4 elements.
     *
     * @param s Scalar value.
     */
    float4( const int s )
        : value( _mm_set1_ps( static_cast< float >( s )))
    {
    }

    /**
     * Broadcasts a float to all 4 elements.
     *
     * @param s Scalar value.
     */
    float4( const float s )
        : value( _mm_set1_ps( s ))
    {
    }

    /**
     * Broadcasts a double, narrowed to float, to all 4 elements.
     *
     * @param s Scalar value.
     */
    float4( const double s )
        : value( _mm_set1_ps( static_cast< float >( s )))
    {
    }

    /**
     * Copy assignment.
     *
     * @param s Source object.
     * @return Reference to this object.
     */
    float4& operator = ( const float4& s )
    {
        value = s.value;
        return *this;
    }

    /**
     * Assigns a raw SSE register value.
     *
     * @param s Packed value to adopt as is.
     * @return Reference to this object.
     */
    float4& operator = ( const __m128 s )
    {
        value = s;
        return *this;
    }

    /**
     * Assigns the same scalar to all 4 elements.
     *
     * @param s Scalar value to broadcast.
     * @return Reference to this object.
     */
    float4& operator = ( const float s )
    {
        value = _mm_set1_ps( s );
        return *this;
    }

    /**
     * @return The lowest (first) element of the packed value.
     */
    operator float () const
    {
        return _mm_cvtss_f32( value );
    }

    /**
     * Loads 4 floats using the aligned load instruction.
     *
     * @param p Source address; "_mm_load_ps" requires it to be 16-byte
     * aligned.
     * @return Value holding p[ 0 ] to p[ 3 ].
     */
    static float4 load( const float* const p )
    {
        return float4( _mm_load_ps( p ));
    }

    /**
     * Loads 4 floats from an address with no alignment requirement.
     *
     * @param p Source address.
     * @return Value holding p[ 0 ] to p[ 3 ].
     */
    static float4 loadu( const float* const p )
    {
        return float4( _mm_loadu_ps( p ));
    }

    /**
     * Loads the first "lim" floats from memory, setting the remaining
     * elements to 0. Only the loaded elements are read from memory.
     *
     * @param p Source address, no alignment requirement.
     * @param lim Number of elements to load. Values above 3 load all 4
     * elements; values below 2 (including 0 and negatives) load exactly 1
     * element.
     * @return Loaded value with elements beyond "lim" equal to 0.
     */
    static float4 loadu( const float* const p, int lim )
    {
        if( lim > 3 )
        {
            return float4( _mm_loadu_ps( p ));
        }

        if( lim == 3 )
        {
            return float4( _mm_set_ps( 0.0f, p[ 2 ], p[ 1 ], p[ 0 ]));
        }

        if( lim == 2 )
        {
            return float4( _mm_set_ps( 0.0f, 0.0f, p[ 1 ], p[ 0 ]));
        }

        // Single-element load, upper three elements are zeroed.
        return float4( _mm_load_ss( p ));
    }

    /**
     * Stores all 4 elements using the aligned store instruction.
     *
     * @param[out] p Destination address; "_mm_store_ps" requires it to be
     * 16-byte aligned.
     */
    void store( float* const p ) const
    {
        _mm_store_ps( p, value );
    }

    /**
     * Stores all 4 elements to an address with no alignment requirement.
     *
     * @param[out] p Destination address.
     */
    void storeu( float* const p ) const
    {
        _mm_storeu_ps( p, value );
    }

    /**
     * Stores the "lim" lower elements of this value to memory. Elements in
     * memory beyond "lim" are not written.
     *
     * @param[out] p Destination address, no alignment requirement.
     * @param lim Number of elements to store. Values above 3 store all 4
     * elements; values below 2 (including 0 and negatives) store exactly 1
     * element.
     */
    void storeu( float* const p, int lim ) const
    {
        if( lim > 3 )
        {
            _mm_storeu_ps( p, value );
            return;
        }

        if( lim < 2 )
        {
            _mm_store_ss( p, value );
            return;
        }

        // Here "lim" is 2 or 3: the lower pair is always written.
        _mm_storel_pi( reinterpret_cast< __m64* >( p ), value );

        if( lim == 3 )
        {
            // Move element 2 into the lowest lane, then store it alone.
            _mm_store_ss( p + 2, _mm_movehl_ps( value, value ));
        }
    }

    /**
     * Element-wise in-place addition.
     *
     * @param s Value to add.
     * @return Reference to this object.
     */
    float4& operator += ( const float4& s )
    {
        value = _mm_add_ps( value, s.value );
        return *this;
    }

    /**
     * Element-wise in-place subtraction.
     *
     * @param s Value to subtract.
     * @return Reference to this object.
     */
    float4& operator -= ( const float4& s )
    {
        value = _mm_sub_ps( value, s.value );
        return *this;
    }

    /**
     * Element-wise in-place multiplication.
     *
     * @param s Value to multiply by.
     * @return Reference to this object.
     */
    float4& operator *= ( const float4& s )
    {
        value = _mm_mul_ps( value, s.value );
        return *this;
    }

    /**
     * Element-wise in-place division.
     *
     * @param s Value to divide by.
     * @return Reference to this object.
     */
    float4& operator /= ( const float4& s )
    {
        value = _mm_div_ps( value, s.value );
        return *this;
    }

    /**
     * @param s Value to add.
     * @return Element-wise sum of this value and "s".
     */
    float4 operator + ( const float4& s ) const
    {
        return float4( _mm_add_ps( value, s.value ));
    }

    /**
     * @param s Value to subtract.
     * @return Element-wise difference of this value and "s".
     */
    float4 operator - ( const float4& s ) const
    {
        return float4( _mm_sub_ps( value, s.value ));
    }

    /**
     * @param s Value to multiply by.
     * @return Element-wise product of this value and "s".
     */
    float4 operator * ( const float4& s ) const
    {
        return float4( _mm_mul_ps( value, s.value ));
    }

    /**
     * @param s Value to divide by.
     * @return Element-wise quotient of this value and "s".
     */
    float4 operator / ( const float4& s ) const
    {
        return float4( _mm_div_ps( value, s.value ));
    }

    /**
     * @return Horizontal sum of elements, evaluated as
     * ( e0 + e2 ) + ( e1 + e3 ).
     */
    float hadd() const
    {
        // Lanes 0 and 1 of "pairs" hold e0 + e2 and e1 + e3.
        const __m128 upper = _mm_movehl_ps( value, value );
        const __m128 pairs = _mm_add_ps( value, upper );

        // Bring lane 1 down to lane 0 and add the two partial sums.
        const __m128 lane1 = _mm_shuffle_ps( pairs, pairs, 1 );
        const __m128 total = _mm_add_ss( pairs, lane1 );

        return _mm_cvtss_f32( total );
    }

    /**
     * Performs in-place addition of a value located in memory, and the
     * specified value.
     *
     * @param[in,out] p Address of 4 floats, no alignment requirement.
     * @param v Value to add.
     */
    static void addu( float* const p, const float4& v )
    {
        const float4 sum = loadu( p ) + v;
        sum.storeu( p );
    }

    /**
     * Performs in-place addition of a value located in memory, and the
     * specified value, limited to the first "lim" elements: only those
     * elements are read from and written back to memory.
     *
     * @param[in,out] p Address of the floats, no alignment requirement.
     * @param v Value to add.
     * @param lim Element count, interpreted as in the "lim" overloads of
     * loadu() and storeu().
     */
    static void addu( float* const p, const float4& v, const int lim )
    {
        const float4 sum = loadu( p, lim ) + v;
        sum.storeu( p, lim );
    }
};


/**
 * Rounds each element of the packed value to an integer, returned as floats.
 *
 * The rounding is performed by converting the elements to 32-bit integers
 * and back while the SSE control register is temporarily switched to the
 * round-to-nearest mode; the caller's rounding mode is restored before
 * returning. Since the intermediate representation is a signed 32-bit
 * integer, elements that do not fit that range are not rounded meaningfully.
 *
 * @param v Value to round.
 * @return Value with every element rounded to the nearest integer.
 */
inline float4 round( const float4& v )
{
    // Remember the active rounding mode so that it can be put back below.
    const unsigned int savedMode = _MM_GET_ROUNDING_MODE();
    _MM_SET_ROUNDING_MODE( _MM_ROUND_NEAREST );

    const __m128i asInts = _mm_cvtps_epi32( v.value );
    const __m128 rounded = _mm_cvtepi32_ps( asInts );

    _MM_SET_ROUNDING_MODE( savedMode );

    return float4( rounded );
}


/**
 * "Clamps" (clips) each element of the specified value so that it is not
 * less than the matching element of "minv", and not greater than the
 * matching element of "maxv". The lower bound is applied first, then the
 * upper bound.
 *
 * @param Value Value to clamp.
 * @param minv Per-element minimal allowed values.
 * @param maxv Per-element maximal allowed values.
 * @return The clamped value.
 */
inline float4 clamp( const float4& Value, const float4& minv,
    const float4& maxv )
{
    const __m128 raised = _mm_max_ps( Value.value, minv.value );

    return float4( _mm_min_ps( raised, maxv.value ));
}


// Floating-point processing definition instantiated for the SSE-based
// avir::float4 type. Intended for use as the "fpclass" template parameter of
// the avir::CImageResizer class. fpclass_def itself is declared outside this
// header (in avir.h).
typedef fpclass_def< avir :: float4, float > fpclass_float4;


} // namespace avir


#endif // AVIR_FLOAT4_SSE_INCLUDED

avir::clamp
T clamp(const T &Value, const T minv, const T maxv)
"Clamps" (clips) the specified value so that it is not less than minv, and not greater than maxv.
Definition avir.h:149

avir::round
T round(const T d)
Rounding function, based on the (int) typecast. Biased result. Not suitable for numbers greater than ...
Definition avir.h:131

avir::fpclass_float4
fpclass_def< avir ::float4, float > fpclass_float4
Class that can be used as the "fpclass" template parameter of the avir::CImageResizer class to perfor...
Definition avir_float4_sse.h:331

avir::fpclass_def
Floating-point processing definition and abstraction class.
Definition avir.h:4572

avir::float4
SIMD packed 4-float type.
Definition avir_float4_sse.h:37

avir::float4::addu
static void addu(float *const p, const float4 &v, const int lim)
Performs in-place addition of a value located in memory, and the specified value. Limited to the spec...
Definition avir_float4_sse.h:290

avir::float4::hadd
float hadd() const
Returns horizontal sum of elements.
Definition avir_float4_sse.h:260

avir::float4::storeu
void storeu(float *const p, int lim) const
Stores "lim" lower elements of this value to the specified memory location.
Definition avir_float4_sse.h:185

avir::float4::storeu
void storeu(float *const p) const
Stores this value to the specified memory location.
Definition avir_float4_sse.h:172

avir::float4::addu
static void addu(float *const p, const float4 &v)
Performs in-place addition of a value located in memory, and the specified value.
Definition avir_float4_sse.h:276

avir::float4::value
__m128 value
Packed value of 4 floats.
Definition avir_float4_sse.h:39

avir::float4::loadu
static float4 loadu(const float *const p, int lim)
Returns float4 value loaded from the specified memory location, with elements beyond "lim" set to 0.
Definition avir_float4_sse.h:129

avir::float4::load
static float4 load(const float *const p)
Returns float4 value loaded from the specified memory location.
Definition avir_float4_sse.h:101

avir::float4::store
void store(float *const p) const
Stores this value to the specified memory location.
Definition avir_float4_sse.h:161

avir::float4::loadu
static float4 loadu(const float *const p)
Returns float4 value loaded from the specified memory location.
Definition avir_float4_sse.h:114