#include <avir_float4_sse.h>

Public Member Functions
	float4 (const float4 &s)

	float4 (const __m128 s)

	float4 (const float s)

float	hadd () const

	operator float () const

float4	operator* (const float4 &s) const

float4 &	operator*= (const float4 &s)

float4	operator+ (const float4 &s) const

float4 &	operator+= (const float4 &s)

float4	operator- (const float4 &s) const

float4 &	operator-= (const float4 &s)

float4	operator/ (const float4 &s) const

float4 &	operator/= (const float4 &s)

float4 &	operator= (const float4 &s)

float4 &	operator= (const __m128 s)

float4 &	operator= (const float s)

void	store (float *const p) const

void	storeu (float *const p) const

void	storeu (float *const p, int lim) const

Static Public Member Functions
static void	addu (float *const p, const float4 &v)

static void	addu (float *const p, const float4 &v, const int lim)

static float4	load (const float *const p)

static float4	loadu (const float *const p)

static float4	loadu (const float *const p, int lim)

Public Attributes
__m128	value

Detailed Description

SIMD packed 4-float type.

This class implements a packed 4-float type that can be used to perform parallel computation using SIMD instructions on SSE-enabled processors. This class can be used as the "fptype" argument of the avir::fpclass_def class.

Member Function Documentation

static void avir::float4::addu	(	float *const	p,
		const float4 &	v
	)

static

Function performs in-place addition of a value located in memory and the specified value.

Parameters

p	Pointer to value where addition happens. May be unaligned.
v	Value to add.

static void avir::float4::addu	(	float *const	p,
		const float4 &	v,
		const int	lim
	)

static

Function performs in-place addition of a value located in memory and the specified value. Limited to the specified number of elements.

Parameters

p	Pointer to value where addition happens. May be unaligned.
v	Value to add.
lim	The maximum number of elements to which the addition is applied, >0.

float avir::float4::hadd ( ) const

Returns: Horizontal sum of elements.

static float4 avir::float4::load ( const float *const p )

static

Parameters

p	Pointer to memory from where the value should be loaded, should be 16-byte aligned.

Returns: float4 value loaded from the specified memory location.

static float4 avir::float4::loadu ( const float *const p )

static

Parameters

p	Pointer to memory from where the value should be loaded, may have any alignment.

Returns: float4 value loaded from the specified memory location.

static float4 avir::float4::loadu	(	const float *const	p,
		int	lim
	)

static

Parameters

p	Pointer to memory from where the value should be loaded, may have any alignment.
lim	The maximum number of elements to load, >0.

Returns: float4 value loaded from the specified memory location, with elements beyond "lim" set to 0.

void avir::float4::store ( float *const p ) const

Function stores *this value to the specified memory location.

Parameters

[out] p Output memory location, should be 16-byte aligned.

void avir::float4::storeu ( float *const p ) const

Function stores *this value to the specified memory location.

Parameters

[out] p Output memory location, may have any alignment.

void avir::float4::storeu	(	float *const	p,
		int	lim
	)		const

Function stores "lim" lower elements of *this value to the specified memory location.

Parameters

[out]	p	Output memory location, may have any alignment.
	lim	The number of lower elements to store, >0.

Member Data Documentation

__m128 avir::float4::value

Packed value of 4 floats.

Public Member Functions

Static Public Member Functions

Public Attributes

Detailed Description

Member Function Documentation

Member Data Documentation