#include <avir_float4_sse.h>
Public Member Functions | |
float4 (const float4 &s) | |
float4 (const __m128 s) | |
float4 (const float s) | |
float | hadd () const |
operator float () const | |
float4 | operator* (const float4 &s) const |
float4 & | operator*= (const float4 &s) |
float4 | operator+ (const float4 &s) const |
float4 & | operator+= (const float4 &s) |
float4 | operator- (const float4 &s) const |
float4 & | operator-= (const float4 &s) |
float4 | operator/ (const float4 &s) const |
float4 & | operator/= (const float4 &s) |
float4 & | operator= (const float4 &s) |
float4 & | operator= (const __m128 s) |
float4 & | operator= (const float s) |
void | store (float *const p) const |
void | storeu (float *const p) const |
void | storeu (float *const p, int lim) const |
Static Public Member Functions | |
static void | addu (float *const p, const float4 &v) |
static void | addu (float *const p, const float4 &v, const int lim) |
static float4 | load (const float *const p) |
static float4 | loadu (const float *const p) |
static float4 | loadu (const float *const p, int lim) |
Public Attributes | |
__m128 | value |
SIMD packed 4-float type.
This class implements a packed 4-float type that can be used to perform parallel computation using SIMD instructions on SSE-enabled processors. This class can be used as the "fptype" argument of the avir::fpclass_def class.
static void avir::float4::addu ( float *const p, const float4 & v )
Function performs in-place addition of a value located in memory and the specified value.
p | Pointer to value where addition happens. May be unaligned. |
v | Value to add. |
static void avir::float4::addu ( float *const p, const float4 & v, const int lim )
Function performs in-place addition of a value located in memory and the specified value. Limited to the specified number of elements.
p | Pointer to value where addition happens. May be unaligned. |
v | Value to add. |
lim | The element number limit, >0. |
float avir::float4::hadd | ( | ) | const |
static float4 avir::float4::load ( const float *const p )
p | Pointer to memory from where the value should be loaded, should be 16-byte aligned. |
static float4 avir::float4::loadu ( const float *const p )
p | Pointer to memory from where the value should be loaded, may have any alignment. |
static float4 avir::float4::loadu ( const float *const p, int lim )
p | Pointer to memory from where the value should be loaded, may have any alignment. |
lim | The maximum number of elements to load, >0. |
void avir::float4::store | ( | float *const | p | ) | const |
Function stores *this value to the specified memory location.
[out] | p | Output memory location, should be 16-byte aligned. |
void avir::float4::storeu | ( | float *const | p | ) | const |
Function stores *this value to the specified memory location.
[out] | p | Output memory location, may have any alignment. |
void avir::float4::storeu | ( | float *const | p, |
int | lim | ||
) | const |
Function stores "lim" lower elements of *this value to the specified memory location.
[out] | p | Output memory location, may have any alignment. |
lim | The number of lower elements to store, >0. |
__m128 avir::float4::value |
Packed value of 4 floats.