SIMD packed 4-float type.
More...
#include <avir_float4_sse.h>
|
| float4 (const __m128 s) |
|
| float4 (const double s) |
|
| float4 (const float s) |
|
| float4 (const float4 &s) |
|
| float4 (const int s) |
|
float | hadd () const |
| Returns horizontal sum of elements.
|
|
| operator float () const |
|
float4 | operator* (const float4 &s) const |
|
float4 & | operator*= (const float4 &s) |
|
float4 | operator+ (const float4 &s) const |
|
float4 & | operator+= (const float4 &s) |
|
float4 | operator- (const float4 &s) const |
|
float4 & | operator-= (const float4 &s) |
|
float4 | operator/ (const float4 &s) const |
|
float4 & | operator/= (const float4 &s) |
|
float4 & | operator= (const __m128 s) |
|
float4 & | operator= (const float s) |
|
float4 & | operator= (const float4 &s) |
|
void | store (float *const p) const |
| Stores this value to the specified memory location.
|
|
void | storeu (float *const p) const |
| Stores this value to the specified memory location.
|
|
void | storeu (float *const p, int lim) const |
| Stores "lim" lower elements of this value to the specified memory location.
|
|
|
static void | addu (float *const p, const float4 &v) |
| Performs in-place addition of a value located in memory, and the specified value.
|
|
static void | addu (float *const p, const float4 &v, const int lim) |
| Performs in-place addition of a value located in memory, and the specified value. Limited to the specified number of elements.
|
|
static float4 | load (const float *const p) |
| Returns float4 value loaded from the specified memory location.
|
|
static float4 | loadu (const float *const p) |
| Returns float4 value loaded from the specified memory location.
|
|
static float4 | loadu (const float *const p, int lim) |
| Returns float4 value loaded from the specified memory location, with elements beyond "lim" set to 0.
|
|
|
__m128 | value |
| Packed value of 4 floats.
|
|
SIMD packed 4-float type.
This class implements a packed 4-float type that can be used to perform parallel computation using SIMD instructions on SSE-enabled processors. This class can be used as the "fptype" argument of the avir::fpclass_def class.
◆ addu() [1/2]
static void avir::float4::addu |
( |
float *const | p, |
|
|
const float4 & | v ) |
|
static |
Performs in-place addition of a value located in memory, and the specified value.
- Parameters
-
p | Pointer to value where addition happens. May be unaligned. |
v | Value to add. |
◆ addu() [2/2]
static void avir::float4::addu |
( |
float *const | p, |
|
|
const float4 & | v, |
|
|
const int | lim ) |
|
static |
Performs in-place addition of a value located in memory, and the specified value. Limited to the specified number of elements.
- Parameters
-
p | Pointer to value where addition happens. May be unaligned. |
v | Value to add. |
lim | The element number limit, greater than 0. |
◆ load()
static float4 avir::float4::load |
( |
const float *const | p | ) |
|
|
static |
Returns float4 value loaded from the specified memory location.
- Parameters
-
p | Pointer to memory from where the value should be loaded, should be 16-byte aligned. |
- Returns
- Loaded value.
◆ loadu() [1/2]
static float4 avir::float4::loadu |
( |
const float *const | p | ) |
|
|
static |
Returns float4 value loaded from the specified memory location.
- Parameters
-
p | Pointer to memory from where the value should be loaded, may have any alignment. |
- Returns
- Loaded value.
◆ loadu() [2/2]
static float4 avir::float4::loadu |
( |
const float *const | p, |
|
|
int | lim ) |
|
static |
Returns float4 value loaded from the specified memory location, with elements beyond "lim" set to 0.
- Parameters
-
p | Pointer to memory from where the value should be loaded, may have any alignment. |
lim | The maximum number of elements to load, greater than 0. |
- Returns
- Loaded value.
◆ store()
void avir::float4::store |
( |
float *const | p | ) |
const |
Stores this value to the specified memory location.
- Parameters
-
[out] | p | Output memory location, should be 16-byte aligned. |
◆ storeu() [1/2]
void avir::float4::storeu |
( |
float *const | p | ) |
const |
Stores this value to the specified memory location.
- Parameters
-
[out] | p | Output memory location, may have any alignment. |
◆ storeu() [2/2]
void avir::float4::storeu |
( |
float *const | p, |
|
|
int | lim ) const |
Stores "lim" lower elements of this value to the specified memory location.
- Parameters
-
[out] | p | Output memory location, may have any alignment. |
| lim | The number of lower elements to store, greater than 0. |