Programming Example

This sample program uses the F32vec4 class to average the elements of a 20-element floating-point array. This code is also provided as a sample in the file AvgClass.cpp.

// Include Streaming SIMD Extensions class definitions

#include <fvec.h>

 

// Shuffle any 2 single-precision floating-point (SP FP) values from a
// into the low 2 SP FP and any 2 SP FP values from b
// into the high 2 SP FP of the destination.
// i is the immediate selector forwarded to _mm_shuffle_ps.
// Arguments and the whole expansion are parenthesized so the macro
// composes safely inside larger expressions.

#define SHUFFLE(a,b,i) ((F32vec4)_mm_shuffle_ps((a),(b),(i)))
#include <stdio.h>
#define SIZE 20

 

// Global variables

float result;
_MM_ALIGN 16 float array[SIZE];

 

//*****************************************************************
// Function: Add20ArrayElements
// Add all the elements of a 20 element array
//*****************************************************************

 

void Add20ArrayElements (F32vec4 *array, float *result)

{
F32vec4 vec0, vec1;
vec0 = _mm_load_ps ((float *) array);

 

// Load array's first 4 floats

 

//*****************************************************
// Add all elements of the array, 4 elements at a time
//******************************************************

 

vec0 += array[1];// Add elements 5-8
vec0 += array[2];// Add elements 9-12
vec0 += array[3];// Add elements 13-16
vec0 += array[4];// Add elements 17-20

 

//*****************************************************************
// There are now 4 partial sums. Add the 2 lowers to the 2 raises,
// then add those 2 results together
//*****************************************************************

 

vec1 = SHUFFLE(vec1, vec0, 0x40);
vec0 += vec1;
vec1 = SHUFFLE(vec1, vec0, 0x30);
vec0 += vec1;
vec0 = SHUFFLE(vec0, vec0, 2);

_mm_store_ss (result, vec0); // Store the final sum

}
 

void main(int argc, char *argv[])
{

int i;
 

// Initialize the array

for (i=0; i < SIZE; i++)

{
  array[i] = (float) i;
}

 

// Call function to add all array elements
Add20ArrayElements(array, &result);

 

// Print average array element value
printf ("Average of all array values = %f\n", result/20.);
printf ("The correct answer is %f\n\n\n", 9.5);

}