GNU Radio Manual and C++ API Reference  3.7.2.1
The Free & Open Software Radio Ecosystem
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
volk_32f_index_max_16u.h
Go to the documentation of this file.
1 #ifndef INCLUDED_volk_32f_index_max_16u_a_H
2 #define INCLUDED_volk_32f_index_max_16u_a_H
3 
4 #include <volk/volk_common.h>
5 #include <volk/volk_common.h>
6 #include <inttypes.h>
7 #include <stdio.h>
8 
9 #ifdef LV_HAVE_SSE4_1
10 #include<smmintrin.h>
11 
12 static inline void volk_32f_index_max_16u_a_sse4_1(unsigned int* target, const float* src0, unsigned int num_points) {
13  if(num_points > 0){
14  unsigned int number = 0;
15  const unsigned int quarterPoints = num_points / 4;
16 
17  float* inputPtr = (float*)src0;
18 
19  __m128 indexIncrementValues = _mm_set1_ps(4);
20  __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
21 
22  float max = src0[0];
23  float index = 0;
24  __m128 maxValues = _mm_set1_ps(max);
25  __m128 maxValuesIndex = _mm_setzero_ps();
26  __m128 compareResults;
27  __m128 currentValues;
28 
29  __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
30  __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
31 
32  for(;number < quarterPoints; number++){
33 
34  currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
35  currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
36 
37  compareResults = _mm_cmpgt_ps(maxValues, currentValues);
38 
39  maxValuesIndex = _mm_blendv_ps(currentIndexes, maxValuesIndex, compareResults);
40  maxValues = _mm_blendv_ps(currentValues, maxValues, compareResults);
41  }
42 
43  // Calculate the largest value from the remaining 4 points
44  _mm_store_ps(maxValuesBuffer, maxValues);
45  _mm_store_ps(maxIndexesBuffer, maxValuesIndex);
46 
47  for(number = 0; number < 4; number++){
48  if(maxValuesBuffer[number] > max){
49  index = maxIndexesBuffer[number];
50  max = maxValuesBuffer[number];
51  }
52  }
53 
54  number = quarterPoints * 4;
55  for(;number < num_points; number++){
56  if(src0[number] > max){
57  index = number;
58  max = src0[number];
59  }
60  }
61  target[0] = (unsigned int)index;
62  }
63 }
64 
65 #endif /*LV_HAVE_SSE4_1*/
66 
67 #ifdef LV_HAVE_SSE
68 #include<xmmintrin.h>
69 
70 static inline void volk_32f_index_max_16u_a_sse(unsigned int* target, const float* src0, unsigned int num_points) {
71  if(num_points > 0){
72  unsigned int number = 0;
73  const unsigned int quarterPoints = num_points / 4;
74 
75  float* inputPtr = (float*)src0;
76 
77  __m128 indexIncrementValues = _mm_set1_ps(4);
78  __m128 currentIndexes = _mm_set_ps(-1,-2,-3,-4);
79 
80  float max = src0[0];
81  float index = 0;
82  __m128 maxValues = _mm_set1_ps(max);
83  __m128 maxValuesIndex = _mm_setzero_ps();
84  __m128 compareResults;
85  __m128 currentValues;
86 
87  __VOLK_ATTR_ALIGNED(16) float maxValuesBuffer[4];
88  __VOLK_ATTR_ALIGNED(16) float maxIndexesBuffer[4];
89 
90  for(;number < quarterPoints; number++){
91 
92  currentValues = _mm_load_ps(inputPtr); inputPtr += 4;
93  currentIndexes = _mm_add_ps(currentIndexes, indexIncrementValues);
94 
95  compareResults = _mm_cmpgt_ps(maxValues, currentValues);
96 
97  maxValuesIndex = _mm_or_ps(_mm_and_ps(compareResults, maxValuesIndex) , _mm_andnot_ps(compareResults, currentIndexes));
98 
99  maxValues = _mm_or_ps(_mm_and_ps(compareResults, maxValues) , _mm_andnot_ps(compareResults, currentValues));
100  }
101 
102  // Calculate the largest value from the remaining 4 points
103  _mm_store_ps(maxValuesBuffer, maxValues);
104  _mm_store_ps(maxIndexesBuffer, maxValuesIndex);
105 
106  for(number = 0; number < 4; number++){
107  if(maxValuesBuffer[number] > max){
108  index = maxIndexesBuffer[number];
109  max = maxValuesBuffer[number];
110  }
111  }
112 
113  number = quarterPoints * 4;
114  for(;number < num_points; number++){
115  if(src0[number] > max){
116  index = number;
117  max = src0[number];
118  }
119  }
120  target[0] = (unsigned int)index;
121  }
122 }
123 
124 #endif /*LV_HAVE_SSE*/
125 
126 #ifdef LV_HAVE_GENERIC
/*!
 * \brief Finds the index of the largest value in src0 (portable scalar kernel).
 *
 * Scans src0[0..num_points-1] from left to right and stores the position of
 * the maximum in target[0]. Only a strictly larger sample replaces the current
 * best, so the first occurrence wins when the maximum is repeated.
 * target is left untouched when num_points is 0.
 *
 * \param target     Output; target[0] receives the index of the maximum.
 * \param src0       Input samples.
 * \param num_points Number of floats available in src0.
 */
static inline void volk_32f_index_max_16u_generic(unsigned int* target, const float* src0, unsigned int num_points) {
  unsigned int best_pos = 0;
  unsigned int pos;
  float best_val;

  // Nothing to scan: leave the output as the caller provided it.
  if(num_points == 0){
    return;
  }

  best_val = src0[0];
  for(pos = 1; pos < num_points; pos++){
    const float candidate = src0[pos];
    if(candidate > best_val){
      best_val = candidate;
      best_pos = pos;
    }
  }

  *target = best_pos;
}
145 
146 #endif /*LV_HAVE_GENERIC*/
147 
148 
149 #endif /*INCLUDED_volk_32f_index_max_16u_a_H*/
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:27