gnuradio · JVital2013 · Aug 31, 2024 · marcusmueller · Sep 5, 2024 · jdemel
diff --git a/gen/archs.xml b/gen/archs.xml
@@ -85,6 +85,7 @@ at the top, as a last resort.
 <arch name="neon">
   <flag compiler="gnu">-funsafe-math-optimizations</flag>
   <flag compiler="clang">-funsafe-math-optimizations</flag>
+  <flag compiler="msvc"> </flag> <!-- placeholder: no extra MSVC flag; presumably the generator needs a non-empty text node (TODO confirm) -->
   <alignment>16</alignment>
   <check name="neon"></check>
 </arch>
@@ -101,6 +102,7 @@ at the top, as a last resort.
 <arch name="neonv8">
   <flag compiler="gnu">-funsafe-math-optimizations</flag>
   <flag compiler="clang">-funsafe-math-optimizations</flag>
+  <flag compiler="msvc"> </flag> <!-- placeholder: no extra MSVC flag; presumably the generator needs a non-empty text node (TODO confirm) -->
   <alignment>16</alignment>
   <check name="neon"></check>
 </arch>

diff --git a/kernels/volk/volk_32f_index_max_32u.h b/kernels/volk/volk_32f_index_max_32u.h
@@ -299,7 +299,11 @@ volk_32f_index_max_32u_neon(uint32_t* target, const float* src0, uint32_t num_po
             if (maxValuesBuffer[number] > max) {
                 index = maxIndexesBuffer[number];
                 max = maxValuesBuffer[number];
+#if defined(_MSC_VER) && !defined(__clang__) /* cl.exe only: lanes via .n128_f32 */
+            } else if (maxValues.n128_f32[number] == max) {
+#else
             } else if (maxValues[number] == max) {
+#endif
                 if (index > maxIndexesBuffer[number])
                     index = maxIndexesBuffer[number];
             }

diff --git a/kernels/volk/volk_32f_index_min_32u.h b/kernels/volk/volk_32f_index_min_32u.h
@@ -284,7 +284,11 @@ volk_32f_index_min_32u_neon(uint32_t* target, const float* source, uint32_t num_
         if (minValuesBuffer[number] < min) {
             index = minIndexesBuffer[number];
             min = minValuesBuffer[number];
+#if defined(_MSC_VER) && !defined(__clang__) /* cl.exe only: lanes via .n128_f32 */
+        } else if (minValues.n128_f32[number] == min) {
+#else
         } else if (minValues[number] == min) {
+#endif
             if (index > minIndexesBuffer[number])
                 index = minIndexesBuffer[number];
         }

diff --git a/kernels/volk/volk_32fc_accumulator_s32fc.h b/kernels/volk/volk_32fc_accumulator_s32fc.h
@@ -229,10 +229,10 @@ static inline void volk_32fc_accumulator_s32fc_neon(lv_32fc_t* result,
     lv_32fc_t returnValue = lv_cmake(0.f, 0.f);
     unsigned int eighthPoints = num_points / 8;
     float32x4_t in_vec;
-    float32x4_t out_vec0 = { 0.f, 0.f, 0.f, 0.f };
-    float32x4_t out_vec1 = { 0.f, 0.f, 0.f, 0.f };
-    float32x4_t out_vec2 = { 0.f, 0.f, 0.f, 0.f };
-    float32x4_t out_vec3 = { 0.f, 0.f, 0.f, 0.f };
+    float32x4_t out_vec0 = vdupq_n_f32(0.f); /* all four accumulators start at zero */
+    float32x4_t out_vec1 = vdupq_n_f32(0.f);
+    float32x4_t out_vec2 = vdupq_n_f32(0.f);
+    float32x4_t out_vec3 = vdupq_n_f32(0.f);
     __VOLK_ATTR_ALIGNED(32) float tempBuffer[4];
 
     for (; number < eighthPoints; number++) {

diff --git a/kernels/volk/volk_32fc_convert_16ic.h b/kernels/volk/volk_32fc_convert_16ic.h
@@ -236,7 +236,7 @@ static inline void volk_32fc_convert_16ic_neonv8(lv_16sc_t* outputVector,
     const float32x4_t max_val = vmovq_n_f32(max_val_f);
     float32x4_t ret1, ret2, a, b;
 
-    int32x4_t toint_a = { 0, 0, 0, 0 }, toint_b = { 0, 0, 0, 0 };
+    int32x4_t toint_a = vmovq_n_s32(0), toint_b = vmovq_n_s32(0); /* zeroed lanes */
     int16x4_t intInputVal1, intInputVal2;
     int16x8_t res;
 

diff --git a/kernels/volk/volk_32u_byteswap.h b/kernels/volk/volk_32u_byteswap.h
@@ -201,7 +201,10 @@ static inline void volk_32u_byteswap_neonv8(uint32_t* intsToSwap, unsigned int n
     uint32_t* inputPtr = (uint32_t*)intsToSwap;
     const unsigned int n8points = num_points / 8;
     uint8x16_t input;
-    uint8x16_t idx = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+
+    /* Indices that reverse the bytes of each 4-byte group; loaded via intrinsic. */
+    const uint8_t byte_order[16] = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+    uint8x16_t idx = vld1q_u8(byte_order);
 
     unsigned int number = 0;
     for (number = 0; number < n8points; ++number) {

diff --git a/kernels/volk/volk_32u_reverse_32u.h b/kernels/volk/volk_32u_reverse_32u.h
@@ -262,7 +262,9 @@ volk_32u_reverse_32u_neonv8(uint32_t* out, const uint32_t* in, unsigned int num_
     const uint32_t* in_ptr = in;
     uint32_t* out_ptr = out;
 
-    const uint8x16_t idx = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+    /* Indices that reverse the bytes of each 4-byte group; loaded via intrinsic. */
+    const uint8_t idx_data[] = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+    const uint8x16_t idx = vld1q_u8(idx_data); /* stays const, as before this change */
 
     const unsigned int quarterPoints = num_points / 4;
     unsigned int number = 0;
@@ -290,8 +292,15 @@ volk_32u_reverse_32u_neonv8(uint32_t* out, const uint32_t* in, unsigned int num_
 
 #ifdef LV_HAVE_NEON
 #include <arm_neon.h>
-
-#if defined(__aarch64__)
+#ifdef _MSC_VER /* plain C: swap bytes, then reverse the bits inside each byte */
+#define DO_RBIT                                                                 \
+    *out_ptr = _byteswap_ulong(*in_ptr);                                        \
+    *out_ptr = ((*out_ptr & 0x55555555) << 1) | ((*out_ptr & 0xAAAAAAAA) >> 1); \
+    *out_ptr = ((*out_ptr & 0x33333333) << 2) | ((*out_ptr & 0xCCCCCCCC) >> 2); \
+    *out_ptr = ((*out_ptr & 0x0F0F0F0F) << 4) | ((*out_ptr & 0xF0F0F0F0) >> 4); \
+    in_ptr++;                                                                   \
+    out_ptr++;
+#elif defined(__aarch64__)
 #define DO_RBIT                             \
     __VOLK_ASM("rbit %w[result], %w[value]" \
                : [result] "=r"(*out_ptr)    \

diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt
@@ -221,20 +221,24 @@ check_c_source_compiles(
 
 if(neon_compile_result)
     set(CMAKE_REQUIRED_INCLUDES ${PROJECT_SOURCE_DIR}/include)
+    if(MSVC) # skip the inline-asm probe for MSVC and decide from the target width
+        if(CMAKE_SIZEOF_VOID_P EQUAL 4) # 32-bit ARM target: no AArch64 kernels
+            overrule_arch(neonv8 "Compiler doesn't support neonv8")
+        endif()
+    else(MSVC)
+        check_c_source_compiles(
+            "#include <volk/volk_common.h>\n int main(){__VOLK_ASM(\"sub v1.4s,v1.4s,v1.4s\");}"
+            have_neonv8_result)
+        if(NOT have_neonv8_result)
+            overrule_arch(neonv8 "Compiler doesn't support neonv8")
+        endif()
+    endif(MSVC)
     check_c_source_compiles(
         "#include <volk/volk_common.h>\n int main(){__VOLK_ASM(\"vrev32.8 q0, q0\");}"
         have_neonv7_result)
-    check_c_source_compiles(
-        "#include <volk/volk_common.h>\n int main(){__VOLK_ASM(\"sub v1.4s,v1.4s,v1.4s\");}"
-        have_neonv8_result)
-
     if(NOT have_neonv7_result)
         overrule_arch(neonv7 "Compiler doesn't support neonv7")
     endif()
-
-    if(NOT have_neonv8_result)
-        overrule_arch(neonv8 "Compiler doesn't support neonv8")
-    endif()
 else(neon_compile_result)
     overrule_arch(neon "Compiler doesn't support NEON")
     overrule_arch(neonv7 "Compiler doesn't support NEON")