[ Tensor ] Add infinity check in Tensor
This PR adds an infinity value check for Tensor data.
. rename hasNaN to isValid
. add an infinity check to the isValid function, so it now checks for both NaN and Inf (a scalar sketch of the predicate follows the changed-file summary below)
. apply the same check to the blas_avx and blas_neon kernels
. make the graph and model code check is_valid rather than has_nan
. add a unit test for the isValid function

**Self evaluation:**
1. Build test:	 [X]Passed [ ]Failed [ ]Skipped
2. Run test:	 [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: jijoong.moon <[email protected]>
jijoongmoon committed May 14, 2024
1 parent 929eab9 commit 478e4c6
Showing 13 changed files with 250 additions and 77 deletions.
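Conceptually, the renamed check treats a tensor as valid only when every element is finite. The sketch below is an illustrative scalar reference for that predicate, not code from this commit: it uses std::isfinite for brevity, and the function name is invented. The actual kernels in the diffs that follow vectorize the same idea with AVX.

```cpp
#include <cmath>
#include <cstddef>

// Illustrative reference only: a buffer is "valid" when no element is NaN or
// +/-Inf. The AVX/NEON kernels in this commit vectorize this predicate.
bool is_valid_reference(const float *data, std::size_t n) {
  for (std::size_t i = 0; i < n; ++i) {
    // !isfinite(x) is true for NaN, +Inf and -Inf
    if (!std::isfinite(data[i]))
      return false;
  }
  return true;
}
```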
8 changes: 4 additions & 4 deletions nntrainer/graph/network_graph.cpp
@@ -404,7 +404,7 @@ bool NetworkGraph::backwarding(
*/
auto iter_begin = getBackwardingBeginIter();
auto iter_end = getBackwardingEndIter();
bool has_nan = false;
bool is_valid = true;

/// there is no layer to train, so backwarding is essentially noop
if (iter_begin == iter_end) {
@@ -422,16 +422,16 @@ bool NetworkGraph::backwarding(
for (iter_ = iter_begin; iter_ != iter_end && !stop_cb(userdata); iter_++) {
auto &ln = *iter_;
PROFILE_TIME_START(profile_keys.at(ln->getType()));
has_nan = backwarding_op(ln, iteration);
is_valid = backwarding_op(ln, iteration);
PROFILE_TIME_END(profile_keys.at(ln->getType()));

if (has_nan) {
if (!is_valid) {
std::cout << "Gradient has NaN" << std::endl;
break;
}
}

if (has_nan) {
if (!is_valid) {
/** if has NaN
* 1. reset the loss scale.
* 2. run forwarding from cur_iter to cend() && !stop_cb(userdata);
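The hunks above flip the flag from has_nan to is_valid: the backward loop now stops as soon as backwarding_op reports an invalid (NaN/Inf) gradient, and, per the in-code comment, the loss scale is then reset and forwarding is rerun. The sketch below restates only that control flow; LayerHandle, the callbacks, and reset_loss_scale are placeholders, not nntrainer APIs.

```cpp
#include <functional>
#include <iostream>
#include <vector>

// Illustrative only: LayerHandle and the two callbacks stand in for
// NetworkGraph internals; they are not types or functions from this commit.
struct LayerHandle {};

bool run_backwarding(std::vector<LayerHandle> &layers, int iteration,
                     const std::function<bool(LayerHandle &, int)> &backwarding_op,
                     const std::function<void()> &reset_loss_scale) {
  bool is_valid = true;
  for (auto &ln : layers) {
    // backwarding_op now returns false when a gradient contains NaN or Inf.
    is_valid = backwarding_op(ln, iteration);
    if (!is_valid) {
      std::cout << "Gradient has NaN" << std::endl;
      break;
    }
  }
  if (!is_valid) {
    // Per the in-code comment: reset the loss scale and rerun forwarding
    // before retrying the backward pass.
    reset_loss_scale();
    return false;
  }
  return true;
}
```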
9 changes: 5 additions & 4 deletions nntrainer/models/neuralnet.cpp
@@ -466,8 +466,8 @@ void NeuralNetwork::backwarding(int iteration,
RunLayerContext &rc = node->getRunContext();
if (rc.isMixedPrecision()) {
for (auto w : rc.getWeights()) {
if (w->getGradientRef().hasNaN())
return true;
if (!w->getGradientRef().isValid())
return false;
}
}
}
@@ -477,7 +477,7 @@ void NeuralNetwork::backwarding(int iteration,
PROFILE_MEM_ANNOTATE("CalcDerivative: " + node->getName());

if (stop_cb(userdata)) {
return false;
return true;
}

if (node->needsCalcDerivative()) {
@@ -498,7 +498,7 @@ void NeuralNetwork::backwarding(int iteration,
opt_->applyGradient(opt_context);
});
}
return false;
return true;
};

std::function<void(Weight &, int)> lazy_apply_grad_op =
@@ -510,6 +510,7 @@ void NeuralNetwork::backwarding(int iteration,
opt_->applyGradient(opt_context);
};

// return false if the gradient is not valid
bool ret = false;

while (!ret) {
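Note that the return convention of backwarding_op is inverted by this change: it now returns false as soon as any mixed-precision weight gradient is invalid, and true on success (including the stop_cb early exit). A self-contained sketch of the per-weight validity scan is below; the Tensor and Weight structs here are stand-ins, not nntrainer's real types.

```cpp
#include <vector>

// Stand-in types for illustration only.
struct Tensor {
  bool isValid() const { return valid; } // false when data holds NaN or Inf
  bool valid = true;
};
struct Weight {
  const Tensor &getGradientRef() const { return grad; }
  Tensor grad;
};

// Mirrors the new convention of backwarding_op: return false as soon as any
// gradient is invalid, true otherwise.
bool gradients_are_valid(const std::vector<Weight> &weights) {
  for (const auto &w : weights) {
    if (!w.getGradientRef().isValid())
      return false;
  }
  return true;
}
```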
104 changes: 83 additions & 21 deletions nntrainer/tensor/blas_avx.cpp
@@ -115,101 +115,163 @@ void vcvt_f32_f16(size_t N, const float *input, void *output) {
}
}

bool hasNaN(const size_t N, const _Float16 *input) {
bool isValid(const size_t N, const _Float16 *input) {
assert(N != 0);
assert(input != NULL);

int temp = 0;
size_t idx = 0;

const __m256 SIGN_MASK = _mm256_set1_ps(-0.0);
const __m256 INF = _mm256_set1_ps(std::numeric_limits<float>::infinity());

// 16 single-precision check : ( X != X )
for (; N - idx >= 16; idx += 16) {
const __m256 vec0 =
_mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)input));
const __m256 vec1 =
__m256 vec0 = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)input));
__m256 vec1 =
_mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)(input + 8)));

input += 16;

// check NaN in vec0
__m256 res = _mm256_cmp_ps(vec0, vec0, _CMP_NEQ_UQ);
temp = temp | _mm256_movemask_ps(res);
if (temp)
return false;

// check infinity in vec0
vec0 = _mm256_andnot_ps(SIGN_MASK, vec0);
vec0 = _mm256_cmp_ps(vec0, INF, _CMP_EQ_OQ);

temp = temp | _mm256_movemask_ps(vec0);
if (temp)
return true;
return false;

// check NaN in vec1
__m256 res1 = _mm256_cmp_ps(vec1, vec1, _CMP_NEQ_UQ);
temp = temp | _mm256_movemask_ps(res1);

if (temp)
return true;
return false;

// check infinity in vec1
vec1 = _mm256_andnot_ps(SIGN_MASK, vec1);
vec1 = _mm256_cmp_ps(vec1, INF, _CMP_EQ_OQ);

temp = temp | _mm256_movemask_ps(vec1);

if (temp)
return false;
}

// 8 single-precision check : ( X != X )
for (; N - idx >= 8; idx += 8) {
const __m256 vec = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)input));
__m256 vec = _mm256_cvtph_ps(_mm_loadu_si128((const __m128i *)input));
input += 8;
__m256 res = _mm256_cmp_ps(vec, vec, _CMP_NEQ_UQ);
temp = temp | _mm256_movemask_ps(res);

if (temp)
return true;
return false;

// check infinity in vec1
vec = _mm256_andnot_ps(SIGN_MASK, vec);
vec = _mm256_cmp_ps(vec, INF, _CMP_EQ_OQ);

temp = temp | _mm256_movemask_ps(vec);

if (temp)
return false;
}

// remain check : ( X != X )
// remain check : ( X != X || X == Inf )
while (idx < N) {
if (*input != *input) {
return true;
if (*input != *input || *input == std::numeric_limits<float>::infinity()) {
return false;
}
++input;
++idx;
}

return false;
return true;
}
#endif

bool hasNaN(const size_t N, const float *input) {
bool isValid(const size_t N, const float *input) {
assert(N != 0);
assert(input != NULL);

int temp = 0;
size_t idx = 0;

const __m256 SIGN_MASK = _mm256_set1_ps(-0.0);
const __m256 INF = _mm256_set1_ps(std::numeric_limits<float>::infinity());

// 16 single-precision check : ( X != X )
for (; N - idx >= 16; idx += 16) {
const __m256 vec0 = _mm256_loadu_ps(input);
const __m256 vec1 = _mm256_loadu_ps(input + 8);
__m256 vec0 = _mm256_loadu_ps(input);
__m256 vec1 = _mm256_loadu_ps(input + 8);
input += 16;
__m256 res = _mm256_cmp_ps(vec0, vec0, _CMP_NEQ_UQ);
temp = temp | _mm256_movemask_ps(res);

if (temp)
return false;

// check infinity in vec0
vec0 = _mm256_andnot_ps(SIGN_MASK, vec0);
vec0 = _mm256_cmp_ps(vec0, INF, _CMP_EQ_OQ);

temp = temp | _mm256_movemask_ps(vec0);
if (temp)
return false;

__m256 res1 = _mm256_cmp_ps(vec1, vec1, _CMP_NEQ_UQ);
temp = temp | _mm256_movemask_ps(res1);

if (temp)
return true;
return false;

// check infinity in vec1
vec1 = _mm256_andnot_ps(SIGN_MASK, vec1);
vec1 = _mm256_cmp_ps(vec1, INF, _CMP_EQ_OQ);

temp = temp | _mm256_movemask_ps(vec1);

if (temp)
return false;
}

// 8 single-precision check : ( X != X )
for (; N - idx >= 8; idx += 8) {
const __m256 vec = _mm256_loadu_ps(input);
__m256 vec = _mm256_loadu_ps(input);
input += 8;
__m256 res = _mm256_cmp_ps(vec, vec, _CMP_NEQ_UQ);
temp = temp | _mm256_movemask_ps(res);

if (temp)
return true;
return false;

// check infinity in vec
vec = _mm256_andnot_ps(SIGN_MASK, vec);
vec = _mm256_cmp_ps(vec, INF, _CMP_EQ_OQ);

temp = temp | _mm256_movemask_ps(vec);

if (temp)
return false;
}

// remain check : ( X != X )
while (idx < N) {
if (*input != *input) {
return true;
if (*input != *input || *input == std::numeric_limits<float>::infinity()) {
return false;
}
++input;
++idx;
}

return false;
return true;
}

} // namespace nntrainer::avx
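The vectorized checks above rely on two AVX idioms: a NaN never compares equal to itself, so _mm256_cmp_ps with _CMP_NEQ_UQ against the same register flags NaN lanes, and _mm256_andnot_ps(SIGN_MASK, x) clears the sign bit to give |x|, which can then be compared against +Inf to catch both infinities. The condensed sketch below shows one 8-float block under the assumption that AVX is available; the function name is illustrative, and tail handling plus the FP16 conversion are omitted.

```cpp
#include <immintrin.h>
#include <limits>

// Minimal per-block sketch of the two tests used above (assumes AVX support
// and a full 8-float block; the real kernel also handles tails and FP16).
bool block_is_valid_avx(const float *p) {
  const __m256 SIGN_MASK = _mm256_set1_ps(-0.0f);
  const __m256 INF = _mm256_set1_ps(std::numeric_limits<float>::infinity());

  __m256 v = _mm256_loadu_ps(p);

  // NaN test: NaN is the only value that compares unequal to itself.
  __m256 nan_mask = _mm256_cmp_ps(v, v, _CMP_NEQ_UQ);

  // Inf test: clear the sign bit (|x|) and compare against +Inf, so both
  // +Inf and -Inf are caught.
  __m256 abs_v = _mm256_andnot_ps(SIGN_MASK, v);
  __m256 inf_mask = _mm256_cmp_ps(abs_v, INF, _CMP_EQ_OQ);

  // Valid only when no lane is flagged by either mask.
  return _mm256_movemask_ps(_mm256_or_ps(nan_mask, inf_mask)) == 0;
}
```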
12 changes: 6 additions & 6 deletions nntrainer/tensor/blas_avx.h
@@ -43,22 +43,22 @@ void vcvt_f32_f16(size_t N, const float *input, void *output);

/**
* @brief check if the X has NaN value
* @note it compare !(x==x)
* @note it compare (x!=x || x == inf)
* @param[in] N length of the vector
* @param[in] X half-precision * for Vector X
* @param[out] true if it has NaN
* @param[out] false if it has NaN or inf
*/
bool hasNaN(const size_t N, const _Float16 *X);
bool isValid(const size_t N, const _Float16 *X);
#endif

/**
* @brief check if the X has NaN value
* @note it compare !(x==x)
* @note it compare (x!=x || x == inf)
* @param[in] N length of the vector
* @param[in] X float * for Vector X
* @param[out] true if it has NaN
* @param[out] false if it has NaN or inf
*/
bool hasNaN(const size_t N, const float *X);
bool isValid(const size_t N, const float *X);

} // namespace nntrainer::avx

22 changes: 11 additions & 11 deletions nntrainer/tensor/blas_interface.cpp
@@ -1038,14 +1038,14 @@ static void ele_div_fallback(const unsigned int N, const float *X,
}
}

static bool has_nan_fallback(const size_t N, const float *X) {
static bool is_valid_fallback(const size_t N, const float *X) {
for (size_t i = 0; i < N; ++i) {
if (*X != *X)
return true;
if (*X != *X || *X == std::numeric_limits<float>::infinity())
return false;
++X;
}

return false;
return true;
}

void ele_mul(const unsigned int N, const float *X, const float *Y, float *Z,
@@ -1100,28 +1100,28 @@ void ele_div(const unsigned int N, const float *X, const float *Y, float *Z,
ele_div_fallback(N, X, Y, Z, alpha, beta, i_stride, o_stride);
}

bool has_nan(const size_t N, ml::train::TensorDim::DataType d_type,
const void *X) {
bool is_valid(const size_t N, ml::train::TensorDim::DataType d_type,
const void *X) {
if (d_type == ml::train::TensorDim::DataType::FP16) {
#ifdef ENABLE_FP16
const _FP16 *vec = (const _FP16 *)X;
#ifdef USE_NEON
return nntrainer::neon::hasNaN(N, vec);
return nntrainer::neon::isValid(N, vec);
#elif defined(USE_AVX)
return nntrainer::avx::hasNaN(N, vec);
return nntrainer::avx::isValid(N, vec);
#else
throw std::invalid_argument("Error: enable-fp16 is not enabled");
#endif
#endif
} else if (d_type == ml::train::TensorDim::DataType::FP32) {
const float *vec = (const float *)X;
#ifdef USE_NEON
return nntrainer::neon::hasNaN(N, vec);
return nntrainer::neon::isValid(N, vec);
#elif defined(USE_AVX)
return nntrainer::avx::hasNaN(N, vec);
return nntrainer::avx::isValid(N, vec);
#endif

return has_nan_fallback(N, vec);
return is_valid_fallback(N, vec);
}
return false;
}
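The renamed is_valid wrapper above dispatches on the tensor data type and the compiled SIMD backend: NEON or AVX when available, a scalar fallback for FP32, and an exception for FP16 when no half-precision backend is enabled. A hypothetical caller might use it as sketched below; the include path is an assumption, not something taken from this commit.

```cpp
// Hypothetical usage sketch; the header path is assumed.
#include <blas_interface.h>
#include <vector>

bool check_buffer(const std::vector<float> &buf) {
  // Returns false if buf contains NaN or infinity.
  return nntrainer::is_valid(buf.size(),
                             ml::train::TensorDim::DataType::FP32,
                             buf.data());
}
```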
8 changes: 4 additions & 4 deletions nntrainer/tensor/blas_interface.h
@@ -480,13 +480,13 @@ void ele_div(const unsigned N, const float *X, const float *Y, float *Z,
unsigned int o_stride = 1);

/**
* @brief check if X array has NaN
* @brief check if X array has NaN or inf
* @param[in] N length of the vector
* @param[in] X float/fp16 * for Vector X
* @param[out] bool true if NaN else false
* @param[out] bool false if not valide else true
*/
bool has_nan(const size_t N, ml::train::TensorDim::DataType d_type,
const void *X);
bool is_valid(const size_t N, ml::train::TensorDim::DataType d_type,
const void *X);

} /* namespace nntrainer */
#endif /* __cplusplus */
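The commit message also mentions a new unit test for isValid; that test file is among the changed files not shown here. Purely as an illustration of what such a test could look like (not the actual test from this commit, and assuming Tensor's isValid()/setValue() interfaces plus the include path), a GTest sketch:

```cpp
// Illustrative only; not the unit test added by this commit.
#include <gtest/gtest.h>
#include <limits>
#include <tensor.h> // assumed include path

TEST(nntrainer_Tensor, isValid_illustrative) {
  nntrainer::Tensor t(1, 1, 2, 2);
  t.setValue(1.0f);
  EXPECT_TRUE(t.isValid());

  // An infinite element makes the tensor invalid.
  t.setValue(0, 0, 0, 0, std::numeric_limits<float>::infinity());
  EXPECT_FALSE(t.isValid());

  // So does a NaN element.
  t.setValue(0, 0, 0, 0, std::numeric_limits<float>::quiet_NaN());
  EXPECT_FALSE(t.isValid());
}
```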
(Remaining changed files not shown.)
