diff --git a/include/linalg/_dgemm.hpp b/include/linalg/_dgemm.hpp index 67d2155af..4f10342f3 100644 --- a/include/linalg/_dgemm.hpp +++ b/include/linalg/_dgemm.hpp @@ -59,39 +59,6 @@ class DGEMM { static double DGEMM_BUFF_C[BLOCK_SZ_MR * BLOCK_SZ_NR] __attribute__((aligned(16))); - /** - * @brief Performs matrix-matrix multiplication (DGEMM) using an - * assembly implementation It computes the product of matrices A and B, - * scaled by alpha and beta, and stores the result in matrix C - * - * @param A Pointer to the first matrix (A) in row-major order - * @param B Pointer to the second matrix (B) in row-major order - * @param C Pointer to the result matrix (C) in row-major order - * @param nextA Pointer to the next matrix A - * @param nextB Pointer to the next matrix B - * @param kl Value representing the remaining columns of matrix A - * @param kb Value representing the remaining rows of matrix B - * @param incRowC Increment for moving to the next row of matrix C - * @param incColC Increment for moving to the next column of matrix C - * @param alpha Scalar value to scale the product of matrices A and B - * @param beta Scalar value to scale matrix C before adding the product - * - * @note This calls an Assembly implementation depending on detected - * host system. 
x86 (SSE, AVX2) and ARM NEON supported - */ - /*void dgemm_kernel_asm(const double *A, - const double *B, - double *C, - const double *nextA, - const double *nextB, - long kl, - long kb, - long incRowC, - long incColC, - double alpha, - double beta); -*/ - /** * @brief Packs micro panels of size BLOCK_SZ_MR rows by k columns from A * without padding diff --git a/modules/linalg/dgemm_arr.cpp b/modules/linalg/dgemm_arr.cpp index ac732c51d..43531acc5 100644 --- a/modules/linalg/dgemm_arr.cpp +++ b/modules/linalg/dgemm_arr.cpp @@ -48,6 +48,26 @@ extern "C" { #endif // ASM micro kernel function +/** + * @brief Performs matrix-matrix multiplication (DGEMM) using an + * assembly implementation. It computes the product of matrices A and B, + * scaled by alpha and beta, and stores the result in matrix C. + * + * @param A Pointer to the first matrix (A) in row-major order + * @param B Pointer to the second matrix (B) in row-major order + * @param C Pointer to the result matrix (C) in row-major order + * @param nextA Pointer to the next matrix A + * @param nextB Pointer to the next matrix B + * @param kl Value representing the remaining columns of matrix A + * @param kb Value representing the remaining rows of matrix B + * @param incRowC Increment for moving to the next row of matrix C + * @param incColC Increment for moving to the next column of matrix C + * @param alpha Scalar value to scale the product of matrices A and B + * @param beta Scalar value to scale matrix C before adding the product + * + * @note This calls an Assembly implementation depending on detected + * host system. 
x86 (SSE, AVX2) and ARM NEON supported + */ extern void dgemm_kernel_asm(const double *A, const double *B, double *C, diff --git a/modules/linalg/dgemm_asm.h b/modules/linalg/dgemm_asm.h deleted file mode 100644 index 02667c3bc..000000000 --- a/modules/linalg/dgemm_asm.h +++ /dev/null @@ -1,4 +0,0 @@ -#ifndef DGEMM_ASM_H -#define DGEMM_ASM_H - -#endif diff --git a/tests/linalg/t_matrix_arr_f64.cpp b/tests/linalg/t_matrix_arr_f64.cpp index 95f4a74d9..3430f9190 100644 --- a/tests/linalg/t_matrix_arr_f64.cpp +++ b/tests/linalg/t_matrix_arr_f64.cpp @@ -18,7 +18,7 @@ const double TOLERANCE = 1e-3; using namespace gpmp; #define TEST_COUT std::cerr << "\033[32m[ ] [ INFO ] \033[0m" #define INFO_COUT \ - std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;31m\033[1m" + std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;34m\033[1m" namespace { diff --git a/tests/linalg/t_matrix_arr_f90.cpp b/tests/linalg/t_matrix_arr_f90.cpp index 2da14b370..0cd49396d 100644 --- a/tests/linalg/t_matrix_arr_f90.cpp +++ b/tests/linalg/t_matrix_arr_f90.cpp @@ -16,7 +16,7 @@ using namespace gpmp; #define TEST_COUT std::cerr << "\033[32m[ ] [ INFO ] \033[0m" #define INFO_COUT \ - std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;31m\033[1m" + std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;34m\033[1m" namespace { TEST(FORTRAN90MatrixArrayTestI32, AdditionPerformanceComparison) { diff --git a/tests/linalg/t_matrix_arr_i16.cpp b/tests/linalg/t_matrix_arr_i16.cpp index b9178e768..af52cee8a 100644 --- a/tests/linalg/t_matrix_arr_i16.cpp +++ b/tests/linalg/t_matrix_arr_i16.cpp @@ -16,7 +16,7 @@ using namespace gpmp; #define TEST_COUT std::cerr << "\033[32m[ ] [ INFO ] \033[0m" #define INFO_COUT \ - std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;31m\033[1m" + std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;34m\033[1m" namespace { TEST(MatrixArrayTestI16, AdditionComparisonSmall) { diff --git a/tests/linalg/t_matrix_arr_i32.cpp b/tests/linalg/t_matrix_arr_i32.cpp index 8d2f726a5..47824e11a 100644 --- 
a/tests/linalg/t_matrix_arr_i32.cpp +++ b/tests/linalg/t_matrix_arr_i32.cpp @@ -16,7 +16,7 @@ using namespace gpmp; #define TEST_COUT std::cerr << "\033[32m[ ] [ INFO ] \033[0m" #define INFO_COUT \ - std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;31m\033[1m" + std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;34m\033[1m" namespace { TEST(MatrixArrayTestI32, AdditionComparisonSmall) { diff --git a/tests/linalg/t_matrix_arr_i8.cpp b/tests/linalg/t_matrix_arr_i8.cpp index 356192fcd..21c5b0c8c 100644 --- a/tests/linalg/t_matrix_arr_i8.cpp +++ b/tests/linalg/t_matrix_arr_i8.cpp @@ -16,7 +16,7 @@ using namespace gpmp; #define TEST_COUT std::cerr << "\033[32m[ ] [ INFO ] \033[0m" #define INFO_COUT \ - std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;31m\033[1m" + std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;34m\033[1m" namespace { TEST(MatrixArrayTestI8, AdditionComparisonSmall) { diff --git a/tests/linalg/t_matrix_arr_naive.cpp b/tests/linalg/t_matrix_arr_naive.cpp index a8bf13fc0..002e0a599 100644 --- a/tests/linalg/t_matrix_arr_naive.cpp +++ b/tests/linalg/t_matrix_arr_naive.cpp @@ -13,7 +13,7 @@ using namespace gpmp; #define TEST_COUT std::cerr << "\033[32m[ ] [ INFO ] \033[0m" #define INFO_COUT \ - std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;31m\033[1m" + std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;34m\033[1m" namespace { TEST(MatrixArrayTest, BasicTest) { INFO_COUT << "MATRIX (as Arrays) NAIVE" << std::endl; diff --git a/tests/linalg/t_matrix_vector_f64.cpp b/tests/linalg/t_matrix_vector_f64.cpp index a0b8f46ad..399f940c0 100644 --- a/tests/linalg/t_matrix_vector_f64.cpp +++ b/tests/linalg/t_matrix_vector_f64.cpp @@ -16,7 +16,7 @@ using namespace gpmp; #define TEST_COUT std::cerr << "\033[32m[ ] [ INFO ] \033[0m" #define INFO_COUT \ - std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;31m\033[1m" + std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;34m\033[1m" namespace { TEST(MatrixVectorTestF64, AdditionComparisonSmall) { diff --git 
a/tests/linalg/t_matrix_vector_i32.cpp b/tests/linalg/t_matrix_vector_i32.cpp index fac655c5d..9a2e49333 100644 --- a/tests/linalg/t_matrix_vector_i32.cpp +++ b/tests/linalg/t_matrix_vector_i32.cpp @@ -16,7 +16,7 @@ using namespace gpmp; #define TEST_COUT std::cerr << "\033[32m[ ] [ INFO ] \033[0m" #define INFO_COUT \ - std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;31m\033[1m" + std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;34m\033[1m" namespace { // test case to compare the results of the intrinsics implementation with the diff --git a/tests/linalg/t_mtx.f90 b/tests/linalg/t_mtx.f90 index 986ac461a..beacd71e2 100644 --- a/tests/linalg/t_mtx.f90 +++ b/tests/linalg/t_mtx.f90 @@ -62,7 +62,7 @@ subroutine test_mtx_add() do i = 1, nrows do j = 1, ncols if (c(i, j) /= a(i, j) + b(i, j)) then - print *, ''//achar(27)//'[31m [!] LINALG MTX ADD (FLOAT) FAILED'//achar(27)//'[0m' + print *, ''//achar(27)//'[34m [!] LINALG MTX ADD (FLOAT) FAILED'//achar(27)//'[0m' failed = .true. exit end if @@ -76,7 +76,7 @@ subroutine test_mtx_add() do i = 1, nrows do j = 1, ncols if (c(i, j) /= a(i, j) + b(i, j)) then - print *, ''//achar(27)//'[31m [!] LINALG MTX ADD (INT) FAILED'//achar(27)//'[0m' + print *, ''//achar(27)//'[34m [!] LINALG MTX ADD (INT) FAILED'//achar(27)//'[0m' failed = .true. 
exit end if diff --git a/tests/linalg/t_vector_vector_f64.cpp b/tests/linalg/t_vector_vector_f64.cpp index dd48ddfdd..c4c5a94a2 100644 --- a/tests/linalg/t_vector_vector_f64.cpp +++ b/tests/linalg/t_vector_vector_f64.cpp @@ -13,9 +13,9 @@ const double TOLERANCE = 1e-3; -#define TEST_COUT std::cerr << "\033[32m[ ] [ INFO ] \033[0m" +#define TEST_COUT std::cout << "\033[32m[ ] [ INFO ] \033[0m" #define INFO_COUT \ - std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;31m\033[1m" + std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;34m\033[1m" TEST(VectorVectorTestF64, Addition) { INFO_COUT << "Vector (as Vectors) FLOAT64" << std::endl; diff --git a/tests/linalg/t_vector_vector_i16.cpp b/tests/linalg/t_vector_vector_i16.cpp index 6cba052ca..aba3c432f 100644 --- a/tests/linalg/t_vector_vector_i16.cpp +++ b/tests/linalg/t_vector_vector_i16.cpp @@ -15,7 +15,7 @@ const double TOLERANCE = 1e-3; #define TEST_COUT std::cerr << "\033[32m[ ] [ INFO ] \033[0m" #define INFO_COUT \ - std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;31m\033[1m" + std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;34m\033[1m" /*****************************************************************************/ /** VECTOR TESTS */ diff --git a/tests/linalg/t_vector_vector_i32.cpp b/tests/linalg/t_vector_vector_i32.cpp index 4923d5e7e..1519887d4 100644 --- a/tests/linalg/t_vector_vector_i32.cpp +++ b/tests/linalg/t_vector_vector_i32.cpp @@ -15,7 +15,7 @@ const double TOLERANCE = 1e-3; #define TEST_COUT std::cerr << "\033[32m[ ] [ INFO ] \033[0m" #define INFO_COUT \ - std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;31m\033[1m" + std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;34m\033[1m" /*****************************************************************************/ /** VECTOR TESTS */ diff --git a/tests/linalg/t_vector_vector_i64.cpp b/tests/linalg/t_vector_vector_i64.cpp index e5b4f1df3..c30d1e754 100644 --- a/tests/linalg/t_vector_vector_i64.cpp +++ b/tests/linalg/t_vector_vector_i64.cpp @@ -15,7 +15,7 @@ 
const double TOLERANCE = 1e-3; #define TEST_COUT std::cerr << "\033[32m[ ] [ INFO ] \033[0m" #define INFO_COUT \ - std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;31m\033[1m" + std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;34m\033[1m" /*****************************************************************************/ /** VECTOR TESTS */ diff --git a/tests/linalg/t_vector_vector_i8.cpp b/tests/linalg/t_vector_vector_i8.cpp index 89e01e0e8..618feadf1 100644 --- a/tests/linalg/t_vector_vector_i8.cpp +++ b/tests/linalg/t_vector_vector_i8.cpp @@ -15,7 +15,7 @@ const double TOLERANCE = 1e-3; #define TEST_COUT std::cerr << "\033[32m[ ] [ INFO ] \033[0m" #define INFO_COUT \ - std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;31m\033[1m" + std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;34m\033[1m" /*****************************************************************************/ /** VECTOR TESTS */ diff --git a/tests/linalg/t_vector_vector_naive.cpp b/tests/linalg/t_vector_vector_naive.cpp index c6d9d4ae9..92174662c 100644 --- a/tests/linalg/t_vector_vector_naive.cpp +++ b/tests/linalg/t_vector_vector_naive.cpp @@ -15,7 +15,7 @@ const double TOLERANCE = 1e-3; #define TEST_COUT std::cerr << "\033[32m[ ] [ INFO ] \033[0m" #define INFO_COUT \ - std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;31m\033[1m" + std::cerr << "\033[32m[ ] [ INFO ] \033[0m\033[1;34m\033[1m" TEST(VectorVectorTestDouble, Addition) { INFO_COUT << "Vector (as Vectors) NAIVE" << std::endl;