Skip to content

Commit

Permalink
#EDITS: updates to linalg module and file seperation
Browse files Browse the repository at this point in the history
  • Loading branch information
akielaries committed Feb 18, 2024
1 parent eb62e0e commit 8423b74
Show file tree
Hide file tree
Showing 6 changed files with 131 additions and 18 deletions.
13 changes: 13 additions & 0 deletions include/linalg/mtx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,19 @@ class Mtx {
* designated Fortran subroutine. Accepts type int
*/
void mtx_add_f90(int *A, int *B, int *C, std::size_t matrixSize);

void mtx_mult_f90(int *A,
int *B,
int *C,
std::size_t rows_a,
std::size_t cols_a,
std::size_t cols_b);

/* void mtx_mult_f90(int *A,
int *B,
int *C,
std::size_t mtx_size);
*/
#endif

#if defined(__x86_64__) || defined(i386) || defined(__i386__) || \
Expand Down
17 changes: 17 additions & 0 deletions modules/linalg/mtx_arr_f90.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ void mtx_add_routine_float_(float *A,

// Matrix add routine (INT)
void mtx_add_routine_int_(int *A, int *B, int *C, std::size_t *mtx_size);

void mtx_mult_routine_int_(int *A,
int *B,
int *C,
std::size_t *rows_a,
std::size_t *cols_a,
std::size_t *cols_b);
}

// C++ wrapper for Fortran mtx addition subroutine FLOAT
Expand All @@ -82,4 +89,14 @@ void gpmp::linalg::Mtx::mtx_add_f90(int *A,
mtx_add_routine_int_(A, B, C, &mtx_size);
}

// C++ wrapper for Fortran mtx multiplication subroutine INT
void gpmp::linalg::Mtx::mtx_mult_f90(int *A,
int *B,
int *C,
std::size_t rows_a,
std::size_t cols_a,
std::size_t cols_b) {
mtx_mult_routine_int_(A, B, C, &rows_a, &cols_a, &cols_b);
}

#endif
10 changes: 4 additions & 6 deletions modules/linalg/mtx_avx2_arr_i32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -203,11 +203,11 @@ void gpmp::linalg::Mtx::mtx_mult(const int *A,

// Handle remaining elements
for (int j = cols_b - cols_b % 4; j < cols_b; ++j) {
long long sum = 0;
int64_t sum = 0;

for (int k = 0; k < cols_a; ++k) {
sum += static_cast<long long>(A[i * cols_a + k]) *
B[k * cols_b + j];
sum +=
static_cast<int64_t>(A[i * cols_a + k]) * B[k * cols_b + j];
}

C[i * cols_b + j] = sum;
Expand All @@ -220,9 +220,7 @@ void gpmp::linalg::Mtx::mtx_tpose(const int *A, int *C, int rows, int cols) {
for (int i = 0; i < rows; i += 8) {
for (int j = 0; j < cols; j += 8) {
// Load 8x8 block from A
//__m256i a0 = _mm256_loadu_si256((__m256i *)(A + i * cols + j));
__m256i a0 = _mm256_loadu_si256((const __m256i *)(A + i * cols + j));

__m256i a0 = _mm256_loadu_si256((__m256i *)(A + i * cols + j));
__m256i a1 =
_mm256_loadu_si256((__m256i *)(A + (i + 1) * cols + j));
__m256i a2 =
Expand Down
44 changes: 37 additions & 7 deletions modules/linalg/mtx_routines.f90
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
! mtx_routines.f90

!> FORTRAN Subroutine for Matrix Addition on flattened matrices as arrays
!! of type float. Contains C++ wrapper function
!! of type float32. Contains C++ wrapper function
!! @param A Addend A, an array representing a Matrix
!! @param B Addend B, an array representing a Matrix
!! @param C Sum C, an array representing the sum of A + B
Expand All @@ -49,6 +49,12 @@ SUBROUTINE mtx_add_routine_float_(A, B, C, mtx_size) bind(C)
C = A + B
END SUBROUTINE mtx_add_routine_float_

!> FORTRAN Subroutine for Matrix Addition on flattened matrices as arrays
!! of type int32. Contains C++ wrapper function
!! @param A Addend A, an array representing a Matrix
!! @param B Addend B, an array representing a Matrix
!! @param C Sum C, an array representing the sum of A + B
!! @param mtx_size Assumes same size M x N
SUBROUTINE mtx_add_routine_int_(A, B, C, mtx_size) bind(C)
USE :: ISO_FORTRAN_ENV
USE :: ISO_C_BINDING
Expand All @@ -67,21 +73,45 @@ END SUBROUTINE mtx_add_routine_int_
!! @param c Product c, an array representing the sum of a + b
!! @param nrows_a Number of rows
!! @param ncols Number of columns
SUBROUTINE mtx_mult(matrix1, matrix2, result, nrows1, ncols1, ncols2)
SUBROUTINE Bmtx_mult_routine_int_(A, B, C, nrows1, ncols1, ncols2) bind(C)
USE :: ISO_FORTRAN_ENV
USE :: ISO_C_BINDING

implicit none
INTEGER, INTENT(IN) :: nrows1, ncols1, ncols2
REAL, INTENT(IN) :: matrix1(nrows1, ncols1), matrix2(ncols1, ncols2)
REAL, INTENT(OUT) :: result(nrows1, ncols2)
REAL, INTENT(IN) :: A(nrows1, ncols1), B(ncols1, ncols2)
REAL, INTENT(OUT) :: C(nrows1, ncols2)
INTEGER :: i, j, k

! Perform matrix multiplication
do i = 1, nrows1
do j = 1, ncols2
result(i, j) = 0.0
C(i, j) = 0
do k = 1, ncols1
result(i, j) = result(i, j) + matrix1(i, k)*matrix2(k, j)
C(i, j) = C(i, j) + A(i, k)*B(k, j)
end do
end do
end do
END SUBROUTINE mtx_mult
END SUBROUTINE Bmtx_mult_routine_int_

SUBROUTINE mtx_mult_routine_int_(A, B, C, mtx_size) bind(C)
USE :: ISO_FORTRAN_ENV
USE :: ISO_C_BINDING

INTEGER, INTENT(IN) :: mtx_size
INTEGER(KIND=C_INT), DIMENSION(mtx_size, mtx_size), INTENT(IN) :: A, B
INTEGER(KIND=C_INT), DIMENSION(mtx_size, mtx_size), INTENT(OUT) :: C

INTEGER :: i, j, k

! Perform matrix multiplication
DO i = 1, mtx_size
DO j = 1, mtx_size
C(i, j) = 0
DO k = 1, mtx_size
C(i, j) = C(i, j) + A(i, k) * B(k, j)
END DO
END DO
END DO
END SUBROUTINE mtx_mult_routine_int_

2 changes: 2 additions & 0 deletions tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ set(CPP_TEST_FILES
linalg/t_vector_vector_naive.cpp
linalg/t_matrix_arr_naive.cpp

linalg/t_matrix_arr_f90.cpp

nt/t_cipher.cpp
nt/t_rc4.cpp
nt/t_primes.cpp
Expand Down
63 changes: 58 additions & 5 deletions tests/linalg/t_matrix_arr_f90.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ using namespace gpmp;

namespace {

TEST(MatrixArrayTestI32, AdditionPerformanceComparisonF90) {
TEST(FORTRAN90MatrixArrayTestI32, AdditionPerformanceComparison) {
int mtx_size = 1024;
TEST_COUT << "Matrix size : " << mtx_size << std::endl;
// define input matrices A and B
Expand Down Expand Up @@ -49,26 +49,79 @@ TEST(MatrixArrayTestI32, AdditionPerformanceComparisonF90) {
auto end_std = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed_seconds_std = end_std - start_std;

auto start_f90 = std::chrono::high_resolution_clock::now();

// result using the f90sics implementation
mtx.mtx_add_f90(A, B, result, mtx_size);
auto end_f90 = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed_seconds_f90 =
end_f90 - start_f90;

TEST_COUT << "FORTRAN90 Matrix Addition Time : "
<< elapsed_seconds_f90.count() << " seconds" << std::endl;
TEST_COUT << "STANDARD Matrix Addition Time : "
<< elapsed_seconds_std.count() << " seconds" << std::endl;

// compare the results
ASSERT_TRUE(mtx_verif(expected, result, mtx_size, mtx_size));
delete[] A;
delete[] B;
delete[] expected;
delete[] result;
}

TEST(FORTRAN90MatrixArrayTestI32, MultiplicationPerformanceComparison) {
int mtx_size = 1024;
TEST_COUT << "Matrix size : " << mtx_size << std::endl;
// define input matrices A and B
int *A = new int[mtx_size * mtx_size];
int *B = new int[mtx_size * mtx_size];
int *expected = new int[mtx_size * mtx_size];
int *result = new int[mtx_size * mtx_size];

// initialize random number generator
std::random_device rd;
std::mt19937 gen(rd());
std::uniform_int_distribution<int> distribution(1, 100);

// populate matrices A and B with random values
for (int i = 0; i < mtx_size; ++i) {
for (int j = 0; j < mtx_size; ++j) {
A[i * mtx_size + j] = distribution(gen);
B[i * mtx_size + j] = distribution(gen);
}
}

gpmp::linalg::Mtx mtx;
auto start_std = std::chrono::high_resolution_clock::now();

// expected result using the naive implementation
mtx.std_mtx_mult(A, B, expected, mtx_size, mtx_size, mtx_size);

auto end_std = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed_seconds_std = end_std - start_std;

auto start_intrin = std::chrono::high_resolution_clock::now();

// result using the intrinsics implementation
mtx.mtx_add_f90(A, B, result, mtx_size, mtx_size);
mtx.mtx_mult_f90(A, B, result, mtx_size, mtx_size, mtx_size);
auto end_intrin = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed_seconds_intrin =
end_intrin - start_intrin;

TEST_COUT << "INTRINSIC Matrix Addition Time : "
TEST_COUT << "INTRINSIC Matrix Multiplication Time : "
<< elapsed_seconds_intrin.count() << " seconds" << std::endl;
TEST_COUT << "STANDARD Matrix Addition Time : "
TEST_COUT << "STANDARD Matrix Multiplication Time : "
<< elapsed_seconds_std.count() << " seconds" << std::endl;

// compare the results
ASSERT_TRUE(mtx_verif(expected, result, mtx_size, mtx_size));
//ASSERT_TRUE(mtx_verif(expected, result, mtx_size, mtx_size));
delete[] A;
delete[] B;
delete[] expected;
delete[] result;
}



}

0 comments on commit 8423b74

Please sign in to comment.