Skip to content

Commit

Permalink
#EDITS: some mass updates to the linear algebra module and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
akielaries committed Feb 27, 2024
1 parent 97926a0 commit d0465af
Show file tree
Hide file tree
Showing 27 changed files with 275 additions and 5,383 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -Wno-unused-result -Wparentheses -Ws
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG -Wall -Wextra -Wfloat-equal -Wcast-qual")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wshadow -Wunreachable-code -D __GPMP_CPP_API__")

project(openGPMP LANGUAGES CXX C Fortran)
project(openGPMP LANGUAGES CXX C Fortran ASM)
set(PROJECT_VERSION "1.0")

include(CheckIncludeFileCXX)
Expand Down
20 changes: 15 additions & 5 deletions experiment/test.c
Original file line number Diff line number Diff line change
@@ -1,9 +1,20 @@
#include <stdio.h>

// Declare the assembly function as an external function
extern int asm_function(int a, int b, int c, int d, int e, int f, int g, int h, int i, int j, int k, double l);

int add (int a, int b, int c) {
extern int asm_function(int a,
int b,
int c,
int d,
int e,
int f,
int g,
int h,
int i,
int j,
int k,
double l);

int add(int a, int b, int c) {

// performs:
//
Expand All @@ -20,9 +31,8 @@ int main() {
int a = 5;
int b = 7;
int c = 10;
//int result = asm_function(5, 7);
// int result = asm_function(5, 7);
int result = add(a, b, c);
printf("Result: %d\n", result);
return 0;
}

52 changes: 49 additions & 3 deletions include/linalg/_dgemm.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,47 @@ namespace linalg {
class DGEMM {
public:
/**< Buffer for storing packed micro panels of A */
static double DGEMM_BUFF_A[BLOCK_SZ_M * BLOCK_SZ_K]__attribute__ ((aligned (16)));
static double DGEMM_BUFF_A[BLOCK_SZ_M * BLOCK_SZ_K]
__attribute__((aligned(16)));
/**< Buffer for storing packed micro panels of B */
static double DGEMM_BUFF_B[BLOCK_SZ_K * BLOCK_SZ_N]__attribute__ ((aligned (16)));
static double DGEMM_BUFF_B[BLOCK_SZ_K * BLOCK_SZ_N]
__attribute__((aligned(16)));
/**< Buffer for storing intermediate results */
static double DGEMM_BUFF_C[BLOCK_SZ_MR * BLOCK_SZ_NR]__attribute__ ((aligned (16)));
static double DGEMM_BUFF_C[BLOCK_SZ_MR * BLOCK_SZ_NR]
__attribute__((aligned(16)));

/**
* @brief Performs matrix-matrix multiplication (DGEMM) using an
* assembly implementation. It computes the product of matrices A and B
* scaled by alpha, adds it to matrix C scaled by beta, and stores the
* result in matrix C.
*
* @param A Pointer to the first matrix (A) in row-major order
* @param B Pointer to the second matrix (B) in row-major order
* @param C Pointer to the result matrix (C) in row-major order
* @param nextA Pointer to the next matrix A
* @param nextB Pointer to the next matrix B
* @param kl Value representing the remaining columns of matrix A
* @param kb Value representing the remaining rows of matrix B
* @param incRowC Increment for moving to the next row of matrix C
* @param incColC Increment for moving to the next column of matrix C
* @param alpha Scalar value to scale the product of matrices A and B
* @param beta Scalar value to scale matrix C before adding the product
*
* @note This calls an assembly implementation chosen according to the
* detected host system. x86 (SSE, AVX2) and ARM NEON are supported.
*/
/*void dgemm_kernel_asm(const double *A,
const double *B,
double *C,
const double *nextA,
const double *nextB,
long kl,
long kb,
long incRowC,
long incColC,
double alpha,
double beta);
*/

/**
* @brief Packs micro panels of size BLOCK_SZ_MR rows by k columns from A
Expand Down Expand Up @@ -143,6 +179,16 @@ class DGEMM {
int incRowC,
int incColC);

/**
* @brief Declares a DGEMM micro kernel taking long-typed sizes and strides
* plus look-ahead pointers to the next A and B panels.
*
* NOTE(review): only the declaration is visible here; the parameter
* descriptions below are inferred from the parameter names and should be
* confirmed against the definition.
*
* @param kc presumably the length of the shared (inner) dimension - TODO
* confirm
* @param alpha presumably the scalar applied to the product of A and B
* @param A Pointer to read-only data for A (presumably a packed panel)
* @param B Pointer to read-only data for B (presumably a packed panel)
* @param beta presumably the scalar applied to C before adding the product
* @param C Pointer to the writable result data
* @param incRowC presumably the increment between rows of C
* @param incColC presumably the increment between columns of C
* @param nextA Pointer to the next A data (purpose not visible here)
* @param nextB Pointer to the next B data (purpose not visible here)
*/
void dgemm_micro_kernel(long kc,
double alpha,
const double *A,
const double *B,
double beta,
double *C,
long incRowC,
long incColC,
const double *nextA,
const double *nextB);
/**
* @brief Computes Y += alpha*X (double precision AX + Y)
*
Expand Down
3 changes: 3 additions & 0 deletions modules/linalg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@ set(SOURCE_FILES
svd.cpp
vector_naive.cpp
igemm_arr.cpp

sgemm_arr.cpp

dgemm_arr.cpp
dgemm_kernel_sse.S
)

# Add files depending on the detected SIMD ISA
Expand Down
Loading

0 comments on commit d0465af

Please sign in to comment.