Skip to content

Commit

Permalink
#EDITS: adding optimized WO intrinsics or processor accel DGEMM matri…
Browse files Browse the repository at this point in the history
…x multiplication class
  • Loading branch information
akielaries committed Feb 20, 2024
1 parent 8a40bde commit efe99cc
Show file tree
Hide file tree
Showing 9 changed files with 910 additions and 0 deletions.
236 changes: 236 additions & 0 deletions include/linalg/_dgemm.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,236 @@
/*************************************************************************
*
* Project
* _____ _____ __ __ _____
* / ____| __ \| \/ | __ \
* ___ _ __ ___ _ __ | | __| |__) | \ / | |__) |
* / _ \| '_ \ / _ \ '_ \| | |_ | ___/| |\/| | ___/
*| (_) | |_) | __/ | | | |__| | | | | | | |
* \___/| .__/ \___|_| |_|\_____|_| |_| |_|_|
* | |
* |_|
*
* Copyright (C) Akiel Aries, <[email protected]>, et al.
*
* This software is licensed as described in the file LICENSE, which
* you should have received as part of this distribution. The terms
* among other details are referenced in the official documentation
* seen here : https://akielaries.github.io/openGPMP/ along with
* important files seen in this project.
*
* You may opt to use, copy, modify, merge, publish, distribute
* and/or sell copies of the Software, and permit persons to whom
* the Software is furnished to do so, under the terms of the
* LICENSE file. As this is an Open Source effort, all implementations
* must be of the same methodology.
*
*
*
* This software is distributed on an AS IS basis, WITHOUT
* WARRANTY OF ANY KIND, either express or implied.
*
************************************************************************/

#ifndef GPMP_LINALG_DGEMM_HPP
#define GPMP_LINALG_DGEMM_HPP

/*
 * Block sizes used by the blocked DGEMM routines declared below. They also
 * fix the sizes of the static packing buffers owned by gpmp::linalg::DGEMM.
 */
#define BLOCK_SZ_M 384  /**< Rows of A and C */
#define BLOCK_SZ_K 384  /**< Columns of A and rows of B */
#define BLOCK_SZ_N 4096 /**< Columns of B and C */
#define BLOCK_SZ_MR 4   /**< Rows of the micro-panel of A and C */
#define BLOCK_SZ_NR 4   /**< Columns of the micro-panel of B and C */

namespace gpmp {
namespace linalg {

/**
 * @class DGEMM
 * @brief Class for performing matrix multiplication on double type arrays
 *
 * Matrices are passed as raw pointers plus a row increment and a column
 * increment, i.e. the distance (in elements) between consecutive rows and
 * between consecutive columns.
 *
 * The three buffers below are static data members, so a single copy is shared
 * by every DGEMM object and each needs exactly one out-of-class definition in
 * a source file.
 * NOTE(review): because the buffers are shared, concurrent calls from several
 * threads presumably interfere with each other -- confirm against the
 * implementation before using this class from multiple threads.
 */
class DGEMM {
  public:
    /** Buffer for storing packed micro panels of A
     *  (BLOCK_SZ_M * BLOCK_SZ_K doubles) */
    static double DGEMM_BUFF_A[BLOCK_SZ_M * BLOCK_SZ_K];

    /** Buffer for storing packed micro panels of B
     *  (BLOCK_SZ_K * BLOCK_SZ_N doubles) */
    static double DGEMM_BUFF_B[BLOCK_SZ_K * BLOCK_SZ_N];

    /** Buffer for storing intermediate results
     *  (one BLOCK_SZ_MR x BLOCK_SZ_NR micro tile) */
    static double DGEMM_BUFF_C[BLOCK_SZ_MR * BLOCK_SZ_NR];

    /**
     * @brief Packs micro panels of size BLOCK_SZ_MR rows by k columns from A
     * without padding
     *
     * @param k Number of columns to pack
     * @param A Pointer to the source matrix A
     * @param incRowA Increment between consecutive rows of A
     * @param incColA Increment between consecutive columns of A
     * @param buffer Pointer to the buffer to store the packed micro panels
     */
    void pack_micro_A(int k,
                      const double *A,
                      int incRowA,
                      int incColA,
                      double *buffer);

    /**
     * @brief Packs panels from A with padding if needed
     *
     * @param mc Number of rows to pack
     * @param kc Number of columns to pack
     * @param A Pointer to the source matrix A
     * @param incRowA Increment between consecutive rows of A
     * @param incColA Increment between consecutive columns of A
     * @param buffer Pointer to the buffer to store the packed panels
     */
    void pack_buffer_A(int mc,
                       int kc,
                       const double *A,
                       int incRowA,
                       int incColA,
                       double *buffer);

    /**
     * @brief Packs micro panels of size BLOCK_SZ_NR columns by k rows from B
     * without padding
     *
     * @param k Number of rows to pack
     * @param B Pointer to the source matrix B
     * @param incRowB Increment between consecutive rows of B
     * @param incColB Increment between consecutive columns of B
     * @param buffer Pointer to the buffer to store the packed micro panels
     */
    void pack_micro_B(int k,
                      const double *B,
                      int incRowB,
                      int incColB,
                      double *buffer);

    /**
     * @brief Packs panels from B with padding if needed
     *
     * @param kc Number of rows to pack
     * @param nc Number of columns to pack
     * @param B Pointer to the source matrix B
     * @param incRowB Increment between consecutive rows of B
     * @param incColB Increment between consecutive columns of B
     * @param buffer Pointer to the buffer to store the packed panels
     */
    void pack_buffer_B(int kc,
                       int nc,
                       const double *B,
                       int incRowB,
                       int incColB,
                       double *buffer);

    /**
     * @brief Computes the micro kernel that multiplies panels from A and B
     *
     * @param kc Number of columns
     * @param alpha Scalar alpha
     * @param A Pointer to the packed panel A
     * @param B Pointer to the packed panel B
     * @param beta Scalar beta
     * @param C Pointer to the output matrix C
     * @param incRowC Increment between consecutive rows of C
     * @param incColC Increment between consecutive columns of C
     */
    void dgemm_micro_kernel(int kc,
                            double alpha,
                            const double *A,
                            const double *B,
                            double beta,
                            double *C,
                            int incRowC,
                            int incColC);

    /**
     * @brief Computes Y += alpha*X (double precision AX + Y)
     *
     * @param m Number of rows
     * @param n Number of columns
     * @param alpha Scalar alpha
     * @param X Pointer to matrix X
     * @param incRowX Increment between consecutive rows of X
     * @param incColX Increment between consecutive columns of X
     * @param Y Pointer to matrix Y
     * @param incRowY Increment between consecutive rows of Y
     * @param incColY Increment between consecutive columns of Y
     */
    void dgeaxpy(int m,
                 int n,
                 double alpha,
                 const double *X,
                 int incRowX,
                 int incColX,
                 double *Y,
                 int incRowY,
                 int incColY);

    /**
     * @brief Scales elements of X by alpha
     *
     * @param m Number of rows
     * @param n Number of columns
     * @param alpha Scalar alpha
     * @param X Pointer to matrix X
     * @param incRowX Increment between consecutive rows of X
     * @param incColX Increment between consecutive columns of X
     */
    void
    dgescal(int m, int n, double alpha, double *X, int incRowX, int incColX);

    /**
     * @brief Macro kernel for the multiplication of blocks of A and B
     *
     * NOTE(review): no A or B pointer is passed in; the blocks are presumably
     * read from the packed static buffers DGEMM_BUFF_A / DGEMM_BUFF_B --
     * confirm against the implementation.
     *
     * @param mc Number of rows in the block of C
     * @param nc Number of columns in the block of C
     * @param kc Number of columns in the blocks of A and rows of B
     * @param alpha Scalar alpha
     * @param beta Scalar beta
     * @param C Pointer to the output matrix C
     * @param incRowC Increment between consecutive rows of C
     * @param incColC Increment between consecutive columns of C
     */
    void dgemm_macro_kernel(int mc,
                            int nc,
                            int kc,
                            double alpha,
                            double beta,
                            double *C,
                            int incRowC,
                            int incColC);

    /**
     * @brief Main DGEMM entrypoint, computes C <- beta*C + alpha*A*B
     *
     * @param m Number of rows of A and rows of C
     * @param n Number of columns of B and columns of C
     * @param k Number of columns of A and rows of B
     * @param alpha Scalar alpha
     * @param A Pointer to matrix A
     * @param incRowA Increment between consecutive rows of A
     * @param incColA Increment between consecutive columns of A
     * @param B Pointer to matrix B
     * @param incRowB Increment between consecutive rows of B
     * @param incColB Increment between consecutive columns of B
     * @param beta Scalar beta
     * @param C Pointer to matrix C
     * @param incRowC Increment between consecutive rows of C
     * @param incColC Increment between consecutive columns of C
     */
    void dgemm_nn(int m,
                  int n,
                  int k,
                  double alpha,
                  const double *A,
                  int incRowA,
                  int incColA,
                  const double *B,
                  int incRowB,
                  int incColB,
                  double beta,
                  double *C,
                  int incRowC,
                  int incColC);
};

} // namespace linalg
} // namespace gpmp

#endif // GPMP_LINALG_DGEMM_HPP
7 changes: 7 additions & 0 deletions include/linalg/mtx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -312,6 +312,13 @@ class Mtx {
int cols_a,
int cols_b);

/**
 * @brief Matrix multiplication overload operating on flat double arrays
 *
 * NOTE(review): only the declaration is visible here. Judging by the
 * parameter names this presumably computes C = A * B with A of size
 * rows_a x cols_a and B of size cols_a x cols_b -- confirm the storage
 * order and whether C is overwritten or accumulated into against the
 * definition.
 *
 * @param A Pointer to the first input matrix (read-only)
 * @param B Pointer to the second input matrix (read-only)
 * @param C Pointer to the output matrix
 * @param rows_a Number of rows of A
 * @param cols_a Number of columns of A
 * @param cols_b Number of columns of B
 */
void mtx_mult(const double *A,
const double *B,
double *C,
int rows_a,
int cols_a,
int cols_b);

/**
 * @brief Matrix transpose operating on flat int arrays
 *
 * NOTE(review): only the declaration is visible here; presumably writes the
 * transpose of the rows x cols matrix A into C -- confirm against the
 * definition.
 *
 * @param A Pointer to the input matrix (read-only)
 * @param C Pointer to the output matrix
 * @param rows Number of rows (presumably of A)
 * @param cols Number of columns (presumably of A)
 */
void mtx_tpose(const int *A, int *C, int rows, int cols);

/**
Expand Down
2 changes: 2 additions & 0 deletions modules/linalg/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ set(SOURCE_FILES
tensor.cpp
svd.cpp
vector_naive.cpp
dgemm_arr.cpp
)

# Add files depending on the detected SIMD ISA
Expand All @@ -33,6 +34,7 @@ if (SIMD_ISA STREQUAL "AVX2")
vector_avx2_i16.cpp
vector_avx2_i32.cpp
vector_avx2_f64.cpp

)
elseif (SIMD_ISA STREQUAL "ARM_NEON")
list(APPEND SOURCE_FILES
Expand Down
Loading

0 comments on commit efe99cc

Please sign in to comment.