Skip to content

Commit

Permalink
#MERGE: Linear Algebra module mass update (broken) (#112)
Browse files Browse the repository at this point in the history
  • Loading branch information
akielaries authored Feb 21, 2024
1 parent f2a5080 commit 2bd9b74
Show file tree
Hide file tree
Showing 95 changed files with 13,240 additions and 4,297 deletions.
11 changes: 7 additions & 4 deletions .github/workflows/build_osx.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,20 @@ jobs:
- name: install GCC suite
run: |
brew install gcc
brew install lcov
brew reinstall gcc
- name: get compilers
run: |
which gcc
which g++
which g++-13
which gfortran
gcc --version
- name: Configure CMake
- name: Configure CMake (GCC)
run: |
cmake -DCMAKE_C_COMPILER=/usr/bin/gcc -DCMAKE_CXX_COMPILER=/usr/bin/g++ -DCMAKE_Fortran_COMPILER=/usr/local/bin/gfortran -B ${{github.workspace}}/build -DCMAKE_BUILD_TYPE=${{env.BUILD_TYPE}}
cmake -DCMAKE_C_COMPILER=/usr/local/bin/gcc-13 -DCMAKE_CXX_COMPILER=/usr/local/bin/g++-13 -DCMAKE_Fortran_COMPILER=/usr/local/bin/gfortran -B ${{github.workspace}}/build -DBUILD_TESTS=ON -DCMAKE_VERBOSE_MAKEFILE=ON
- name: Build
- name: Build (GCC)
run: |
cmake --build ${{github.workspace}}/build --config ${{env.BUILD_TYPE}}
2 changes: 1 addition & 1 deletion .github/workflows/codecov.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ jobs:
run: |
mkdir build && cd build
# run tests and populate lcov.info file
cmake -S ../ -DBUILD_TESTS=ON && make
cmake -S ../ -DBUILD_TESTS=ON -DBUILD_COV=ON && make
- name: Generate HTML Analysis & Unit Test Coverage
run: |
Expand Down
91 changes: 66 additions & 25 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,38 +33,76 @@

cmake_minimum_required(VERSION 3.20)
set (CMAKE_CXX_STANDARD 20)
set (CMAKE_CXX_FLAGS "-march=native -g -Wno-unused-result -Wparentheses \
-Wsign-compare -DNDEBUG -Wall -Wextra \
-Wfloat-equal -Wcast-qual -Wshadow \
-Wunreachable-code -D __GPMP_CPP_API__"
)

set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g -Wno-unused-result -Wparentheses -Wsign-compare")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNDEBUG -Wall -Wextra -Wfloat-equal -Wcast-qual")
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wshadow -Wunreachable-code -D __GPMP_CPP_API__")

project(openGPMP LANGUAGES CXX C Fortran)
set(PROJECT_VERSION "1.0")

project(openGPMP LANGUAGES CXX C ASM Fortran)
include(CheckIncludeFileCXX)

# Dynamically determine the project version based on the latest Git tag
#find_package(Git)
message(STATUS "Detecting ${CMAKE_CXX_COMPILER} macros")
# ---------------------------------------------------------------------------
# SIMD ISA detection
#
# Dump the compiler's predefined macros for the native microarchitecture
# (`-dM -E` on an empty translation unit) and search the dump for the marker
# macro of each vector extension. `result` receives the exit code and
# `compiler_output` the macro dump.
execute_process(
    COMMAND ${CMAKE_CXX_COMPILER} -march=native -dM -E -
    RESULT_VARIABLE result
    OUTPUT_VARIABLE compiler_output
    INPUT_FILE /dev/null # empty stdin; NOTE(review): POSIX-only path
)
message(STATUS "Detecting ${CMAKE_CXX_COMPILER} macros - done")

if(result EQUAL 0)
    # string(FIND) yields -1 when the macro is absent from the dump.
    string(FIND "${compiler_output}" "#define __ARM_NEON 1" arm_neon_index)
    string(FIND "${compiler_output}" "#define __AVX2__ 1" avx2_index)
    string(FIND "${compiler_output}" "#define __SSE__ 1" sse_index)
    string(FIND "${compiler_output}" "#define __MMX__ 1" mmx_index)

    message(STATUS "Detecting available SIMD ISAs")

    # Select the single most capable ISA:
    #   * ARM NEON is the only extension checked for ARM processors
    #   * on x86, AVX2-capable processors also support SSE2, SSE, MMX and
    #     everything that came before, so AVX2 is tested first
    if(arm_neon_index GREATER -1)
        message(STATUS "SIMD ISA : ARM NEON supported")
        set(SIMD_ISA "ARM_NEON")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__GPMP_ARM_NEON__")
        # check_include_file_cxx() caches its result per variable name; each
        # probe below uses a distinct name so every probe actually runs.
        # NOTE(review): no branch acts on a failed header probe -- confirm
        # whether a missing intrinsics header should be fatal.
        check_include_file_cxx("arm_neon.h" HAVE_ARM_NEON_H)

    elseif(avx2_index GREATER -1)
        message(STATUS "SIMD ISA : AVX2 is supported")
        set(SIMD_ISA "AVX2")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__GPMP_AVX2__")
        check_include_file_cxx("immintrin.h" HAVE_IMMINTRIN_H)

    elseif(sse_index GREATER -1)
        message(STATUS "SIMD ISA : SSE is supported")
        set(SIMD_ISA "SSE")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__GPMP_SSE__")
        # Bug fix: both probes previously stored into the same cached
        # variable, so the cached first result suppressed the second probe.
        check_include_file_cxx("emmintrin.h" HAVE_EMMINTRIN_H)
        check_include_file_cxx("smmintrin.h" HAVE_SMMINTRIN_H)

    elseif(mmx_index GREATER -1)
        message(STATUS "SIMD ISA : MMX is supported")
        set(SIMD_ISA "MMX")
        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__GPMP_MMX__")
        check_include_file_cxx("mmintrin.h" HAVE_MMINTRIN_H)

    else()
        message(STATUS "No supported SIMD ISA")
    endif()

    message(STATUS "Detecting available SIMD ISAs - done")
else()
    message(STATUS "Command failed with error code ${result}")
endif()

# compile with native microarchitecture
# NOTE(review): hardcoded -march=native makes binaries non-portable across
# machines; consider guarding it behind an option().
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=native")

# Set build type if not specified
set(DEFAULT_BUILD_TYPE "Release")
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
message(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' as none was specified.")
Expand All @@ -73,8 +111,10 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()


# Disable optimization so debug/coverage output maps cleanly to source lines.
# NOTE(review): -O0 in the *Release* flags variable defeats Release
# optimization entirely -- confirm this is intentional (e.g. for coverage).
set(CMAKE_CXX_FLAGS_RELEASE "-O0")
# Bug fix: this line was duplicated; setting the same value twice is a no-op.
set(CMAKE_Fortran_FLAGS "-O0")

include(GNUInstallDirs)

option(BUILD_ARITH "[!] Build Arithmetic Module" ON)
Expand Down Expand Up @@ -135,6 +175,7 @@ else()
$<TARGET_OBJECTS:core>
$<TARGET_OBJECTS:ml>
$<TARGET_OBJECTS:nt>
$<TARGET_OBJECTS:optim>
$<TARGET_OBJECTS:stats>
)

Expand Down Expand Up @@ -165,7 +206,7 @@ else()
export(TARGETS ${PROJECT_NAME} FILE openGPMPConfig.cmake)

if (BUILD_TESTS)
message(STATUS "Building and running tests")
message(STATUS "Including tests")

enable_testing()

Expand Down
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
openGPMP welcomes any and all contributors! There are **many** ways to contribute to openGPMP: new implementations, updates to source code, documentation, language APIs, and much more. To submit a change, simply fork the repository and begin the
pull request process. Squash commits, clean up misc. files, etc., and submit a pull request to the openGPMP repository. Be sure
to include a hash `#` prefix in your commit messages. For example, `#EDITS:` or `#UPDATES:` are used to signal commits that
feature minor edits/updates. The prefixes, `#PATCH`, `#MINOR`, `#MAJOR`, are reserved for automated versioning/tagging of
feature minor edits/updates. `#BUG:` references a bug fix commit, and `#FEAT` represents a new feature being added to the project. The prefixes, `#PATCH`, `#MINOR`, `#MAJOR`, are reserved for automated versioning/tagging of
commits based on messages. This process still involves manual intervention by design, meaning the maintainer of this project
will be in charge of publishing versions.

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<p align="center">
<img src="https://raw.githubusercontent.com/akielaries/openGPMP/v1.0.0-rc.1/docs/openGPMP_logo_med.png" />
<img src="https://raw.githubusercontent.com/akielaries/openGPMP/main/docs/openGPMP_logo_med.png" />
</p>

![Version](https://img.shields.io/github/v/release/akielaries/openGPMP?color=%23BF40BF)
Expand Down
66 changes: 58 additions & 8 deletions experiment/blas2.c
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,64 @@
#include <chrono>
#include <iostream>

const int matrixSize = 8192;
const int matrixSize = 1024;

void run_openblas_mtx_add();
void run_openblas_mtx_mul();

int main() {
run_openblas_mtx_add();
run_openblas_mtx_mul();

return 0;
}

// Benchmark a dense double-precision matrix multiply (C = A * B) through
// OpenBLAS's cblas_dgemm and print the wall-clock time taken.
void run_openblas_mtx_mul() {
    const int n = matrixSize;

    // Row-major square operands and destination, n x n each.
    double *lhs = new double[n * n];
    double *rhs = new double[n * n];
    double *out = new double[n * n];

    // Fill both operands with pseudo-random values in [0, 99].
    for (int idx = 0; idx < n * n; ++idx) {
        lhs[idx] = rand() % 100;
        rhs[idx] = rand() % 100;
    }

    // Time only the multiplication itself.
    const auto t0 = std::chrono::high_resolution_clock::now();

    // out = 1.0 * lhs * rhs + 0.0 * out (no transposition, leading dim = n)
    cblas_dgemm(CblasRowMajor, CblasNoTrans, CblasNoTrans,
                n, n, n,
                1.0, lhs, n,
                rhs, n,
                0.0, out, n);

    const auto t1 = std::chrono::high_resolution_clock::now();
    const std::chrono::duration<double> elapsed = t1 - t0;

    std::cout << "Matrix multiplication using OpenBLAS took "
              << elapsed.count() << " seconds." << std::endl;

    delete[] lhs;
    delete[] rhs;
    delete[] out;
}

void run_openblas_mtx_add() {
// Create matrices A, B, and C
double *A = new double[matrixSize * matrixSize];
Expand All @@ -24,19 +73,20 @@ void run_openblas_mtx_add() {
}

// Measure the time for matrix addition using OpenBLAS
auto start_time = std::chrono::steady_clock::now();
// auto start_time = std::chrono::steady_clock::now();
auto start_std = std::chrono::high_resolution_clock::now();

// Use OpenBLAS to add matrices A and B and store the result in matrix C
cblas_daxpy(matrixSize * matrixSize, 1.0, A, 1, C, 1);

auto end_time = std::chrono::steady_clock::now();
// auto end_time = std::chrono::steady_clock::now();
auto end_std = std::chrono::high_resolution_clock::now();

std::chrono::duration<double> elapsed_seconds_std = end_std - start_std;

// Calculate and print the elapsed time
std::cout << "OpenBLAS - Time elapsed: "
<< std::chrono::duration_cast<std::chrono::milliseconds>(
end_time - start_time)
.count()
<< " ms" << std::endl;
std::cout << "TIME-ELAPSED: " << elapsed_seconds_std.count() << "seconds"
<< std::endl;

// Cleanup
delete[] A;
Expand Down
19 changes: 19 additions & 0 deletions experiment/test.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
.globl asm_function
// int asm_function(int a, int b, int c, int d)
// Computes a + b + c - d and returns the 32-bit result in %eax.
// System V AMD64 calling convention: a in %edi, b in %esi, c in %edx,
// d in %ecx.
asm_function:
// Load a into %eax
mov %edi, %eax

// Add b to %eax
add %esi, %eax

// Add c to %eax
add %edx, %eax

// Subtract d from %eax
sub %ecx, %eax

ret // Return a + b + c - d in %eax

21 changes: 21 additions & 0 deletions experiment/test.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
#include <stdio.h>

// Declare the assembly function as an external function
extern int asm_function(int a, int b, int c, int d);

// Forward to the external assembly routine with a fixed fourth argument
// (d = 2); the routine computes a + b + c - d (see experiment/test.S).
int add (int a, int b, int c) {
    return asm_function(a, b, c, 2);
}

// Exercise the assembly-backed adder with fixed operands and print the
// result to stdout.
int main() {
    int x = 5;
    int y = 7;
    int z = 10;

    int sum = add(x, y, z);
    printf("Result: %d\n", sum);

    return 0;
}

10 changes: 7 additions & 3 deletions include/core/threads.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,13 @@ class ThreadPool {
*/
template <class F, class... Args>
auto enqueue(F &&f, Args &&...args)
-> std::future<typename std::result_of<F(Args...)>::type> {
//-> std::future<typename std::result_of<F(Args...)>::type> {
-> std::future<typename std::invoke_result<F, Args...>::type> {

// this is the return type of the passed in function
using return_type = typename std::result_of<F(Args...)>::type;
// using return_type = typename std::result_of<F(Args...)>::type;
using return_type = typename std::invoke_result<F, Args...>::type;

// * SHARED POINTER to PACKAGED TASK used to store the passed in i
// function + its arguments
// * std::bind used to create function object binded to the
Expand Down Expand Up @@ -215,7 +218,8 @@ class ThreadDispatch {
*/
template <typename Function, typename... Args>
auto dispatch(ThreadPool &pool, Function &&func, Args &&...args)
-> std::future<typename std::result_of<Function(Args...)>::type> {
//-> std::future<typename std::result_of<Function(Args...)>::type> {
-> std::future<typename std::invoke_result<Function, Args...>::type> {

// enqueue the function call to the thread pool
auto result = pool.enqueue(std::forward<Function>(func),
Expand Down
Loading

0 comments on commit 2bd9b74

Please sign in to comment.