Skip to content

Commit

Permalink
Porting to MSVC (#28)
Browse files Browse the repository at this point in the history
* Improve: better support for Windows and MSVC
* Improve: Move logging around
* Improve: CMakeLists for MSVC some more
* Fix: Missing OpenBLAS config and macros
* Make: Uniform conditions for MSVC
* Docs: Style and links
* Docs: Mention MSVC compatibility
* Docs: Notes on Chrono

---------

Co-authored-by: Ash Vardanian <[email protected]>
  • Loading branch information
RazielXYZ and ashvardanian authored Jan 26, 2025
1 parent 9ab0d56 commit dcb065b
Show file tree
Hide file tree
Showing 4 changed files with 245 additions and 102 deletions.
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,15 @@
"colsb",
"consteval",
"coro",
"cplusplus",
"cppcoro",
"CTRE",
"CUDA",
"denormal",
"DOTPROD",
"Dusíková",
"Eigen",
"Eron",
"excerise",
"fconcepts",
"Fedor",
Expand Down Expand Up @@ -53,6 +55,7 @@
"Niels",
"nlohmann",
"NVCC",
"openblas",
"openmp",
"Ormrod",
"Peta",
Expand Down
109 changes: 74 additions & 35 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Let's use CMake 3.16+ for native sanitizer support
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)

# ------------------------------------------------------------------------------
# Project Setup
Expand Down Expand Up @@ -32,17 +32,25 @@ endif()
# ------------------------------------------------------------------------------
find_package(Threads REQUIRED)
find_package(OpenMP REQUIRED)
find_package(BLAS REQUIRED)
if (BLAS_FOUND)
message(STATUS "BLAS found: ${BLAS_LIBRARIES}")
else ()
message(FATAL_ERROR "BLAS not found")
endif ()


set(FETCHCONTENT_QUIET OFF)
include(FetchContent)

# Fetch and build OpenBLAS
FetchContent_Declare(
OpenBLAS
GIT_REPOSITORY https://github.com/xianyi/OpenBLAS.git
GIT_TAG v0.3.29
)

# Set OpenBLAS build options
set(NOFORTRAN ON CACHE BOOL "Disable Fortran" FORCE)
set(BUILD_WITHOUT_LAPACK OFF CACHE BOOL "Build without LAPACK" FORCE)
set(USE_THREAD ON CACHE BOOL "Use threading" FORCE)

# Make OpenBLAS available
FetchContent_MakeAvailable(OpenBLAS)

# GTest (required by Google Benchmark)
FetchContent_Declare(
GoogleTest
Expand Down Expand Up @@ -105,7 +113,7 @@ endif()
FetchContent_Declare(
VictorZverovichFMT
GIT_REPOSITORY https://github.com/fmtlib/fmt.git
GIT_TAG 11.1.0
GIT_TAG 11.1.2
)
FetchContent_MakeAvailable(VictorZverovichFMT)

Expand Down Expand Up @@ -189,19 +197,23 @@ add_executable(less_slow less_slow.cpp)
set_target_properties(less_slow PROPERTIES POSITION_INDEPENDENT_CODE ON)

# Conditionally add the assembly file(s)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64")
set_source_files_properties(less_slow_amd64.S PROPERTIES LANGUAGE ASM)
target_sources(less_slow PRIVATE less_slow_amd64.S)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
set_source_files_properties(less_slow_aarch64.S PROPERTIES LANGUAGE ASM)
target_sources(less_slow PRIVATE less_slow_aarch64.S)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64|AMD64|x64")
set_source_files_properties(less_slow_amd64.S PROPERTIES LANGUAGE ASM)
target_sources(less_slow PRIVATE less_slow_amd64.S)
elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|ARM64")
set_source_files_properties(less_slow_aarch64.S PROPERTIES LANGUAGE ASM)
target_sources(less_slow PRIVATE less_slow_aarch64.S)
endif()

# ------------------------------------------------------------------------------
# Compiler Flags / Options
# ------------------------------------------------------------------------------
if(NOT CMAKE_SYSTEM_NAME STREQUAL "Darwin")
# Apple Clang doesn't support -march=native
# Check for compiler support of `-march=native`
if(CMAKE_CXX_COMPILER_ID MATCHES "Intel")
target_compile_options(less_slow PRIVATE -xHost)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "AppleClang" OR CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
# Apple's Clang and MSVC can't auto-detect the highest CPU features
else()
target_compile_options(less_slow PRIVATE -march=native)
endif()

Expand All @@ -213,8 +225,15 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
-fconcepts-diagnostics-depth=10 # Needed to debug concepts
-fopenmp # OpenMP support, also requires linking
)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
target_compile_options(less_slow PRIVATE
/MP # Build with multiple processes; equivalent to `make -j` except it spans across all cores by default
/wd4068 # Disable the "unknown pragma" warning, as StringZilla uses many GCC and Clang pragmas
/Zc:__cplusplus # Make `__cplusplus` macro actually match used standard
/Zc:preprocessor # Use conformant preprocessor
)

else()
# For other compilers (Clang, MSVC, Intel, etc.)
target_compile_options(less_slow PRIVATE
-Wno-deprecated-pragma
)
Expand All @@ -230,28 +249,48 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang"
)
set_property(TARGET less_slow PROPERTY SANITIZE_ADDRESS TRUE)
set_property(TARGET less_slow PROPERTY SANITIZE_UNDEFINED TRUE)
elseif(CMAKE_CXX_COMPILER_ID MATCHES "MSVC")
target_compile_options(less_slow PRIVATE
$<$<CONFIG:Release>:/O2>
$<$<CONFIG:Release>:/Ob2>
$<$<CONFIG:Release>:/Oi>
$<$<CONFIG:Release>:/Ot>
$<$<CONFIG:Release>:/GL>
)
target_link_options(less_slow PRIVATE
$<$<CONFIG:Release>:/LTCG:incremental>
)
endif()

# ------------------------------------------------------------------------------
# Link Libraries
# ------------------------------------------------------------------------------
# Add OpenBLAS include directory manually
if(openblas_POPULATED)
target_include_directories(less_slow PRIVATE ${openblas_SOURCE_DIR})

# For config.h
target_include_directories(less_slow PRIVATE ${openblas_BINARY_DIR})
endif()

target_link_libraries(less_slow
PRIVATE
Threads::Threads
benchmark
fmt::fmt
range-v3
cppcoro
unifex
stringzilla
yyjson
ctre
# There is no `absl` shortcut:
# https://github.com/abseil/abseil-cpp/blob/master/CMake/README.md#available-abseil-cmake-public-targets
absl::flat_hash_map
nlohmann_json::nlohmann_json
Eigen3::Eigen
${BLAS_LIBRARIES}
$<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>:TBB::tbb>
$<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>:OpenMP::OpenMP_CXX>
Threads::Threads
benchmark
fmt::fmt
range-v3
cppcoro
unifex
stringzilla
yyjson
ctre
openblas

# There is no `absl` shortcut:
# https://github.com/abseil/abseil-cpp/blob/master/CMake/README.md#available-abseil-cmake-public-targets
absl::flat_hash_map
nlohmann_json::nlohmann_json
Eigen3::Eigen
$<$<STREQUAL:${CMAKE_SYSTEM_NAME},Linux>:TBB::tbb>
OpenMP::OpenMP_CXX
)
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,17 @@ Some of the highlights include:
- __Is the pointer size really 64 bits__ and how to exploit [pointer-tagging](https://en.wikipedia.org/wiki/Tagged_pointer)?
- __How many packets is [UDP](https://www.cloudflare.com/learning/ddos/glossary/user-datagram-protocol-udp/) dropping__ and how to serve web requests in [`io_uring`](https://en.wikipedia.org/wiki/Io_uring) from user-space?
- __Scatter and Gather__ for 50% faster vectorized disjoint memory operations.
- __How to choose between intrinsics, inline Assembly, and separate Assembly files__ for your performance-critical code?
- __How to choose between intrinsics, inline Assembly, and separate `.S` files__ for your performance-critical code?
- __What are Encrypted Enclaves__ and what's the latency of Intel SGX, AMD SEV, and ARM Realm? 🔜

To get started, jump to the [`less_slow.cpp` source file](https://github.com/ashvardanian/less_slow.cpp/blob/main/less_slow.cpp) and read through the code snippets and comments.
Follow the instructions below to run the code in your environment and compare it to the comments as you read through the source.

## Running the Benchmarks

The project aims to be compatible with GCC, Clang, and MSVC compilers on Linux, macOS, and Windows.
That said, to cover the broadest functionality, using GCC on Linux is recommended:

- If you are on Windows, it's recommended that you set up a Linux environment using [WSL](https://docs.microsoft.com/en-us/windows/wsl/install).
- If you are on macOS, consider using the non-native distribution of Clang from [Homebrew](https://brew.sh) or [MacPorts](https://www.macports.org).
- If you are on Linux, make sure to install CMake and a recent version of GCC or Clang compilers to support C++20 features.
Expand Down
Loading

0 comments on commit dcb065b

Please sign in to comment.