Skip to content

Commit

Permalink
Merge pull request #198 from pjotrp/master
Browse files Browse the repository at this point in the history
Small optimization and static build with GNU Guix.
  • Loading branch information
AndreaGuarracino authored Aug 14, 2023
2 parents cb87621 + 1dc6d56 commit 0594c60
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 18 deletions.
1 change: 1 addition & 0 deletions .github/workflows/small_test_on_push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ jobs:
g++
python3-dev
libatomic-ops-dev
gcc-multilib
autoconf
libgsl-dev
zlib1g-dev
Expand Down
41 changes: 31 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,30 +1,50 @@
# Specify the minimum version for CMake

cmake_minimum_required(VERSION 3.16)
cmake_minimum_required(VERSION 3.16 FATAL_ERROR)

# Project's name
project(smoothxg)

# We build using c++17
set(CMAKE_CXX_STANDARD 17)

find_package(ZLIB REQUIRED)

enable_testing()

include(CheckIPOSupported) # adds lto
check_ipo_supported(RESULT ipo_supported OUTPUT output)
SET(CMAKE_RANLIB "gcc-ranlib") # to try lto with older runtimes

# This builds a static version of ./bin/smoothxg
option(BUILD_STATIC "Build static binary" OFF)
if (BUILD_STATIC)
set(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
set(BUILD_SHARED_LIBS OFF)
set(CMAKE_EXE_LINKER_FLAGS "-static")
endif()

find_package(OpenMP)
# if(OpenMP_CXX_FOUND)
# target_link_libraries(MyTarget PUBLIC OpenMP::OpenMP_CXX)
# endif()

find_package(PkgConfig REQUIRED)
find_package(ZLIB REQUIRED)
# find_package(ZSTD REQUIRED) - needs recent versions of cmake

# Preload the following libraries before running tests
set(PRELOAD "libasan.so:libjemalloc.so.2")

if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release CACHE STRING
"Choose the type of build, options are: Release Debug Generic." FORCE)
message(STATUS "Choose the type of build, options are: Release Debug Generic!")
endif()

# set(CMAKE_BUILD_TYPE Debug) -- don't uncomment this, instead run
# cmake -DCMAKE_BUILD_TYPE=Debug ..

if (${CMAKE_BUILD_TYPE} MATCHES Release)
set(EXTRA_FLAGS "-Ofast -march=native")
if(NOT EXTRA_FLAGS)
set(EXTRA_FLAGS "-Ofast -march=native -flto -fno-fat-lto-objects")
endif()
set(CMAKE_CXX_FLAGS_RELEASE "-DNDEBUG") # reset CXX_FLAGS to replace -O3 with -Ofast

# Increase SPOA's performance
Expand Down Expand Up @@ -418,7 +438,7 @@ add_executable(smoothxg
target_link_libraries(smoothxg spoa abpoa)
target_link_libraries(smoothxg ${smoothxg_LIBS})

target_link_libraries(smoothxg ZLIB::ZLIB zstd jemalloc)
target_link_libraries(smoothxg z zstd jemalloc)

set_target_properties(smoothxg PROPERTIES OUTPUT_NAME "smoothxg")
target_include_directories(smoothxg PUBLIC ${smoothxg_INCLUDES})
Expand All @@ -442,9 +462,10 @@ file(MAKE_DIRECTORY ${CMAKE_SOURCE_DIR}/include)
execute_process(COMMAND bash ${CMAKE_SOURCE_DIR}/scripts/generate_git_version.sh ${CMAKE_SOURCE_DIR}/include)

add_test(
NAME smoothxg-test
COMMAND bin/smoothxg -t 2 -g test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.seqwish.gfa -j 5k -e 5k -l 700,900,1100 -m test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.maf -C "consensus,10,100:test/data/gi_568815592_32578768-32589835.txt:y,1000:test/data/gi_568815592_32578768-32589835.txt:n,10000" -o test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.gfa -r 12
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
NAME smoothxg-test
# 2 cores because of CI limitations
COMMAND bin/smoothxg -t 2 -g test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.seqwish.gfa -j 5k -e 5k -l 700,900,1100 -m test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.maf -C "consensus,10,100:test/data/gi_568815592_32578768-32589835.txt:y,1000:test/data/gi_568815592_32578768-32589835.txt:n,10000" -o test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.gfa -r 12
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR})
set_tests_properties(smoothxg-test PROPERTIES ENVIRONMENT "ASAN_OPTIONS=detect_leaks=1:symbolize=1;LSAN_OPTIONS=verbosity=0:log_threads=1")

if (APPLE)
Expand Down
35 changes: 33 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ Path names should be unique.

### building from source

`smoothxg` uses cmake to build itself and its dependencies. At least GCC version 9.3.0 is required for compilation.
`smoothxg` uses cmake to build itself and its dependencies. At least GCC version 9.3.0 is required for compilation.
You can check your version via:

```
Expand All @@ -50,8 +50,31 @@ cd smoothxg
cmake -H. -Bbuild && cmake --build build -- -j 4
```

To optimize for the host architecture, build in `Release` mode:

```
cmake -DCMAKE_BUILD_TYPE=Release .. && make -j 16 VERBOSE=1 && ctest . --verbose
```


`libzstd-dev` must be of version 1.4 or higher.

Run tests:

```
ctest . --verbose
```

Note that smoothxg depends on git submodules:

```
git submodule update --init --recursive
```

### Using Guix for building

In your source dir make sure git submodules are up-to-date and follow the instructions in [guix.scm](guix.scm).

#### Notes for distribution

If you need to avoid machine-specific optimizations, use the `CMAKE_BUILD_TYPE=Generic` build type:
Expand All @@ -60,6 +83,14 @@ If you need to avoid machine-specific optimizations, use the `CMAKE_BUILD_TYPE=G
cmake -H. -Bbuild -DCMAKE_BUILD_TYPE=Generic && cmake --build build -- -j 3
```

To build for a specific architecture, you can set `EXTRA_FLAGS`:

```shell
cmake -DCMAKE_BUILD_TYPE=Release -DEXTRA_FLAGS="-Ofast -march=znver1" .. && make -j 16 VERBOSE=1
```

And to make a static build add the `-DBUILD_STATIC=ON` switch.

### Bioconda

`smoothxg` recipes for Bioconda are available at https://anaconda.org/bioconda/smoothxg.
Expand Down Expand Up @@ -119,7 +150,7 @@ For more details about how to handle Guix channels, go to https://git.genenetwor

#### Notes for debugging

To make the `-S/--write-split-block-fastas` and `-B/--write-poa-block-fastas` options available, and emit a table
To make the `-S/--write-split-block-fastas` and `-B/--write-poa-block-fastas` options available, and emit a table
with POA block statistics, add the `-DPOA_DEBUG=ON` option:

```shell
Expand Down
17 changes: 11 additions & 6 deletions guix.scm
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
;;
;; guix build -f guix.scm
;;
;; (make sure you are running a recent guix and checked out all submodules)
;;
;; To do a cross compilation build for ARM64
;;
;; guix build -f guix.scm --target=aarch64-linux
Expand All @@ -17,6 +19,10 @@
;; cmake -DCMAKE_BUILD_TYPE=Debug ..
;; cmake --build . --verbose -- -j 14 && ctest . --verbose
;;
;; Or for a release, something like
;;
;; cd build && rm -rf ../build/* ; cmake -DCMAKE_BUILD_TYPE=Release .. && make -j 16 VERBOSE=1 && ctest . --verbose
;;
;; For the tests you may need /usr/bin/env. In a container create it with
;;
;; mkdir -p /usr/bin ; ln -s $GUIX_ENVIRONMENT/bin/env /usr/bin/env
Expand Down Expand Up @@ -65,22 +71,21 @@
(inputs
`(
("coreutils" ,coreutils)
; ("cpp-httplib" ,cpp-httplib) later!
("pybind11" ,pybind11) ;; see libstd++ note in remarks above
; ("intervaltree" ,intervaltree) later!
("jemalloc" ,jemalloc)
("gcc" ,gcc-11)
("gcc-lib" ,gcc-11 "lib")
("gcc-toolchain" ,gcc-toolchain)
("gcc-toolchain" , gcc-toolchain)
("gdb" ,gdb)
("git" ,git) ; pulls in perl which does not do RISC-V cross builds yet
; ("lodepng" ,lodepng) later!
("openmpi" ,openmpi)
("python" ,python)
("sdsl-lite" ,sdsl-lite)
("libdivsufsort" ,libdivsufsort)
("zlib" ,zlib)
("zstd-lib" ,zstd "lib")
("zlib-static" ,zlib "static")
("zlib" ,zlib) ; also for the static build we need the includes
("zstd-lib" ,zstd "static")
("zstd" ,zstd "lib") ; same
))
(native-inputs
`(("pkg-config" ,pkg-config)
Expand Down
56 changes: 56 additions & 0 deletions test/performance/check.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Performance testing

In this document we want to make sure we don't regress on speed.

On an `AMD Ryzen 7 3700X 8-Core Processor`:

2 cores:

```
Command being timed: "bin/smoothxg -t 2 -g test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.seqwish.gfa -j 5k -e 5k -l 700,900,1100 -m test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.maf -C consensus,10,100:test/data/gi_568815592_32578768-32589835.txt:y,1000:test/data/gi_568815592_32578768-32589835.txt:n,10000 -o test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.gfa -r 12"
User time (seconds): 5.75
System time (seconds): 1.06
Percent of CPU this job got: 26%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:25.45
```

Note the debug version is about 35s.

Using 8 cores does not make much difference:

```
Command being timed: "bin/smoothxg -t 8 -g test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.seqwish.gfa -j 5k -e 5k -l 700,900,1100 -m test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.maf -C consensus,10,100:test/data/gi_568815592_32578768-32589835.txt:y,1000:test/data/gi_568815592_32578768-32589835.txt:n,10000 -o test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.gfa -r 12"
User time (seconds): 12.42
System time (seconds): 4.85
Percent of CPU this job got: 73%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:23.42
```

Compiling with LTO gives a slightly faster runtime on 2 threads:

```
Command being timed: "bin/smoothxg -t 2 -g test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.seqwish.gfa -j 5k -e 5k -l 700,900,1100 -m test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.maf -C consensus,10,100:test/data/gi_568815592_32578768-32589835.txt:y,1000:test/data/gi_568815592_32578768-32589835.txt:n,10000 -o test/data/DRB1-3123.fa.gz.pggb-s3000-p70-n10-a70-K16-k8-w10000-j5000-e5000.smooth.gfa -r 12"
User time (seconds): 5.43
System time (seconds): 1.19
Percent of CPU this job got: 26%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:25.01
```

Honoring -Ofast gives some speedup

```
User time (seconds): 5.55
System time (seconds): 1.02
Percent of CPU this job got: 26%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:24.40
```

The static build with GNU Guix performs about the same:

```
User time (seconds): 5.35
System time (seconds): 1.12
Percent of CPU this job got: 26%
Elapsed (wall clock) time (h:mm:ss or m:ss): 0:24.46
```

0 comments on commit 0594c60

Please sign in to comment.