
Commit 4504725

Merge remote-tracking branch 'origin/208_multi-gpu-build' into add-devices-iccs
jwallwork23 committed Jan 24, 2025
2 parents 765f79a + 3f3cd53 commit 4504725
Showing 12 changed files with 83 additions and 27 deletions.
examples/1_SimpleNet/CMakeLists.txt (6 changes: 3 additions & 3 deletions)
@@ -29,7 +29,7 @@ if(CMAKE_BUILD_TESTS)
add_test(NAME simplenet COMMAND ${Python_EXECUTABLE}
${PROJECT_SOURCE_DIR}/simplenet.py)

-# 1. Check the model is saved to file in the expected location with the
+# 2. Check the model is saved to file in the expected location with the
# pt2ts.py script
add_test(
NAME pt2ts
@@ -38,7 +38,7 @@ if(CMAKE_BUILD_TESTS)
# the model
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})

-# 1. Check the model can be loaded from file and run in Python and that its
+# 3. Check the model can be loaded from file and run in Python and that its
# outputs meet expectations
add_test(
NAME simplenet_infer_python
@@ -47,7 +47,7 @@ if(CMAKE_BUILD_TESTS)
# model
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})

-# 1. Check the model can be loaded from file and run in Fortran and that its
+# 4. Check the model can be loaded from file and run in Fortran and that its
# outputs meet expectations
add_test(
NAME simplenet_infer_fortran
examples/2_ResNet18/CMakeLists.txt (4 changes: 2 additions & 2 deletions)
@@ -31,7 +31,7 @@ if(CMAKE_BUILD_TESTS)
COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/resnet18.py
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})

-# 1. Check the model is saved to file in the expected location with the
+# 2. Check the model is saved to file in the expected location with the
# pt2ts.py script
add_test(
NAME pt2ts
@@ -40,7 +40,7 @@ if(CMAKE_BUILD_TESTS)
# the model
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})

-# 1. Check the model can be loaded from file and run in Fortran and that its
+# 3. Check the model can be loaded from file and run in Fortran and that its
# outputs meet expectations
add_test(
NAME resnet_infer_fortran
examples/3_MultiGPU/CMakeLists.txt (52 changes: 49 additions & 3 deletions)
@@ -18,7 +18,53 @@ find_package(FTorch)
find_package(MPI REQUIRED)
message(STATUS "Building with Fortran PyTorch coupling")

+check_language(CUDA)
+if(CMAKE_CUDA_COMPILER)
+enable_language(CUDA)
+else()
+message(ERROR "No CUDA support")
+endif()
+
# Fortran example
-add_executable(simplenet_infer_fortran_gpu simplenet_infer_fortran.f90)
-target_link_libraries(simplenet_infer_fortran_gpu PRIVATE FTorch::ftorch)
-target_link_libraries(simplenet_infer_fortran_gpu PRIVATE MPI::MPI_Fortran)
+add_executable(multigpu_infer_fortran multigpu_infer_fortran.f90)
+target_link_libraries(multigpu_infer_fortran PRIVATE FTorch::ftorch)
+target_link_libraries(multigpu_infer_fortran PRIVATE MPI::MPI_Fortran)
+
+# Integration testing
+if (CMAKE_BUILD_TESTS)
+include(CTest)
+
+# 1. Check the PyTorch model runs and its outputs meet expectations
+add_test(NAME multigpu COMMAND ${Python_EXECUTABLE}
+${PROJECT_SOURCE_DIR}/multigpu.py)
+
+# 2. Check the model is saved to file in the expected location with the
+# pt2ts.py script
+add_test(
+NAME pt2ts
+COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/pt2ts.py
+${PROJECT_BINARY_DIR} # Command line argument: filepath for saving
+# the model
+WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
+
+# 3. Check the model can be loaded from file and run in Python and that its
+# outputs meet expectations
+add_test(
+NAME multigpu_infer_python
+COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/multigpu_infer_python.py
+${PROJECT_BINARY_DIR} # Command line argument: filepath to find the
+# model
+WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
+
+# 4. Check the model can be loaded from file and run in Fortran and that its
+# outputs meet expectations
+add_test(
+NAME multigpu_infer_fortran
+COMMAND
+multigpu_infer_fortran ${PROJECT_BINARY_DIR}/saved_multigpu_model_cuda.pt
+# Command line argument: model file
+WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
+set_tests_properties(
+multigpu_infer_fortran PROPERTIES PASS_REGULAR_EXPRESSION
+"MultiGPU example ran successfully")
+endif()
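The new integration tests above assume one MPI rank per CUDA device. As a point of reference, a minimal Python sketch of that rank-to-device mapping is shown below (assuming mpi4py and a CUDA-enabled PyTorch build; this snippet is illustrative and not part of the commit):

# Illustrative sketch only: map each MPI rank to a CUDA device, as the
# multigpu tests assume. Requires mpi4py and a CUDA-enabled PyTorch build.
import torch
from mpi4py import MPI

rank = MPI.COMM_WORLD.rank
if not torch.cuda.is_available():
    raise RuntimeError("No CUDA support")
# Wrap around in case there are more ranks than visible GPUs.
device = torch.device(f"cuda:{rank % torch.cuda.device_count()}")
print(f"rank {rank} -> {device}")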
examples/3_MultiGPU/multigpu.py
@@ -4,7 +4,7 @@
from torch import nn


-class SimpleNet(nn.Module):
+class MultiGPUNet(nn.Module):
"""PyTorch module multiplying an input vector by 2."""

def __init__(
@@ -42,12 +42,13 @@ def forward(self, batch: torch.Tensor) -> torch.Tensor:


if __name__ == "__main__":
-model = SimpleNet()
+model = MultiGPUNet().to(torch.device("cuda"))
model.eval()

input_tensor = torch.Tensor([0.0, 1.0, 2.0, 3.0, 4.0])
input_tensor_gpu = input_tensor.to(torch.device("cuda"))

print(f"SimpleNet forward pass on CUDA device {input_tensor_gpu.get_device()}")
with torch.no_grad():
-print(model(input_tensor_gpu))
+output = model(input_tensor_gpu)
+print(output)
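Only fragments of the renamed module appear in this hunk. Below is a self-contained sketch of a module matching the docstring (multiply the input vector by two); the exact layer layout is an assumption rather than something shown in the diff:

# Hedged sketch of a MultiGPUNet-style module; the real multigpu.py may differ.
import torch
from torch import nn


class MultiGPUNet(nn.Module):
    """PyTorch module multiplying an input vector by 2."""

    def forward(self, batch: torch.Tensor) -> torch.Tensor:
        # Scale every element of the input batch by a factor of two.
        return 2.0 * batch


if __name__ == "__main__":
    model = MultiGPUNet().to(torch.device("cuda"))
    model.eval()
    with torch.no_grad():
        print(model(torch.tensor([0.0, 1.0, 2.0, 3.0, 4.0], device="cuda")))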
examples/3_MultiGPU/multigpu_infer_python.py
@@ -1,4 +1,4 @@
"""Load saved SimpleNet to TorchScript and run inference example."""
"""Load saved MultiGPUNet to TorchScript and run inference example."""

import torch
from mpi4py import MPI
@@ -50,14 +50,13 @@ def deploy(saved_model: str, device: str, batch_size: int = 1) -> torch.Tensor:


if __name__ == "__main__":
saved_model_file = "saved_simplenet_model_cuda.pt"
saved_model_file = "saved_multigpu_model_cuda.pt"

-rank = MPI.COMM_WORLD.rank
-device_to_run = f"cuda:{rank}"
+device_to_run = f"cuda:{MPI.COMM_WORLD.rank}"

batch_size_to_run = 1

with torch.no_grad():
result = deploy(saved_model_file, device_to_run, batch_size_to_run)

print(f"{rank}: {result}")
print(f"Output on device {device_to_run}: {result}")
examples/3_MultiGPU/pt2ts.py (8 changes: 5 additions & 3 deletions)
@@ -1,11 +1,13 @@
"""Load a PyTorch model and convert it to TorchScript."""

import os
import sys
from typing import Optional

# FPTLIB-TODO
# Add a module import with your model here:
# This example assumes the model architecture is in an adjacent module `my_ml_model.py`
-import simplenet
+import multigpu
import torch


@@ -79,7 +81,7 @@ def load_torchscript(filename: Optional[str] = "saved_model.pt") -> torch.nn.Module:
# Insert code here to load your model as `trained_model`.
# This example assumes my_ml_model has a method `initialize` to load
# architecture, weights, and place in inference mode
-trained_model = simplenet.SimpleNet()
+trained_model = multigpu.MultiGPUNet()

# Switch off specific layers/parts of the model that behave
# differently during training and inference.
@@ -115,7 +117,7 @@ def load_torchscript(filename: Optional[str] = "saved_model.pt") -> torch.nn.Module:

# FPTLIB-TODO
# Set the name of the file you want to save the torchscript model to:
saved_ts_filename = "saved_simplenet_model_cuda.pt"
saved_ts_filename = "saved_multigpu_model_cuda.pt"
# A filepath may also be provided. To do this, pass the filepath as an argument to
# this script when it is run from the command line, i.e. `./pt2ts.py path/to/model`.

examples/4_MultiIO/CMakeLists.txt (6 changes: 3 additions & 3 deletions)
@@ -29,7 +29,7 @@ if(CMAKE_BUILD_TESTS)
add_test(NAME multiionet COMMAND ${Python_EXECUTABLE}
${PROJECT_SOURCE_DIR}/multiionet.py)

-# 1. Check the model is saved to file in the expected location with the
+# 2. Check the model is saved to file in the expected location with the
# pt2ts.py script
add_test(
NAME pt2ts
@@ -38,7 +38,7 @@ if(CMAKE_BUILD_TESTS)
# the model
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})

-# 1. Check the model can be loaded from file and run in Python and that its
+# 3. Check the model can be loaded from file and run in Python and that its
# outputs meet expectations
add_test(
NAME multiionet_infer_python
@@ -47,7 +47,7 @@ if(CMAKE_BUILD_TESTS)
${PROJECT_BINARY_DIR} # Command line argument: filepath to find the model
WORKING_DIRECTORY ${PROJECT_BINARY_DIR})

-# 1. Check the model can be loaded from file and run in Fortran and that its
+# 4. Check the model can be loaded from file and run in Fortran and that its
# outputs meet expectations
add_test(
NAME multiionet_infer_fortran
examples/6_Autograd/CMakeLists.txt (2 changes: 1 addition & 1 deletion)
@@ -29,7 +29,7 @@ if(CMAKE_BUILD_TESTS)
add_test(NAME pyautograd COMMAND ${Python_EXECUTABLE}
${PROJECT_SOURCE_DIR}/autograd.py)

-# 1. Check the Fortran Autograd script runs successfully
+# 2. Check the Fortran Autograd script runs successfully
add_test(
NAME fautograd
COMMAND autograd
examples/CMakeLists.txt (4 changes: 3 additions & 1 deletion)
@@ -1,7 +1,9 @@
if(CMAKE_BUILD_TESTS)
add_subdirectory(1_SimpleNet)
add_subdirectory(2_ResNet18)
-# add_subdirectory(3_MultiGPU)
+if(ENABLE_CUDA AND ENABLE_MPI)
+add_subdirectory(3_MultiGPU)
+endif()
add_subdirectory(4_MultiIO)
# add_subdirectory(5_Looping)
add_subdirectory(6_Autograd)
run_test_suite.sh (6 changes: 5 additions & 1 deletion)
@@ -82,7 +82,11 @@ fi

# Run integration tests
if [ "${UNIT_ONLY}" = false ]; then
-EXAMPLES="1_SimpleNet 2_ResNet18 4_MultiIO 6_Autograd"
+if [ -e "${BUILD_DIR}/test/examples/3_MultiGPU" ]; then
+EXAMPLES="1_SimpleNet 2_ResNet18 3_MultiGPU 4_MultiIO 6_Autograd"
+else
+EXAMPLES="1_SimpleNet 2_ResNet18 4_MultiIO 6_Autograd"
+fi
for EXAMPLE in ${EXAMPLES}; do
pip -q install -r examples/"${EXAMPLE}"/requirements.txt
cd "${BUILD_DIR}"/test/examples/"${EXAMPLE}"
src/CMakeLists.txt (6 changes: 4 additions & 2 deletions)
@@ -127,8 +127,10 @@ if(CMAKE_BUILD_TESTS)
DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/test/examples)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../examples/2_ResNet18
DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/test/examples)
-# file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../examples/3_MultiGPU
-# DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/test/examples)
+if(ENABLE_CUDA AND ENABLE_MPI)
+file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../examples/3_MultiGPU
+DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/test/examples)
+endif()
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../examples/4_MultiIO
DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/test/examples)
# file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../examples/5_Looping
