Convert example 3 to XPU

Cambridge-ICCS · Jan 24, 2025 · 6034cb7
1 parent 4504725
commit 6034cb7
Show file tree

Hide file tree

Showing 7 changed files with 28 additions and 29 deletions.
diff --git a/examples/3_MultiGPU/CMakeLists.txt b/examples/3_MultiGPU/CMakeLists.txt
@@ -18,12 +18,12 @@ find_package(FTorch)
 find_package(MPI REQUIRED)
 message(STATUS "Building with Fortran PyTorch coupling")
 
-check_language(CUDA)
-if(CMAKE_CUDA_COMPILER)
-  enable_language(CUDA)
-else()
-  message(ERROR "No CUDA support")
-endif()
+# check_language(CUDA)
+# if(CMAKE_CUDA_COMPILER)
+#   enable_language(CUDA)
+# else()
+#   message(ERROR "No CUDA support")
+# endif()
 
 # Fortran example
 add_executable(multigpu_infer_fortran multigpu_infer_fortran.f90)
@@ -47,14 +47,14 @@ if (CMAKE_BUILD_TESTS)
                                   # the model
     WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
 
-  # 3. Check the model can be loaded from file and run in Python and that its
-  #   outputs meet expectations
-  add_test(
-    NAME multigpu_infer_python
-    COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/multigpu_infer_python.py
-            ${PROJECT_BINARY_DIR} # Command line argument: filepath to find the
-                                  # model
-    WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
+  # # 3. Check the model can be loaded from file and run in Python and that its
+  # #   outputs meet expectations
+  # add_test(
+  #   NAME multigpu_infer_python
+  #   COMMAND ${Python_EXECUTABLE} ${PROJECT_SOURCE_DIR}/multigpu_infer_python.py
+  #           ${PROJECT_BINARY_DIR} # Command line argument: filepath to find the
+  #                                 # model
+  #   WORKING_DIRECTORY ${PROJECT_BINARY_DIR})
 
   # 4. Check the model can be loaded from file and run in Fortran and that its
   #   outputs meet expectations

diff --git a/examples/3_MultiGPU/multigpu.py b/examples/3_MultiGPU/multigpu.py
@@ -42,13 +42,13 @@ def forward(self, batch: torch.Tensor) -> torch.Tensor:
 
 
 if __name__ == "__main__":
-    model = MultiGPUNet().to(torch.device("cuda"))
+    model = MultiGPUNet().to(torch.device("xpu"))
     model.eval()
 
     input_tensor = torch.Tensor([0.0, 1.0, 2.0, 3.0, 4.0])
-    input_tensor_gpu = input_tensor.to(torch.device("cuda"))
+    input_tensor_gpu = input_tensor.to(torch.device("xpu"))
 
-    print(f"SimpleNet forward pass on CUDA device {input_tensor_gpu.get_device()}")
+    print(f"SimpleNet forward pass on XPU device {input_tensor_gpu.get_device()}")
     with torch.no_grad():
         output = model(input_tensor_gpu)
     print(output)
diff --git a/examples/3_MultiGPU/multigpu_infer_fortran.f90 b/examples/3_MultiGPU/multigpu_infer_fortran.f90
@@ -4,7 +4,7 @@ program inference
    use, intrinsic :: iso_fortran_env, only : sp => real32
 
    ! Import our library for interfacing with PyTorch
-   use ftorch, only : torch_model, torch_tensor, torch_kCUDA, torch_kCPU, &
+   use ftorch, only : torch_model, torch_tensor, torch_kXPU, torch_kCPU, &
                       torch_tensor_from_array, torch_model_load, torch_model_forward, &
                       torch_delete
 
@@ -49,9 +49,9 @@ program inference
 
    ! Create Torch input tensor from the above array and assign it to the first (and only)
    ! element in the array of input tensors.
-   ! We use the torch_kCUDA device type with device index corresponding to the MPI rank.
+   ! We use the torch_kXPU device type with device index corresponding to the MPI rank.
    call torch_tensor_from_array(in_tensors(1), in_data, tensor_layout, &
-                                torch_kCUDA, device_index=rank)
+                                torch_kXPU, device_index=rank)
 
    ! Create Torch output tensor from the above array.
    ! Here we use the torch_kCPU device type since the tensor is for output only
@@ -60,7 +60,7 @@ program inference
 
    ! Load ML model. Ensure that the same device type and device index are used
    ! as for the input data.
-   call torch_model_load(model, args(1), device_type=torch_kCUDA,                 &
+   call torch_model_load(model, args(1), device_type=torch_kXPU,                 &
                              device_index=rank)
 
    ! Infer

diff --git a/examples/3_MultiGPU/pt2ts.py b/examples/3_MultiGPU/pt2ts.py
@@ -99,7 +99,7 @@ def load_torchscript(filename: Optional[str] = "saved_model.pt") -> torch.nn.Mod
 
     # FPTLIB-TODO
     # Uncomment the following lines to save for inference on GPU (rather than CPU):
-    device = torch.device("cuda")
+    device = torch.device("xpu")
     trained_model = trained_model.to(device)
     trained_model.eval()
     trained_model_dummy_input = trained_model_dummy_input.to(device)
@@ -117,7 +117,7 @@ def load_torchscript(filename: Optional[str] = "saved_model.pt") -> torch.nn.Mod
 
     # FPTLIB-TODO
     # Set the name of the file you want to save the torchscript model to:
-    saved_ts_filename = "saved_multigpu_model_cuda.pt"
+    saved_ts_filename = "saved_multigpu_model_xpu.pt"
     # A filepath may also be provided. To do this, pass the filepath as an argument to
     # this script when it is run from the command line, i.e. `./pt2ts.py path/to/model`.
 
@@ -145,7 +145,7 @@ def load_torchscript(filename: Optional[str] = "saved_model.pt") -> torch.nn.Mod
     # FPTLIB-TODO
     # Scale inputs as above and, if required, move inputs and model to GPU
     trained_model_dummy_input = 2.0 * trained_model_dummy_input
-    trained_model_dummy_input = trained_model_dummy_input.to("cuda")
+    trained_model_dummy_input = trained_model_dummy_input.to("xpu")
     trained_model_testing_outputs = trained_model(
         trained_model_dummy_input,
     )

diff --git a/examples/3_MultiGPU/requirements.txt b/examples/3_MultiGPU/requirements.txt
@@ -1,2 +1 @@
-mpi4py
 torch
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -1,9 +1,9 @@
 if(CMAKE_BUILD_TESTS)
   add_subdirectory(1_SimpleNet)
   add_subdirectory(2_ResNet18)
-  if(ENABLE_CUDA AND ENABLE_MPI)
+  # if(ENABLE_CUDA AND ENABLE_MPI)
     add_subdirectory(3_MultiGPU)
-  endif()
+  # endif()
   add_subdirectory(4_MultiIO)
   # add_subdirectory(5_Looping)
   add_subdirectory(6_Autograd)

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
@@ -127,10 +127,10 @@ if(CMAKE_BUILD_TESTS)
        DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/test/examples)
   file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../examples/2_ResNet18
        DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/test/examples)
-  if(ENABLE_CUDA AND ENABLE_MPI)
+  # if(ENABLE_CUDA AND ENABLE_MPI)
     file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../examples/3_MultiGPU
         DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/test/examples)
-  endif()
+  # endif()
   file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../examples/4_MultiIO
        DESTINATION ${CMAKE_CURRENT_SOURCE_DIR}/test/examples)
   # file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/../examples/5_Looping