Commit d25714a

Merge branch 'main' into penghuic/update_matmul_cuda_cases

PenghuiCheng authored Mar 7, 2025
2 parents d919c33 + 3f93cf8

Showing 54 changed files with 435 additions and 275 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt

@@ -40,7 +40,7 @@ include(${TORCH_XPU_OPS_ROOT}/cmake/ONEMKL.cmake)
 include(${TORCH_XPU_OPS_ROOT}/cmake/BuildFlags.cmake)

 option(USE_XCCL "Build with XCCL support" OFF)
-option(USE_C10D_XCCL "Build with XCCL support for C10D" OFF)
+option(USE_C10D_XCCL "Build with XCCL support for C10D" ON)

 # -- [ Re-generate the macros file for https://github.com/pytorch/pytorch/pull/147161
 macro(update_caffe2_macros_file)
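The default for USE_C10D_XCCL flips from OFF to ON, so C10D builds now pick up XCCL support unless the user opts out (the usual route is -DUSE_C10D_XCCL=OFF on the configure command line). A minimal sketch of opting back out from a hypothetical superproject, not part of this commit:

# Hypothetical superproject snippet: pin the old default before this
# CMakeLists.txt is processed; FORCE overwrites any cached ON value.
set(USE_C10D_XCCL OFF CACHE BOOL "Build with XCCL support for C10D" FORCE)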
1 change: 1 addition & 0 deletions cmake/BuildFlags.cmake

@@ -120,6 +120,7 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "MSVC"
     set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -cl-poison-unsupported-fp64-kernels")
     set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -cl-intel-enable-auto-large-GRF-mode")
     set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -cl-fp32-correctly-rounded-divide-sqrt")
+    set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "${SYCL_OFFLINE_COMPILER_CG_OPTIONS} -cl-intel-greater-than-4GB-buffer-required")
     set(SYCL_OFFLINE_COMPILER_CG_OPTIONS "-options '${SYCL_OFFLINE_COMPILER_CG_OPTIONS}'")

 # LNL and BMG share the same compatibility name, which is BMG. BMG is defined as the base platform.
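Each flag is appended to one space-separated string, and the whole set is then wrapped once in -options '...' so the offline compiler receives it as a single quoted argument group. A standalone sketch of the same accumulate-then-wrap pattern, with an illustrative variable name only:

# Accumulate device-compiler flags into one string, then wrap them once so
# they travel as a single quoted argument group.
set(cg_opts "-cl-poison-unsupported-fp64-kernels")
set(cg_opts "${cg_opts} -cl-intel-greater-than-4GB-buffer-required")
set(cg_opts "-options '${cg_opts}'")
message(STATUS "device backend options: ${cg_opts}")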
168 changes: 82 additions & 86 deletions cmake/Codegen.cmake

@@ -1,89 +1,93 @@
-if(Codegen_GPU_cmake_included)
+if(Codegen_XPU_cmake_included)
   return()
 endif()
-set(Codegen_GPU_cmake_included true)
+set(Codegen_XPU_cmake_included true)

-set(BUILD_TORCH_XPU_ATEN_GENERATED "${CMAKE_BINARY_DIR}/xpu/ATen/")
+set(BUILD_TORCH_XPU_ATEN_GENERATED "${CMAKE_BINARY_DIR}/xpu/ATen")
+set(BUILD_TORCH_ATEN_GENERATED "${CMAKE_BINARY_DIR}/aten/src/ATen")
 file(MAKE_DIRECTORY ${BUILD_TORCH_XPU_ATEN_GENERATED})

-set(RegisterXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterXPU_0.cpp)
-set(RegisterSparseXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseXPU_0.cpp)
-set(RegisterSparseCsrXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseCsrXPU_0.cpp)
-set(RegisterNestedTensorXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterNestedTensorXPU_0.cpp)
-set(XPUFallback_PATH ${TORCH_XPU_OPS_ROOT}/src/ATen/native/xpu/XPUFallback.template)
+set(RegisterXPU_GENERATED ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterXPU_0.cpp)
+set(RegisterSparseXPU_GENERATED ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseXPU_0.cpp)
+set(RegisterSparseCsrXPU_GENERATED ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseCsrXPU_0.cpp)
+set(RegisterNestedTensorXPU_GENERATED ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterNestedTensorXPU_0.cpp)
+set(XPUFallback_TEMPLATE ${TORCH_XPU_OPS_ROOT}/src/ATen/native/xpu/XPUFallback.template)
+set(XPU_AOTI_INSTALL_DIR ${TORCH_ROOT}/torch/csrc/inductor/aoti_torch/generated/extend)
+set(XPU_AOTI_SHIM_HEADER ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.h)
+set(XPU_AOTI_SHIM_SOURCE ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.cpp)

 if(WIN32)
   set(FILE_DISPLAY_CMD type)
-  # replace forward slash with back slash for compatibility with 'type' command on Windows
-  string(REPLACE "/" "\\" RegisterXPU_PATH_BACKSLASH "${RegisterXPU_PATH}")
-  string(REPLACE "/" "\\" XPUFallback_PATH_BACKSLASH "${XPUFallback_PATH}")
-  set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_PATH_BACKSLASH} ">>" ${RegisterXPU_PATH_BACKSLASH})
 else()
   set(FILE_DISPLAY_CMD cat)
-  set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_PATH} ">>" ${RegisterXPU_PATH})
 endif()
+file(TO_NATIVE_PATH "${RegisterXPU_GENERATED}" RegisterXPU_GENERATED_NATIVE)
+file(TO_NATIVE_PATH "${XPUFallback_TEMPLATE}" XPUFallback_TEMPLATE_NATIVE)
+set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_TEMPLATE_NATIVE} ">>" ${RegisterXPU_GENERATED_NATIVE})

-function(GEN_BACKEND file_yaml)
-  set(generated_files "")
-  foreach(f ${ARGN})
-    list(APPEND generated_files "${BUILD_TORCH_XPU_ATEN_GENERATED}/${f}")
-  endforeach()
-  file(GLOB_RECURSE depended_files ${TORCH_XPU_OPS_ROOT}/yaml/${file_yaml})
-  add_custom_command(
-    OUTPUT ${generated_files}
-    COMMAND
-    "${PYTHON_EXECUTABLE}" -m torchgen.gen_backend_stubs
-    --output_dir ${BUILD_TORCH_XPU_ATEN_GENERATED}
-    --source_yaml ${TORCH_XPU_OPS_ROOT}/yaml/${file_yaml}
-    COMMAND
-    ${REGISTER_FALLBACK_CMD}
-    ${SIMPLE_TRACE}
-    WORKING_DIRECTORY ${TORCH_ROOT}
-    DEPENDS
-    ${depended_files}
-    ${TORCH_XPU_OPS_ROOT}/yaml/${file_yaml}
-    ${XPUFallback_PATH}
-  )
-endfunction(GEN_BACKEND)
-
-
-set(RegisterXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterXPU_0.cpp)
-set(RegisterSparseXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseXPU_0.cpp)
-set(RegisterSparseCsrXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseCsrXPU_0.cpp)
-set(RegisterNestedTensorXPU_PATH ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterNestedTensorXPU_0.cpp)
-set(XPUFallback_PATH ${TORCH_XPU_OPS_ROOT}/src/ATen/native/xpu/XPUFallback.template)
-set(XPU_AOTI_INSTALL_DIR ${TORCH_ROOT}/torch/csrc/inductor/aoti_torch/generated/extend)
 function(GEN_XPU file_yaml)
   set(generated_files "")
   foreach(f ${ARGN})
     list(APPEND generated_files "${f}")
   endforeach()
   file(GLOB_RECURSE depend_files ${TORCH_XPU_OPS_ROOT}/yaml/${file_yaml})
-  set(CODEGEN_TEMPLATE ${TORCH_XPU_OPS_ROOT}/yaml/)
+  set(CODEGEN_XPU_YAML_DIR ${TORCH_XPU_OPS_ROOT}/yaml)

   # Codegen prepare process
   if(WIN32)
-    string(REPLACE "/" "\\" DestPATH "${CODEGEN_TEMPLATE}templates")
-    string(REPLACE "/" "\\" SrcPATH "${CMAKE_SOURCE_DIR}/aten/src/ATen/templates")
+    file(TO_NATIVE_PATH "${CODEGEN_XPU_YAML_DIR}/templates" DestPATH)
+    file(TO_NATIVE_PATH "${CMAKE_SOURCE_DIR}/aten/src/ATen/templates" SrcPATH)
     execute_process(COMMAND cmd /c xcopy ${SrcPATH} ${DestPATH} /E /H /C /I /Y > nul)
-    string(REPLACE "/" "\\" RegisterXPU_PATH_BACKSLASH "${RegisterXPU_PATH}")
-    string(REPLACE "/" "\\" XPUFallback_PATH_BACKSLASH "${XPUFallback_PATH}")
-    set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_PATH_BACKSLASH} ">>" ${RegisterXPU_PATH_BACKSLASH})
   else()
-    execute_process(COMMAND ln -s ${CMAKE_SOURCE_DIR}/aten/src/ATen/templates ${CODEGEN_TEMPLATE}) # soft link to pytorch templates
-    set(REGISTER_FALLBACK_CMD ${FILE_DISPLAY_CMD} ${XPUFallback_PATH} ">>" ${RegisterXPU_PATH})
+    execute_process(COMMAND ln -s ${CMAKE_SOURCE_DIR}/aten/src/ATen/templates ${CODEGEN_XPU_YAML_DIR}) # soft link to pytorch templates
   endif()
-  add_custom_command(
-    OUTPUT ${generated_files}
-    COMMAND

+  set(XPU_CODEGEN_COMMAND
     "${PYTHON_EXECUTABLE}" -m torchgen.gen
-    --source-path ${TORCH_XPU_OPS_ROOT}/yaml/
+    --source-path ${CODEGEN_XPU_YAML_DIR}
     --install-dir ${BUILD_TORCH_XPU_ATEN_GENERATED}
     --per-operator-headers
-    --static-dispatch-backend
     --backend-whitelist XPU SparseXPU SparseCsrXPU NestedTensorXPU
+    # --xpu: generate in-tree RegisterXPU_0.cpp for in-tree OPs
     --xpu
+  )

+  set(XPU_INSTALL_HEADER_COMMAND
+    "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/install_xpu_headers.py
+    --src-header-dir ${BUILD_TORCH_XPU_ATEN_GENERATED}
+    --dst-header-dir ${BUILD_TORCH_ATEN_GENERATED}
+  )

+  execute_process(
+    COMMAND
+    ${XPU_CODEGEN_COMMAND}
+    --generate headers
+    --dry-run
+    --output-dependencies ${BUILD_TORCH_XPU_ATEN_GENERATED}/generated_headers.cmake
+    RESULT_VARIABLE RETURN_VALUE
+    WORKING_DIRECTORY ${TORCH_ROOT}
+  )

+  if(NOT RETURN_VALUE EQUAL 0)
+    message(FATAL_ERROR "Failed to get generated_headers list")
+  endif()

+  execute_process(
+    COMMAND
+    ${XPU_INSTALL_HEADER_COMMAND}
+    --dry-run
+    RESULT_VARIABLE RETURN_VALUE
+    WORKING_DIRECTORY ${TORCH_ROOT}
+  )

+  if(NOT RETURN_VALUE EQUAL 0)
+    message(FATAL_ERROR "Failed to get XPU header list to install")
+  endif()

+  add_custom_command(
+    COMMENT "Generating XPU ATen Codegen..."
+    OUTPUT ${generated_files}
+    COMMAND
+    ${XPU_CODEGEN_COMMAND}
+    --static-dispatch-backend
+    # --update-aoti-c-shim: generate extend/c_shim_xpu.h
+    --update-aoti-c-shim
+    # --extend-aoti-c-shim: specify the extend/c_shim_xpu
@@ -95,16 +99,13 @@ function(GEN_XPU file_yaml)
     COMMAND
     ${REGISTER_FALLBACK_CMD}
     # Codegen post-process
-    COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterXPU_PATH}
-    COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterSparseXPU_PATH}
-    COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterSparseCsrXPU_PATH}
-    COMMAND "${PYTHON_EXECUTABLE}" ${TORCH_XPU_OPS_ROOT}/tools/codegen/remove_headers.py --register_xpu_path ${RegisterNestedTensorXPU_PATH}
     ${SIMPLE_TRACE}
+    COMMAND
+    ${XPU_INSTALL_HEADER_COMMAND}
     WORKING_DIRECTORY ${TORCH_ROOT}
     DEPENDS
     ${depended_files}
-    ${TORCH_XPU_OPS_ROOT}/yaml/native/${file_yaml}
-    ${XPUFallback_PATH}
+    ${CODEGEN_XPU_YAML_DIR}/native/${file_yaml}
+    ${XPUFallback_TEMPLATE}
+    ${TORCH_XPU_OPS_ROOT}/tools/codegen/install_xpu_headers.py
   )

   # Post codegen delete the copied templates folder only on Windows.
@@ -118,30 +119,25 @@ function(GEN_XPU file_yaml)
   endif()
 endfunction(GEN_XPU)

-# GEN_BACKEND(
-#   xpu_functions.yaml
-#   XPUNativeFunctions.h
-#   RegisterXPU_0.cpp)
-
 GEN_XPU(
   native_functions.yaml
-  ${BUILD_TORCH_XPU_ATEN_GENERATED}/XPUFunctions.h
-  ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterXPU_0.cpp
-  ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseXPU_0.cpp
-  ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterSparseCsrXPU_0.cpp
-  ${BUILD_TORCH_XPU_ATEN_GENERATED}/RegisterNestedTensorXPU_0.cpp
-  ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.h
-  ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.cpp
+  ${BUILD_TORCH_XPU_ATEN_GENERATED}/XPUFunctions_inl.h
+  ${RegisterXPU_GENERATED}
+  ${RegisterSparseXPU_GENERATED}
+  ${RegisterSparseCsrXPU_GENERATED}
+  ${RegisterNestedTensorXPU_GENERATED}
+  ${XPU_AOTI_SHIM_HEADER}
+  ${XPU_AOTI_SHIM_SOURCE}
 )

+include(${BUILD_TORCH_XPU_ATEN_GENERATED}/xpu_ops_generated_headers.cmake)

 # The c_shim_xpu.cpp needs include files in ${CMAKE_BINARY_DIR}/xpu/ATen/ops/*.h
 # The include path is auto generated as "#include <ATen/ops/*.h>"
 # To follow the design of aoti codegen, here ${CMAKE_BINARY_DIR}/xpu is added to
 # $TORCH_XPU_OPS_INCLUDE_DIRS, so that "#include <ATen/ops/*.h>" works.
 list(APPEND TORCH_XPU_OPS_INCLUDE_DIRS ${CMAKE_BINARY_DIR}/xpu)

-list(APPEND xpu_generated_src ${RegisterXPU_PATH} ${RegisterSparseXPU_PATH} ${RegisterSparseCsrXPU_PATH} ${RegisterNestedTensorXPU_PATH})
-list(APPEND xpu_generated_src ${XPU_AOTI_INSTALL_DIR}/c_shim_xpu.cpp)
-add_custom_target(TORCH_XPU_GEN_TARGET DEPENDS ${xpu_generated_src})
+list(APPEND xpu_generated_src
+  ${RegisterXPU_GENERATED}
+  ${RegisterSparseXPU_GENERATED}
+  ${RegisterSparseCsrXPU_GENERATED}
+  ${RegisterNestedTensorXPU_GENERATED}
+  ${XPU_AOTI_SHIM_SOURCE}
+)
 set(ATen_XPU_GEN_SRCS ${xpu_generated_src})
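The refactor splits codegen into a configure-time dry run and a build-time rule: execute_process asks torchgen for the header list up front, the resulting xpu_ops_generated_headers.cmake is include()d so later install rules can iterate over it, and the expensive generation runs only inside add_custom_command. A minimal sketch of the same pattern under assumed names (my_gen.py and gen_headers.cmake are illustrative, not from this commit):

# Configure time: a dry run writes "set(my_generated_headers a.h b.h)" into a
# .cmake fragment; include() makes that list visible to install() rules.
execute_process(
  COMMAND "${PYTHON_EXECUTABLE}" my_gen.py --dry-run --out ${CMAKE_BINARY_DIR}/gen_headers.cmake
  RESULT_VARIABLE ret
)
if(NOT ret EQUAL 0)
  message(FATAL_ERROR "dry run failed")
endif()
include(${CMAKE_BINARY_DIR}/gen_headers.cmake)
# Build time: generation reruns only when its inputs change.
add_custom_command(OUTPUT ${my_generated_headers} COMMAND "${PYTHON_EXECUTABLE}" my_gen.py)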
4 changes: 4 additions & 0 deletions src/ATen/CMakeLists.txt

@@ -19,3 +19,7 @@ set(ATen_XPU_SYCL_SRCS ${ATen_XPU_SYCL_SRCS} PARENT_SCOPE)
 foreach(HEADER ${xpu_h})
   install(FILES ${HEADER} DESTINATION "${AT_INSTALL_INCLUDE_DIR}/ATen/xpu")
 endforeach()
+
+foreach(HEADER ${xpu_ops_generated_headers})
+  install(FILES ${HEADER} DESTINATION ${AT_INSTALL_INCLUDE_DIR}/ATen/ops)
+endforeach()
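The new loop consumes the xpu_ops_generated_headers list that Codegen.cmake brings in via include(...xpu_ops_generated_headers.cmake). A sketch of what such an included fragment plausibly looks like (the header paths are illustrative, not the commit's actual contents):

# Hypothetical contents of xpu_ops_generated_headers.cmake: one list variable
# naming every generated per-operator header, ready for install(FILES ...).
set(xpu_ops_generated_headers
  ${CMAKE_BINARY_DIR}/xpu/ATen/ops/add_native.h
  ${CMAKE_BINARY_DIR}/xpu/ATen/ops/gelu_native.h
)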
4 changes: 2 additions & 2 deletions src/ATen/native/sparse/xpu/SparseCsrTensorMath.cpp

@@ -1,7 +1,7 @@
 #include <ATen/native/sparse/SparseStubs.h>
 #include <ATen/native/sparse/xpu/sycl/SparseCsrTensorMathKernels.h>
-#include <xpu/ATen/ops/_convert_indices_from_coo_to_csr_native.h>
-#include <xpu/ATen/ops/_convert_indices_from_csr_to_coo_native.h>
+#include <ATen/ops/_convert_indices_from_coo_to_csr_native.h>
+#include <ATen/ops/_convert_indices_from_csr_to_coo_native.h>

 namespace at::native {
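This include rewrite repeats across the remaining files: the generated headers drop the xpu/ prefix because Codegen.cmake now puts ${CMAKE_BINARY_DIR}/xpu on the include path, so <ATen/ops/...> resolves to the XPU-generated copies the same way it does for other backends. A minimal sketch of that wiring (torch_xpu_ops_example is a hypothetical target name):

# With the build's xpu/ directory on the include path, the plain
# "#include <ATen/ops/add_native.h>" spelling works for XPU sources too.
target_include_directories(torch_xpu_ops_example PRIVATE ${CMAKE_BINARY_DIR}/xpu)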
6 changes: 3 additions & 3 deletions src/ATen/native/xpu/Activation.cpp

@@ -7,9 +7,9 @@
 #include <ATen/native/TensorIterator.h>

 #include <ATen/ops/empty_like.h>
-#include <xpu/ATen/ops/empty.h>
-#include <xpu/ATen/ops/gelu_backward_native.h>
-#include <xpu/ATen/ops/gelu_native.h>
+#include <ATen/ops/empty.h>
+#include <ATen/ops/gelu_backward_native.h>
+#include <ATen/ops/gelu_native.h>

 #include <ATen/native/xpu/sycl/ActivationEluKernels.h>
 #include <ATen/native/xpu/sycl/ActivationGeluKernel.h>
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/AdaptiveAveragePooling2d.cpp

@@ -7,8 +7,8 @@

 #include <ATen/ops/mean.h>
 #include <ATen/ops/zeros_like.h>
-#include <xpu/ATen/ops/_adaptive_avg_pool2d_backward_native.h>
-#include <xpu/ATen/ops/_adaptive_avg_pool2d_native.h>
+#include <ATen/ops/_adaptive_avg_pool2d_backward_native.h>
+#include <ATen/ops/_adaptive_avg_pool2d_native.h>

 #include <ATen/native/xpu/sycl/AdaptiveAveragePooling2dKernels.h>
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/AdaptiveAveragePooling3d.cpp

@@ -4,8 +4,8 @@

 #include <ATen/ops/empty.h>
 #include <ATen/ops/empty_like.h>
-#include <xpu/ATen/ops/adaptive_avg_pool3d_backward_native.h>
-#include <xpu/ATen/ops/adaptive_avg_pool3d_native.h>
+#include <ATen/ops/adaptive_avg_pool3d_backward_native.h>
+#include <ATen/ops/adaptive_avg_pool3d_native.h>

 namespace at::native {
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/AdaptiveMaxPooling2d.cpp

@@ -4,8 +4,8 @@
 #include <ATen/native/xpu/sycl/AdaptiveMaxPooling2dKernels.h>
 #include <comm/RegisterUtils.h>

-#include <xpu/ATen/ops/adaptive_max_pool2d_backward_native.h>
-#include <xpu/ATen/ops/adaptive_max_pool2d_native.h>
+#include <ATen/ops/adaptive_max_pool2d_backward_native.h>
+#include <ATen/ops/adaptive_max_pool2d_native.h>

 namespace at {
 namespace native {
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/AdaptiveMaxPooling3d.cpp

@@ -4,8 +4,8 @@
 #include <ATen/native/xpu/sycl/AdaptiveMaxPooling3dKernels.h>

 #include <ATen/ops/empty.h>
-#include <xpu/ATen/ops/adaptive_max_pool3d_backward_native.h>
-#include <xpu/ATen/ops/adaptive_max_pool3d_native.h>
+#include <ATen/ops/adaptive_max_pool3d_backward_native.h>
+#include <ATen/ops/adaptive_max_pool3d_native.h>

 namespace at {
 namespace native {
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/AveragePool2d.cpp

@@ -5,8 +5,8 @@
 #include <ATen/native/xpu/sycl/AveragePool2dKernels.h>
 #include <comm/RegisterUtils.h>

-#include <xpu/ATen/ops/avg_pool2d_backward_native.h>
-#include <xpu/ATen/ops/avg_pool2d_native.h>
+#include <ATen/ops/avg_pool2d_backward_native.h>
+#include <ATen/ops/avg_pool2d_native.h>

 namespace at {
 namespace native {
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/AveragePool3d.cpp

@@ -1,8 +1,8 @@
 #include <ATen/core/Tensor.h>
 #include <ATen/native/xpu/sycl/AveragePool3dKernels.h>

-#include <xpu/ATen/ops/avg_pool3d_backward_native.h>
-#include <xpu/ATen/ops/avg_pool3d_native.h>
+#include <ATen/ops/avg_pool3d_backward_native.h>
+#include <ATen/ops/avg_pool3d_native.h>

 namespace at {
 namespace native {
2 changes: 1 addition & 1 deletion src/ATen/native/xpu/BinaryOps.cpp

@@ -4,7 +4,7 @@
 #include <ATen/native/DispatchStub.h>
 #include <ATen/native/TensorIterator.h>

-#include <xpu/ATen/ops/add_native.h>
+#include <ATen/ops/add_native.h>

 #include <ATen/native/xpu/sycl/BinaryBitwiseOpsKernels.h>
 #include <ATen/native/xpu/sycl/BinaryGeometricKernels.h>
2 changes: 1 addition & 1 deletion src/ATen/native/xpu/Col2Im.cpp

@@ -7,7 +7,7 @@
 #include <ATen/native/xpu/sycl/Col2ImKernel.h>

 #include <comm/xpu_aten.h>
-#include <xpu/ATen/ops/col2im_native.h>
+#include <ATen/ops/col2im_native.h>

 namespace at::native {
6 changes: 3 additions & 3 deletions src/ATen/native/xpu/DilatedMaxPool2d.cpp

@@ -4,9 +4,9 @@
 #include <ATen/native/xpu/sycl/DilatedMaxPool2d.h>
 #include <comm/RegisterUtils.h>

-#include <xpu/ATen/ops/max.h>
-#include <xpu/ATen/ops/max_pool2d_with_indices_backward_native.h>
-#include <xpu/ATen/ops/max_pool2d_with_indices_native.h>
+#include <ATen/ops/max.h>
+#include <ATen/ops/max_pool2d_with_indices_backward_native.h>
+#include <ATen/ops/max_pool2d_with_indices_native.h>

 namespace at {
 namespace native {
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/DilatedMaxPool3d.cpp

@@ -2,8 +2,8 @@
 #include <ATen/native/xpu/sycl/DilatedMaxPool3d.h>

 #include <ATen/ops/empty.h>
-#include <xpu/ATen/ops/max_pool3d_with_indices_backward_native.h>
-#include <xpu/ATen/ops/max_pool3d_with_indices_native.h>
+#include <ATen/ops/max_pool3d_with_indices_backward_native.h>
+#include <ATen/ops/max_pool3d_with_indices_native.h>
 namespace at {
 namespace native {
4 changes: 2 additions & 2 deletions src/ATen/native/xpu/Dropout.cpp

@@ -3,8 +3,8 @@
 #include <ATen/native/TensorIterator.h>
 #include <ATen/native/xpu/sycl/DropoutKernels.h>

-#include <xpu/ATen/ops/native_dropout_backward_native.h>
-#include <xpu/ATen/ops/native_dropout_native.h>
+#include <ATen/ops/native_dropout_backward_native.h>
+#include <ATen/ops/native_dropout_native.h>

 #include <comm/xpu_aten.h>
2 changes: 1 addition & 1 deletion src/ATen/native/xpu/Embedding.cpp

@@ -1,6 +1,6 @@
 #include <ATen/core/op_registration/adaption.h>

-#include <xpu/ATen/ops/embedding_dense_backward_native.h>
+#include <ATen/ops/embedding_dense_backward_native.h>

 #include <ATen/native/xpu/sycl/EmbeddingKernels.h>
 #include <comm/xpu_aten.h>