diff --git a/.github/workflows/build-sphinx.yml b/.github/workflows/build-sphinx.yml index 623644a89a6..6246ee13e12 100644 --- a/.github/workflows/build-sphinx.yml +++ b/.github/workflows/build-sphinx.yml @@ -31,7 +31,7 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.11.0 + uses: styfle/cancel-workflow-action@0.12.0 with: access_token: ${{ github.token }} @@ -42,7 +42,7 @@ jobs: echo "$GITHUB_CONTEXT" - name: Free Disk Space (Ubuntu) - uses: jlumbroso/free-disk-space@main + uses: jlumbroso/free-disk-space@v1.3.1 with: docker-images: false @@ -76,13 +76,13 @@ jobs: sudo apt-get install -y nvidia-cuda-toolkit clinfo - name: Checkout repo - uses: actions/checkout@v3.5.2 + uses: actions/checkout@v4.1.1 with: fetch-depth: 0 # https://github.com/marketplace/actions/setup-miniconda - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v2.2.0 + uses: conda-incubator/setup-miniconda@v3.0.1 with: auto-update-conda: true python-version: ${{ env.python-ver }} @@ -97,7 +97,7 @@ jobs: - name: Install dpnp dependencies run: | - conda install numpy"<1.24" dpctl">=0.15.1dev2" mkl-devel-dpcpp onedpl-devel tbb-devel dpcpp_linux-64"<2024.0.1" \ + conda install numpy"<1.24" dpctl">=0.15.1dev3" mkl-devel-dpcpp onedpl-devel tbb-devel dpcpp_linux-64"<2024.0.1" \ cmake cython pytest ninja scikit-build sysroot_linux-64">=2.28" ${{ env.CHANNELS }} - name: Install cuPy dependencies @@ -110,8 +110,7 @@ jobs: run: conda list - name: Build library - run: | - CC=icx CXX=icpx python setup.py develop -G Ninja -- -DDPCTL_MODULE_PATH=$(python -m dpctl --cmakedir) + run: python scripts/build_locally.py - name: Build docs run: make html @@ -184,7 +183,7 @@ jobs: runs-on: ubuntu-20.04 steps: - - uses: actions/checkout@v3.5.2 + - uses: actions/checkout@v4.1.1 with: fetch-depth: 0 diff --git a/.github/workflows/conda-package.yml b/.github/workflows/conda-package.yml index 9b6b093801a..32f62306ae5 100644 --- a/.github/workflows/conda-package.yml +++ 
b/.github/workflows/conda-package.yml @@ -30,6 +30,8 @@ env: test_umath.py test_usm_type.py third_party/cupy/core_tests + third_party/cupy/linalg_tests/test_decomposition.py + third_party/cupy/linalg_tests/test_norms.py third_party/cupy/linalg_tests/test_product.py third_party/cupy/linalg_tests/test_solve.py third_party/cupy/logic_tests/test_comparison.py @@ -66,17 +68,17 @@ jobs: steps: - name: Cancel Previous Runs - uses: styfle/cancel-workflow-action@0.11.0 + uses: styfle/cancel-workflow-action@0.12.0 with: access_token: ${{ github.token }} - name: Checkout DPNP repo - uses: actions/checkout@v3.5.2 + uses: actions/checkout@v4.1.1 with: fetch-depth: 0 - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v2.2.0 + uses: conda-incubator/setup-miniconda@v3.0.1 with: auto-update-conda: true python-version: ${{ matrix.python }} @@ -97,7 +99,7 @@ jobs: run: conda install conda-build - name: Cache conda packages - uses: actions/cache@v3.3.0 + uses: actions/cache@v4 env: CACHE_NUMBER: 1 # Increase to reset cache with: @@ -112,7 +114,7 @@ jobs: run: conda build --no-test --python ${{ matrix.python }} ${{ env.CHANNELS }} conda-recipe - name: Upload artifact - uses: actions/upload-artifact@v3.1.2 + uses: actions/upload-artifact@v4.3.0 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.CONDA_BLD }}${{ env.PACKAGE_NAME }}-*.tar.bz2 @@ -145,7 +147,7 @@ jobs: steps: - name: Download artifact - uses: actions/download-artifact@v3.0.2 + uses: actions/download-artifact@v4.1.1 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.pkg-path-in-channel }} @@ -156,7 +158,7 @@ jobs: tar -xvf ${{ env.pkg-path-in-channel }}/${{ env.PACKAGE_NAME }}-*.tar.bz2 -C ${{ env.extracted-pkg-path }} - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v2.2.0 + uses: conda-incubator/setup-miniconda@v3.0.1 with: auto-update-conda: true python-version: ${{ matrix.python }} @@ -188,7 +190,7 @@ jobs: 
TEST_CHANNELS: '-c ${{ env.channel-path }} ${{ env.CHANNELS }}' - name: Cache conda packages - uses: actions/cache@v3.3.0 + uses: actions/cache@v4 env: CACHE_NUMBER: 1 # Increase to reset cache with: @@ -246,7 +248,7 @@ jobs: steps: - name: Download artifact - uses: actions/download-artifact@v3.0.2 + uses: actions/download-artifact@v4.1.1 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} path: ${{ env.pkg-path-in-channel }} @@ -266,7 +268,7 @@ jobs: dir ${{ env.extracted-pkg-path }} - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v2.2.0 + uses: conda-incubator/setup-miniconda@v3.0.1 with: auto-update-conda: true python-version: ${{ matrix.python }} @@ -312,7 +314,7 @@ jobs: run: more lockfile - name: Cache conda packages - uses: actions/cache@v3.3.0 + uses: actions/cache@v4 env: CACHE_NUMBER: 1 # Increase to reset cache with: @@ -380,12 +382,12 @@ jobs: steps: - name: Download artifact - uses: actions/download-artifact@v3.0.2 + uses: actions/download-artifact@v4.1.1 with: name: ${{ env.PACKAGE_NAME }} ${{ runner.os }} Python ${{ matrix.python }} - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v2.2.0 + uses: conda-incubator/setup-miniconda@v3.0.1 with: auto-update-conda: true python-version: ${{ matrix.python }} @@ -408,7 +410,7 @@ jobs: run: shell: bash -el {0} steps: - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@v3.0.1 with: run-post: false channel-priority: "disabled" @@ -419,7 +421,7 @@ jobs: run: conda install anaconda-client - name: Checkout repo - uses: actions/checkout@v3.5.2 + uses: actions/checkout@v4.1.1 with: repository: IntelPython/devops-tools fetch-depth: 0 diff --git a/.github/workflows/generate_coverage.yaml b/.github/workflows/generate_coverage.yaml index a9e92395762..e7479d445ea 100644 --- a/.github/workflows/generate_coverage.yaml +++ b/.github/workflows/generate_coverage.yaml @@ -19,17 +19,17 @@ jobs: steps: - name: Cancel Previous Runs - 
uses: styfle/cancel-workflow-action@0.11.0 + uses: styfle/cancel-workflow-action@0.12.0 with: access_token: ${{ github.token }} - name: Checkout repo - uses: actions/checkout@v3.5.2 + uses: actions/checkout@v4.1.1 with: fetch-depth: 0 - name: Setup miniconda - uses: conda-incubator/setup-miniconda@v2.2.0 + uses: conda-incubator/setup-miniconda@v3.0.1 with: auto-update-conda: true python-version: ${{ env.python-ver }} @@ -44,7 +44,7 @@ jobs: run: | # use DPC++ compiler 2023.2 to work around an issue with crash conda install cython llvm cmake">=3.21" scikit-build ninja pytest pytest-cov coverage[toml] \ - dpctl">=0.15.1dev2" dpcpp_linux-64"=2023.2" sysroot_linux-64">=2.28" mkl-devel-dpcpp tbb-devel"=2021.10" \ + dpctl">=0.15.1dev3" dpcpp_linux-64"=2023.2" sysroot_linux-64">=2.28" mkl-devel-dpcpp tbb-devel"=2021.10" \ onedpl-devel ${{ env.CHANNELS }} - name: Conda info diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 18668bf10c4..dd5047f22b1 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -16,8 +16,8 @@ jobs: sudo ln -s /usr/bin/clang-format-12 /usr/bin/clang-format clang-format --version - - uses: actions/checkout@v3.5.2 - - uses: actions/setup-python@v4.6.1 + - uses: actions/checkout@v4.1.1 + - uses: actions/setup-python@v5 with: python-version: '3.11' - uses: pre-commit/action@v3.0.0 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a8823fa7421..b3787c3833c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,13 +2,13 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/PyCQA/bandit - rev: '1.7.5' + rev: '1.7.7' hooks: - id: bandit pass_filenames: false args: ["-r", "dpnp", "-lll"] - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v4.5.0 hooks: - id: check-ast - id: check-builtin-literals @@ -43,12 +43,12 @@ repos: - id: rst-inline-touching-normal - id: text-unicode-replacement-char - 
repo: https://github.com/psf/black - rev: 23.7.0 + rev: 23.12.1 hooks: - id: black args: ["--check", "--diff", "--color"] - repo: https://github.com/pycqa/isort - rev: 5.12.0 + rev: 5.13.2 hooks: - id: isort name: isort (python) @@ -59,13 +59,13 @@ repos: name: isort (pyi) types: [pyi] - repo: https://github.com/pycqa/flake8 - rev: 6.1.0 + rev: 7.0.0 hooks: - id: flake8 args: ["--config=.flake8"] additional_dependencies: - flake8-docstrings==1.7.0 - - flake8-bugbear==23.6.5 + - flake8-bugbear==24.1.17 - repo: https://github.com/pocc/pre-commit-hooks rev: v1.3.5 hooks: diff --git a/CMakeLists.txt b/CMakeLists.txt index 46e988bdcbe..3e1a71a4283 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,16 +12,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED True) set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE BOTH) set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY BOTH) -if (NOT DEFINED DPCTL_MODULE_PATH) - if (DEFINED ENV{DPCTL_MODULE_PATH}) - set(DPCTL_MODULE_PATH $ENV{DPCTL_MODULE_PATH}) - else () - message(FATAL_ERROR "Specify DPCTL_MODULE_PATH, either via cmake or as environment varibale") - endif() -endif() - -set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${DPCTL_MODULE_PATH}) - find_package(IntelSYCL REQUIRED PATHS ${CMAKE_SOURCE_DIR}/dpnp/backend/cmake/Modules NO_DEFAULT_PATH) find_package(TBB QUIET) @@ -78,9 +68,9 @@ find_package(NumPy REQUIRED) set(CYTHON_FLAGS "-t -w \"${CMAKE_SOURCE_DIR}\"") find_package(Cython REQUIRED) -find_package(Dpctl REQUIRED) -message(STATUS "Dpctl_INCLUDE_DIRS=" ${Dpctl_INCLUDE_DIRS}) +find_package(Dpctl REQUIRED) +message(STATUS "Dpctl_INCLUDE_DIR=" ${Dpctl_INCLUDE_DIR}) message(STATUS "Dpctl_TENSOR_INCLUDE_DIR=" ${Dpctl_TENSOR_INCLUDE_DIR}) if(WIN32) diff --git a/LICENSE.txt b/LICENSE.txt index 9b32f02892f..e15fe8e291c 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2016-2023, Intel Corporation +Copyright (c) 2016-2024, Intel Corporation All rights reserved. 
Redistribution and use in source and binary forms, with or without diff --git a/benchmarks/pytest_benchmark/test_random.py b/benchmarks/pytest_benchmark/test_random.py index 09478b54bec..7c083d20009 100644 --- a/benchmarks/pytest_benchmark/test_random.py +++ b/benchmarks/pytest_benchmark/test_random.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/conda-recipe/bld.bat b/conda-recipe/bld.bat index 8b2e1927f7f..960b254bd39 100644 --- a/conda-recipe/bld.bat +++ b/conda-recipe/bld.bat @@ -15,11 +15,7 @@ set "MKLROOT=%PREFIX%/Library" set "TBB_ROOT_HINT=%PREFIX%/Library" set "DPL_ROOT_HINT=%PREFIX%/Library" -%PYTHON% -m dpctl --cmakedir > Output -set /p DPCTL_CMAKE_DIR= < Output - set "SKBUILD_ARGS=-G Ninja -- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" -set "SKBUILD_ARGS=%SKBUILD_ARGS% -DDPCTL_MODULE_PATH:PATH=%DPCTL_CMAKE_DIR% " set "SKBUILD_ARGS=%SKBUILD_ARGS% -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" FOR %%V IN (14.0.0 14 15.0.0 15 16.0.0 16 17.0.0 17) DO @( diff --git a/conda-recipe/build.sh b/conda-recipe/build.sh index 015a25bbbbd..b4ea4c44cb2 100755 --- a/conda-recipe/build.sh +++ b/conda-recipe/build.sh @@ -12,8 +12,7 @@ export CMAKE_GENERATOR="Ninja" export TBB_ROOT_HINT=$PREFIX export DPL_ROOT_HINT=$PREFIX export MKL_ROOT_HINT=$PREFIX -SKBUILD_ARGS="-- -DDPCTL_MODULE_PATH=$($PYTHON -m dpctl --cmakedir) " -SKBUILD_ARGS="${SKBUILD_ARGS} -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" +SKBUILD_ARGS="-- -DCMAKE_C_COMPILER:PATH=icx -DCMAKE_CXX_COMPILER:PATH=icpx -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" SKBUILD_ARGS="${SKBUILD_ARGS} -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON" # Build wheel package diff 
--git a/conda-recipe/meta.yaml b/conda-recipe/meta.yaml index 5ac20ad9e59..ffcab2886ae 100644 --- a/conda-recipe/meta.yaml +++ b/conda-recipe/meta.yaml @@ -1,5 +1,5 @@ -{% set required_compiler_and_mkl_version = "2024.0" %} -{% set required_dpctl_version = "0.15.1dev2" %} +{% set required_compiler_and_mkl_version = "2024.1" %} +{% set required_dpctl_version = "0.15.1dev3" %} package: name: dpnp @@ -48,7 +48,8 @@ test: - tests - setup.cfg commands: - - python -c "import dpnp" + - python -c "import dpnp; print(dpnp.__version__)" + - python -m dpctl -f - pytest -s about: diff --git a/doc/conf.py b/doc/conf.py index f895b345d94..037adf6c419 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -28,7 +28,7 @@ # -- Project information ----------------------------------------------------- project = "Data Parallel Extension for NumPy" -copyright = "2020-2023, Intel Corporation" +copyright = "2020-2024, Intel Corporation" author = "Intel" version = dpnp.__version__.strip(".dirty") diff --git a/doc/docstring_template.py b/doc/docstring_template.py index 60a83c181a6..2239d97e227 100644 --- a/doc/docstring_template.py +++ b/doc/docstring_template.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/doc/reference/math.rst b/doc/reference/math.rst index 430ce5b6de6..2b8b2befd79 100644 --- a/doc/reference/math.rst +++ b/doc/reference/math.rst @@ -205,6 +205,7 @@ Miscellaneous dpnp.sqrt dpnp.cbrt dpnp.square + dpnp.rsqrt dpnp.abs dpnp.absolute dpnp.fabs diff --git a/doc/reference/ufunc.rst b/doc/reference/ufunc.rst index 4c1e9f4d5d9..025692a0adf 100644 --- a/doc/reference/ufunc.rst +++ b/doc/reference/ufunc.rst @@ -44,8 +44,10 @@ Math operations dpnp.log1p dpnp.proj dpnp.sqrt + dpnp.cbrt dpnp.square dpnp.reciprocal + dpnp.rsqrt dpnp.gcd dpnp.lcm diff --git a/dpnp/CMakeLists.txt b/dpnp/CMakeLists.txt index ebaf1d7b0ef..dadfb9d476e 100644 --- a/dpnp/CMakeLists.txt +++ b/dpnp/CMakeLists.txt @@ -56,6 +56,7 @@ endfunction() build_dpnp_cython_ext_with_backend(dparray ${CMAKE_CURRENT_SOURCE_DIR}/dparray.pyx dpnp) add_subdirectory(backend) +add_subdirectory(backend/extensions/blas) add_subdirectory(backend/extensions/lapack) add_subdirectory(backend/extensions/vm) add_subdirectory(backend/extensions/sycl_ext) diff --git a/dpnp/__init__.py b/dpnp/__init__.py index 370393b654a..165e57a1bb4 100644 --- a/dpnp/__init__.py +++ b/dpnp/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/CMakeLists.txt b/dpnp/backend/CMakeLists.txt index 18f084d5447..0cc04dbd0f4 100644 --- a/dpnp/backend/CMakeLists.txt +++ b/dpnp/backend/CMakeLists.txt @@ -1,5 +1,5 @@ # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -107,6 +107,8 @@ target_compile_definitions(${_trgt} PUBLIC PSTL_USE_PARALLEL_POLICIES=0) target_compile_definitions(${_trgt} PUBLIC ONEDPL_USE_PREDEFINED_POLICIES=0) target_include_directories(${_trgt} PUBLIC ${Dpctl_INCLUDE_DIR}) +target_include_directories(${_trgt} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) + target_link_directories(${_trgt} PUBLIC "${Dpctl_INCLUDE_DIR}/..") target_link_libraries(${_trgt} PUBLIC DPCTLSyclInterface) diff --git a/dpnp/backend/cmake/Modules/MKLConfig.cmake b/dpnp/backend/cmake/Modules/MKLConfig.cmake index fa6f80a19b6..3458b94c29b 100644 --- a/dpnp/backend/cmake/Modules/MKLConfig.cmake +++ b/dpnp/backend/cmake/Modules/MKLConfig.cmake @@ -1,5 +1,5 @@ #=============================================================================== -# Copyright 2021-2022 Intel Corporation. +# Copyright (c) 2021-2024 Intel Corporation. # # This software and the related documents are Intel copyrighted materials, and # your use of them is governed by the express license under which they were diff --git a/dpnp/backend/cmake/Modules/TBBConfig.cmake b/dpnp/backend/cmake/Modules/TBBConfig.cmake index 5363c1d3808..e7bd40b6b3c 100644 --- a/dpnp/backend/cmake/Modules/TBBConfig.cmake +++ b/dpnp/backend/cmake/Modules/TBBConfig.cmake @@ -1,4 +1,4 @@ -# Copyright (c) 2017-2023 Intel Corporation +# Copyright (c) 2017-2024 Intel Corporation # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. diff --git a/dpnp/backend/examples/example10.cpp b/dpnp/backend/examples/example10.cpp index ac646a7fad9..279854a57ce 100644 --- a/dpnp/backend/examples/example10.cpp +++ b/dpnp/backend/examples/example10.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/examples/example11.cpp b/dpnp/backend/examples/example11.cpp index 0af139ba6ee..52fce9beb3b 100644 --- a/dpnp/backend/examples/example11.cpp +++ b/dpnp/backend/examples/example11.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/examples/example3.cpp b/dpnp/backend/examples/example3.cpp index 2d9f3eb0c1a..48d8acb514c 100644 --- a/dpnp/backend/examples/example3.cpp +++ b/dpnp/backend/examples/example3.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/examples/example5.cpp b/dpnp/backend/examples/example5.cpp index 88c9ddf6cab..368f5fe441b 100644 --- a/dpnp/backend/examples/example5.cpp +++ b/dpnp/backend/examples/example5.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/examples/example7.cpp b/dpnp/backend/examples/example7.cpp index 2c7bf8b4def..df4c7eacf85 100644 --- a/dpnp/backend/examples/example7.cpp +++ b/dpnp/backend/examples/example7.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/examples/example8.cpp b/dpnp/backend/examples/example8.cpp index ffc18a99f42..9970e9803b7 100644 --- a/dpnp/backend/examples/example8.cpp +++ b/dpnp/backend/examples/example8.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/examples/example9.cpp b/dpnp/backend/examples/example9.cpp index ecde823fb83..7aa7bf30737 100644 --- a/dpnp/backend/examples/example9.cpp +++ b/dpnp/backend/examples/example9.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/examples/example_bs.cpp b/dpnp/backend/examples/example_bs.cpp index fe25ebe8a20..847877c9822 100644 --- a/dpnp/backend/examples/example_bs.cpp +++ b/dpnp/backend/examples/example_bs.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/examples/example_experimental_iface.cpp b/dpnp/backend/examples/example_experimental_iface.cpp index dfb343c9ad5..4454a34b9a4 100644 --- a/dpnp/backend/examples/example_experimental_iface.cpp +++ b/dpnp/backend/examples/example_experimental_iface.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/blas/CMakeLists.txt b/dpnp/backend/extensions/blas/CMakeLists.txt new file mode 100644 index 00000000000..d19f60c9792 --- /dev/null +++ b/dpnp/backend/extensions/blas/CMakeLists.txt @@ -0,0 +1,83 @@ +# ***************************************************************************** +# Copyright (c) 2016-2023, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + + +set(python_module_name _blas_impl) +set(_module_src + ${CMAKE_CURRENT_SOURCE_DIR}/blas_py.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/gemm.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/gemm_batch.cpp +) + +pybind11_add_module(${python_module_name} MODULE ${_module_src}) +add_sycl_to_target(TARGET ${python_module_name} SOURCES ${_module_src}) + +if (WIN32) + if (${CMAKE_VERSION} VERSION_LESS "3.27") + # this is a work-around for target_link_options inserting option after -link option, causing + # the linker to ignore it. 
+ set(CMAKE_CXX_LINK_FLAGS "${CMAKE_CXX_LINK_FLAGS} -fsycl-device-code-split=per_kernel") + endif() +endif() + +set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDENT_CODE ON) + +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) +target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) + +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) + +if (WIN32) + target_compile_options(${python_module_name} PRIVATE + /clang:-fno-approx-func + /clang:-fno-finite-math-only + ) +else() + target_compile_options(${python_module_name} PRIVATE + -fno-approx-func + -fno-finite-math-only + ) +endif() + +target_link_options(${python_module_name} PUBLIC -fsycl-device-code-split=per_kernel) +if (UNIX) + # this option is supported on Linux only + target_link_options(${python_module_name} PUBLIC -fsycl-link-huge-device-code) +endif() + +if (DPNP_GENERATE_COVERAGE) + target_link_options(${python_module_name} PRIVATE -fprofile-instr-generate -fcoverage-mapping) +endif() + +if (MKL_VERSION_2024) + target_link_libraries(${python_module_name} PUBLIC MKL::MKL_SYCL::BLAS) +else() + target_link_libraries(${python_module_name} PUBLIC MKL::MKL_DPCPP) +endif() + +install(TARGETS ${python_module_name} + DESTINATION "dpnp/backend/extensions/blas" +) diff --git a/dpnp/backend/extensions/blas/blas_py.cpp b/dpnp/backend/extensions/blas/blas_py.cpp new file mode 100644 index 00000000000..524f16fcc7d --- /dev/null +++ b/dpnp/backend/extensions/blas/blas_py.cpp @@ -0,0 +1,66 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** +// +// This file defines functions of dpnp.backend._blas_impl extensions +// +//***************************************************************************** + +#include +#include + +#include "gemm.hpp" + +namespace blas_ext = dpnp::backend::ext::blas; +namespace py = pybind11; + +// populate dispatch tables +void init_dispatch_tables(void) +{ + blas_ext::init_gemm_batch_dispatch_table(); + blas_ext::init_gemm_dispatch_table(); +} + +PYBIND11_MODULE(_blas_impl, m) +{ + init_dispatch_tables(); + + { + m.def("_gemm", &blas_ext::gemm, + "Call `gemm` from OneMKL BLAS library to return " + "the matrix-matrix product with 2-D matrices.", + py::arg("sycl_queue"), py::arg("matrixA"), py::arg("matrixB"), + py::arg("result"), py::arg("depends") = py::list()); + } + + { + m.def("_gemm_batch", &blas_ext::gemm_batch, + "Call `gemm_batch` from OneMKL BLAS library to return " + "the matrix-matrix product for a batch of 2-D matrices.", + py::arg("sycl_queue"), py::arg("matrixA"), py::arg("matrixB"), + py::arg("result"), py::arg("batch_size"), py::arg("stridea"), + py::arg("strideb"), py::arg("stridec"), + py::arg("depends") = py::list()); + } +} diff --git a/dpnp/backend/extensions/blas/gemm.cpp b/dpnp/backend/extensions/blas/gemm.cpp new file mode 100644 index 00000000000..5526ecd3c1b --- /dev/null +++ b/dpnp/backend/extensions/blas/gemm.cpp @@ -0,0 +1,263 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "gemm.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace blas +{ +namespace mkl_blas = oneapi::mkl::blas; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*gemm_impl_fn_ptr_t)(sycl::queue, + oneapi::mkl::transpose, + oneapi::mkl::transpose, + const std::int64_t, + const std::int64_t, + const std::int64_t, + char *, + const std::int64_t, + char *, + const std::int64_t, + char *, + const std::int64_t, + const std::vector &); + +static gemm_impl_fn_ptr_t gemm_dispatch_table[dpctl_td_ns::num_types] + [dpctl_td_ns::num_types]; + +template +static sycl::event gemm_impl(sycl::queue exec_q, + oneapi::mkl::transpose transA, + oneapi::mkl::transpose transB, + const std::int64_t m, + const std::int64_t n, + const std::int64_t k, + char *matrixA, + const std::int64_t lda, + char *matrixB, + const std::int64_t ldb, + char *resultC, + const std::int64_t ldc, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + type_utils::validate_type_for_device(exec_q); + + Tab *a = reinterpret_cast(matrixA); + Tab *b = reinterpret_cast(matrixB); + Tc *res = reinterpret_cast(resultC); + + std::stringstream error_msg; + bool is_exception_caught = false; + + sycl::event gemm_event; + try { + gemm_event = mkl_blas::row_major::gemm( + exec_q, + transA, // Defines the transpose operation for matrix A: + // 'N' indicates no transpose, 'T' for transpose, + // or 'C' for a conjugate transpose. + transB, // Same as transA but for matrix B. + m, // Number of rows in matrices A and C. + n, // Number of columns in matrices B and C. + k, // Number of columns in matrix A and rows in matrix B. 
+ Tab(1), // Scaling factor for the product of matrices A and B. + a, // Pointer to matrix A. + lda, // Leading dimension of matrix A, which is the + // stride between successive rows (for row major + // layout). + b, // Pointer to matrix B. + ldb, // Leading dimension of matrix B, similar to lda. + Tab(0), // Scaling factor for matrix C. + res, // Pointer to matrix C, where the result is stored. + ldc, // Leading dimension of matrix C. + depends); + } catch (oneapi::mkl::exception const &e) { + error_msg + << "Unexpected MKL exception caught during gemm() call:\nreason: " + << e.what(); + is_exception_caught = true; + } catch (sycl::exception const &e) { + error_msg << "Unexpected SYCL exception caught during gemm() call:\n" + << e.what(); + is_exception_caught = true; + } + + if (is_exception_caught) // an unexpected error occurs + { + throw std::runtime_error(error_msg.str()); + } + + return gemm_event; +} + +std::pair + gemm(sycl::queue exec_q, + dpctl::tensor::usm_ndarray matrixA, + dpctl::tensor::usm_ndarray matrixB, + dpctl::tensor::usm_ndarray resultC, + const std::vector &depends) +{ + const int matrixA_nd = matrixA.get_ndim(); + const int matrixB_nd = matrixB.get_ndim(); + const int resultC_nd = resultC.get_ndim(); + + if ((matrixA_nd != 2) || (matrixB_nd != 2) || (resultC_nd != 2)) { + throw py::value_error("The input matrices must be of 2 dimensions."); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(matrixA, resultC)) { + throw py::value_error("Input array 1 and output array are overlapping " + "segments of memory"); + } + if (overlap(matrixB, resultC)) { + throw py::value_error("Input array 2 and output array are overlapping " + "segments of memory"); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible( + exec_q, + {matrixA.get_queue(), matrixB.get_queue(), resultC.get_queue()})) + { + throw py::value_error( + "USM allocations are not compatible with the 
execution queue."); + } + + bool is_matrixA_f_contig = matrixA.is_f_contiguous(); + bool is_matrixB_f_contig = matrixB.is_f_contiguous(); + bool is_matrixA_c_contig = matrixA.is_c_contiguous(); + bool is_matrixB_c_contig = matrixB.is_c_contiguous(); + + if (!is_matrixA_f_contig and !is_matrixA_c_contig) { + throw py::value_error( + "Input array 1 is not c-contiguous nor f-contiguous."); + } + if (!is_matrixB_f_contig and !is_matrixB_c_contig) { + throw py::value_error( + "Input array 2 is not c-contiguous nor f-contiguous."); + } + + const py::ssize_t *a_shape = matrixA.get_shape_raw(); + const py::ssize_t *b_shape = matrixB.get_shape_raw(); + const py::ssize_t *res_shape = resultC.get_shape_raw(); + + if (a_shape[1] != b_shape[0]) { + throw py::value_error("The number of columns in A must be equal to " + "the number of rows in B."); + } + + oneapi::mkl::transpose transA = is_matrixA_f_contig + ? oneapi::mkl::transpose::T + : oneapi::mkl::transpose::N; + oneapi::mkl::transpose transB = is_matrixB_f_contig + ? oneapi::mkl::transpose::T + : oneapi::mkl::transpose::N; + + const std::int64_t m = a_shape[0]; + const std::int64_t n = b_shape[1]; + const std::int64_t k = a_shape[1]; + + const std::int64_t lda = + (transA == oneapi::mkl::transpose::N) ? a_shape[1] : a_shape[0]; + const std::int64_t ldb = + (transB == oneapi::mkl::transpose::N) ? 
b_shape[1] : b_shape[0]; + const std::int64_t ldc = res_shape[1]; + + int matrixA_typenum = matrixA.get_typenum(); + int matrixB_typenum = matrixB.get_typenum(); + int resultC_typenum = resultC.get_typenum(); + + if (matrixA_typenum != matrixB_typenum) { + throw py::value_error("matrixA and matrixB must be of the same type."); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int matrixAB_type_id = array_types.typenum_to_lookup_id(matrixA_typenum); + int resultC_type_id = array_types.typenum_to_lookup_id(resultC_typenum); + + gemm_impl_fn_ptr_t gemm_fn = + gemm_dispatch_table[matrixAB_type_id][resultC_type_id]; + if (gemm_fn == nullptr) { + throw py::value_error( + "Types of input matrices and result matrix are mismatched."); + } + + char *a_typeless_ptr = matrixA.get_data(); + char *b_typeless_ptr = matrixB.get_data(); + char *r_typeless_ptr = resultC.get_data(); + + sycl::event gemm_ev = + gemm_fn(exec_q, transA, transB, m, n, k, a_typeless_ptr, lda, + b_typeless_ptr, ldb, r_typeless_ptr, ldc, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive( + exec_q, {matrixA, matrixB, resultC}, {gemm_ev}); + + return std::make_pair(args_ev, gemm_ev); +} + +template +struct GemmContigFactory +{ + fnT get() + { + if constexpr (types::GemmTypePairSupportFactory::is_defined) { + return gemm_impl; + } + else { + return nullptr; + } + } +}; + +void init_gemm_dispatch_table(void) +{ + dpctl_td_ns::DispatchTableBuilder + contig; + contig.populate_dispatch_table(gemm_dispatch_table); +} +} // namespace blas +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/blas/gemm.hpp b/dpnp/backend/extensions/blas/gemm.hpp new file mode 100644 index 00000000000..25f78b5b850 --- /dev/null +++ b/dpnp/backend/extensions/blas/gemm.hpp @@ -0,0 +1,64 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include +#include + +#include + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace blas +{ +extern std::pair + gemm(sycl::queue exec_q, + dpctl::tensor::usm_ndarray matrixA, + dpctl::tensor::usm_ndarray matrixB, + dpctl::tensor::usm_ndarray resultC, + const std::vector &depends); + +extern std::pair + gemm_batch(sycl::queue exec_q, + dpctl::tensor::usm_ndarray matrixA, + dpctl::tensor::usm_ndarray matrixB, + dpctl::tensor::usm_ndarray resultC, + const std::int64_t batch_size, + size_t stridea, + size_t strideb, + size_t stridec, + const std::vector &depends); + +extern void init_gemm_dispatch_table(void); +extern void init_gemm_batch_dispatch_table(void); +} // namespace blas +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/blas/gemm_batch.cpp b/dpnp/backend/extensions/blas/gemm_batch.cpp new file mode 100644 index 00000000000..32f592f6b8a --- /dev/null +++ b/dpnp/backend/extensions/blas/gemm_batch.cpp @@ -0,0 +1,253 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "gemm.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace blas +{ +namespace mkl_blas = oneapi::mkl::blas; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*gemm_batch_impl_fn_ptr_t)( + sycl::queue, + const std::int64_t, + const std::int64_t, + const std::int64_t, + const std::int64_t, + const std::int64_t, + const std::int64_t, + const std::int64_t, + size_t, + size_t, + size_t, + oneapi::mkl::transpose, + oneapi::mkl::transpose, + char *, + char *, + char *, + const std::vector &); + +static gemm_batch_impl_fn_ptr_t + gemm_batch_dispatch_table[dpctl_td_ns::num_types][dpctl_td_ns::num_types]; + +template +static sycl::event gemm_batch_impl(sycl::queue exec_q, + const std::int64_t m, + const std::int64_t n, + const std::int64_t k, + const std::int64_t batch_size, + const std::int64_t lda, + const std::int64_t ldb, + const std::int64_t ld_result, 
+ size_t stridea, + size_t strideb, + size_t stridec, + oneapi::mkl::transpose transA, + oneapi::mkl::transpose transB, + char *matrixA, + char *matrixB, + char *resultC, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + type_utils::validate_type_for_device(exec_q); + + Tab *a = reinterpret_cast(matrixA); + Tab *b = reinterpret_cast(matrixB); + Tc *res = reinterpret_cast(resultC); + + std::stringstream error_msg; + bool is_exception_caught = false; + + sycl::event gemm_batch_event; + try { + gemm_batch_event = mkl_blas::row_major::gemm_batch( + exec_q, + transA, // Defines the transpose operation for matrix A: + // 'N' indicates no transpose, 'T' for transpose, + // or 'C' for a conjugate transpose. + transB, // Same as transA but for matrix B. + m, // Number of rows in matrices A and C. + n, // Number of columns in matrices B and C. + k, // Number of columns in matrix A and rows in matrix B. + Tab(1), // Scaling factor for the product of matrices A and B. + a, // Pointer to matrix A. + lda, // Leading dimension of matrix A, which is the + // stride between successive rows (for row major + // layout). + stridea, // Stride between different A matrices. + b, // Pointer to matrix B. + ldb, // Leading dimension of matrix B, similar to lda. + strideb, // Stride between different B matrices. + Tab(0), // Scaling factor for matrix C. + res, // Pointer to matrix C, where the result is stored. + ld_result, // Leading dimension of matrix C. + stridec, // Stride between different C matrices. + batch_size, // Specifies the number of matrix multiply operations to + // perform. 
+ depends); + } catch (oneapi::mkl::exception const &e) { + error_msg << "Unexpected MKL exception caught during gemm_batch() " + "call:\nreason: " + << e.what(); + is_exception_caught = true; + } catch (sycl::exception const &e) { + error_msg + << "Unexpected SYCL exception caught during gemm_batch() call:\n" + << e.what(); + is_exception_caught = true; + } + + if (is_exception_caught) // an unexpected error occurs + { + throw std::runtime_error(error_msg.str()); + } + + return gemm_batch_event; +} + +std::pair + gemm_batch(sycl::queue exec_q, + dpctl::tensor::usm_ndarray matrixA, + dpctl::tensor::usm_ndarray matrixB, + dpctl::tensor::usm_ndarray resultC, + const std::int64_t batch_size, + size_t stridea, + size_t strideb, + size_t stridec, + const std::vector &depends = {}) +{ + if (!dpctl::utils::queues_are_compatible( + exec_q, + {matrixA.get_queue(), matrixB.get_queue(), resultC.get_queue()})) + { + throw py::value_error( + "USM allocations are not compatible with the execution queue."); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(matrixA, resultC)) { + throw py::value_error("Input array 1 and output array are overlapping " + "segments of memory"); + } + if (overlap(matrixB, resultC)) { + throw py::value_error("Input array 2 and output array are overlapping " + "segments of memory"); + } + + const int matrixA_nd = matrixA.get_ndim(); + const int matrixB_nd = matrixB.get_ndim(); + const py::ssize_t *a_shape = matrixA.get_shape_raw(); + const py::ssize_t *b_shape = matrixB.get_shape_raw(); + + if (a_shape[matrixA_nd - 1] != b_shape[matrixB_nd - 2]) { + throw py::value_error("The number of columns in A must be equal to " + "the number of rows in B."); + } + + const std::int64_t m = a_shape[matrixA_nd - 2]; + const std::int64_t n = b_shape[matrixB_nd - 1]; + const std::int64_t k = a_shape[matrixA_nd - 1]; + + // transA and transB are always True + oneapi::mkl::transpose transA = oneapi::mkl::transpose::N; + 
oneapi::mkl::transpose transB = oneapi::mkl::transpose::N; + + int matrixA_typenum = matrixA.get_typenum(); + int matrixB_typenum = matrixB.get_typenum(); + int resultC_typenum = resultC.get_typenum(); + + if (matrixA_typenum != matrixB_typenum) { + throw py::value_error("matrixA and matrixB must be of the same type."); + } + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int matrixAB_type_id = array_types.typenum_to_lookup_id(matrixA_typenum); + int resultC_type_id = array_types.typenum_to_lookup_id(resultC_typenum); + + gemm_batch_impl_fn_ptr_t gemm_batch_fn = + gemm_batch_dispatch_table[matrixAB_type_id][resultC_type_id]; + if (gemm_batch_fn == nullptr) { + throw py::value_error( + "Types of input matrices and result matrix are mismatched."); + } + + char *a_typeless_ptr = matrixA.get_data(); + char *b_typeless_ptr = matrixB.get_data(); + char *r_typeless_ptr = resultC.get_data(); + + // Note that lda = k, ldb = n, and ld_result = n + sycl::event gemm_batch_ev = gemm_batch_fn( + exec_q, m, n, k, batch_size, k, n, n, stridea, strideb, stridec, transA, + transB, a_typeless_ptr, b_typeless_ptr, r_typeless_ptr, depends); + + sycl::event args_batch_ev = dpctl::utils::keep_args_alive( + exec_q, {matrixA, matrixB, resultC}, {gemm_batch_ev}); + + return std::make_pair(args_batch_ev, gemm_batch_ev); +} + +template +struct GemmBatchContigFactory +{ + fnT get() + { + if constexpr (types::GemmBatchTypePairSupportFactory::is_defined) { + return gemm_batch_impl; + } + else { + return nullptr; + } + } +}; + +void init_gemm_batch_dispatch_table(void) +{ + dpctl_td_ns::DispatchTableBuilder + contig; + contig.populate_dispatch_table(gemm_batch_dispatch_table); +} +} // namespace blas +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/blas/types_matrix.hpp b/dpnp/backend/extensions/blas/types_matrix.hpp new file mode 100644 index 00000000000..49154df03c4 --- /dev/null +++ b/dpnp/backend/extensions/blas/types_matrix.hpp @@ 
-0,0 +1,109 @@ +//***************************************************************************** +// Copyright (c) 2023, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include + +// dpctl tensor headers +#include "utils/type_dispatch.hpp" + +// dpctl namespace for operations with types +namespace dpctl_td_ns = dpctl::tensor::type_dispatch; + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace blas +{ +namespace types +{ +/** + * @brief A factory to define pairs of supported types for which + * MKL BLAS library provides support in oneapi::mkl::blas::gemm + * function. 
+ * + * @tparam Tab Type of arrays containing input matrices A and B. + * @tparam Tc Type of array containing output matrix C. + */ +template +struct GemmTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + Tc, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + Tc, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + +/** + * @brief A factory to define pairs of supported types for which + * MKL BLAS library provides support in + * oneapi::mkl::blas::gemm_batch function. + * + * @tparam Tab Type of arrays containing input matrices A and B. + * @tparam Tc Type of array containing output matrix C. + */ +template +struct GemmBatchTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + Tc, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + Tc, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; +} // namespace types +} // namespace blas +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/CMakeLists.txt b/dpnp/backend/extensions/lapack/CMakeLists.txt index d224c623c8c..626615e3e53 100644 --- a/dpnp/backend/extensions/lapack/CMakeLists.txt +++ b/dpnp/backend/extensions/lapack/CMakeLists.txt @@ -1,5 +1,5 @@ # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # 
All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -28,7 +28,12 @@ set(python_module_name _lapack_impl) set(_module_src ${CMAKE_CURRENT_SOURCE_DIR}/lapack_py.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gesv.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/getrf.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/getrf_batch.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/getri_batch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/heevd.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/potrf.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/potrf_batch.cpp ${CMAKE_CURRENT_SOURCE_DIR}/syevd.cpp ) @@ -48,7 +53,7 @@ set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDEN target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) if (WIN32) diff --git a/dpnp/backend/extensions/lapack/common_helpers.hpp b/dpnp/backend/extensions/lapack/common_helpers.hpp index 2f3815320ca..3e840ddf7f3 100644 --- a/dpnp/backend/extensions/lapack/common_helpers.hpp +++ b/dpnp/backend/extensions/lapack/common_helpers.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/lapack/gesv.cpp b/dpnp/backend/extensions/lapack/gesv.cpp index 72e5aa80671..08a544f18f9 100644 --- a/dpnp/backend/extensions/lapack/gesv.cpp +++ b/dpnp/backend/extensions/lapack/gesv.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -84,7 +84,7 @@ static sycl::event gesv_impl(sycl::queue exec_q, std::stringstream error_msg; std::int64_t info = 0; - bool sycl_exception_caught = false; + bool is_exception_caught = false; sycl::event gesv_event; try { @@ -106,12 +106,18 @@ static sycl::event gesv_impl(sycl::queue exec_q, // routine for storing intermediate results. scratchpad_size, depends); } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; info = e.info(); if (info < 0) { error_msg << "Parameter number " << -info << " had an illegal value."; } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. Required size is at least " + << e.detail(); + } else if (info > 0) { T host_U; exec_q.memcpy(&host_U, &a[(info - 1) * lda + info - 1], sizeof(T)) @@ -131,23 +137,18 @@ static sycl::event gesv_impl(sycl::queue exec_q, << e.what() << "\ninfo: " << e.info(); } } - else if (info == scratchpad_size && e.detail() != 0) { - error_msg - << "Insufficient scratchpad size. 
Required size is at least " - << e.detail(); - } else { error_msg << "Unexpected MKL exception caught during gesv() " "call:\nreason: " << e.what() << "\ninfo: " << e.info(); } } catch (sycl::exception const &e) { + is_exception_caught = true; error_msg << "Unexpected SYCL exception caught during gesv() call:\n" << e.what(); - sycl_exception_caught = true; } - if (info != 0 || sycl_exception_caught) // an unexpected error occurs + if (is_exception_caught) // an unexpected error occurs { if (scratchpad != nullptr) { sycl::free(scratchpad, exec_q); diff --git a/dpnp/backend/extensions/lapack/gesv.hpp b/dpnp/backend/extensions/lapack/gesv.hpp index 24ac0d2e5be..12486fae787 100644 --- a/dpnp/backend/extensions/lapack/gesv.hpp +++ b/dpnp/backend/extensions/lapack/gesv.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/lapack/getrf.cpp b/dpnp/backend/extensions/lapack/getrf.cpp new file mode 100644 index 00000000000..f97d395bcd6 --- /dev/null +++ b/dpnp/backend/extensions/lapack/getrf.cpp @@ -0,0 +1,256 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "getrf.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*getrf_impl_fn_ptr_t)(sycl::queue, + const std::int64_t, + char *, + std::int64_t, + std::int64_t *, + py::list, + std::vector &, + const std::vector &); + +static getrf_impl_fn_ptr_t getrf_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event getrf_impl(sycl::queue exec_q, + const std::int64_t n, + char *in_a, + std::int64_t lda, + std::int64_t *ipiv, + py::list dev_info, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + + const std::int64_t scratchpad_size = + mkl_lapack::getrf_scratchpad_size(exec_q, n, n, lda); + T *scratchpad = nullptr; + + std::stringstream 
error_msg; + std::int64_t info = 0; + bool is_exception_caught = false; + + sycl::event getrf_event; + try { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + getrf_event = mkl_lapack::getrf( + exec_q, + n, // The order of the square matrix A (0 ≤ n). + // It must be a non-negative integer. + n, // The number of columns in the square matrix A (0 ≤ n). + // It must be a non-negative integer. + a, // Pointer to the square matrix A (n x n). + lda, // The leading dimension of matrix A. + // It must be at least max(1, n). + ipiv, // Pointer to the output array of pivot indices. + scratchpad, // Pointer to scratchpad memory to be used by MKL + // routine for storing intermediate results. + scratchpad_size, depends); + } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; + info = e.info(); + + if (info < 0) { + error_msg << "Parameter number " << -info + << " had an illegal value."; + } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. Required size is at least " + << e.detail(); + } + else if (info > 0) { + // Store the positive 'info' value in the first element of + // 'dev_info'. This indicates that the factorization has been + // completed, but the factor U (upper triangular matrix) is exactly + // singular. The 'info' value here is the index of the first zero + // element in the diagonal of U. 
+ is_exception_caught = false; + dev_info[0] = info; + } + else { + error_msg << "Unexpected MKL exception caught during getrf() " + "call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + } + } catch (sycl::exception const &e) { + is_exception_caught = true; + error_msg << "Unexpected SYCL exception caught during getrf() call:\n" + << e.what(); + } + + if (is_exception_caught) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + + throw std::runtime_error(error_msg.str()); + } + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(getrf_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + return getrf_event; +} + +std::pair + getrf(sycl::queue exec_q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + py::list dev_info, + const std::vector &depends) +{ + const int a_array_nd = a_array.get_ndim(); + const int ipiv_array_nd = ipiv_array.get_ndim(); + + if (a_array_nd != 2) { + throw py::value_error( + "The input array has ndim=" + std::to_string(a_array_nd) + + ", but a 2-dimensional array is expected."); + } + + if (ipiv_array_nd != 1) { + throw py::value_error("The array of pivot indices has ndim=" + + std::to_string(ipiv_array_nd) + + ", but a 1-dimensional array is expected."); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible(exec_q, {a_array, ipiv_array})) { + throw py::value_error( + "Execution queue is not compatible with allocation queues"); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(a_array, ipiv_array)) { + throw py::value_error("The input array and the array of pivot indices " + "are overlapping segments of memory"); + } + + bool is_a_array_c_contig = a_array.is_c_contiguous(); + bool is_ipiv_array_c_contig = 
ipiv_array.is_c_contiguous(); + if (!is_a_array_c_contig) { + throw py::value_error("The input array " + "must be C-contiguous"); + } + if (!is_ipiv_array_c_contig) { + throw py::value_error("The array of pivot indices " + "must be C-contiguous"); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int a_array_type_id = + array_types.typenum_to_lookup_id(a_array.get_typenum()); + + getrf_impl_fn_ptr_t getrf_fn = getrf_dispatch_vector[a_array_type_id]; + if (getrf_fn == nullptr) { + throw py::value_error( + "No getrf implementation defined for the provided type " + "of the input matrix."); + } + + auto ipiv_types = dpctl_td_ns::usm_ndarray_types(); + int ipiv_array_type_id = + ipiv_types.typenum_to_lookup_id(ipiv_array.get_typenum()); + + if (ipiv_array_type_id != static_cast(dpctl_td_ns::typenum_t::INT64)) { + throw py::value_error("The type of 'ipiv_array' must be int64."); + } + + const std::int64_t n = a_array.get_shape_raw()[0]; + + char *a_array_data = a_array.get_data(); + const std::int64_t lda = std::max(1UL, n); + + char *ipiv_array_data = ipiv_array.get_data(); + std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data); + + std::vector host_task_events; + sycl::event getrf_ev = getrf_fn(exec_q, n, a_array_data, lda, d_ipiv, + dev_info, host_task_events, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive( + exec_q, {a_array, ipiv_array}, host_task_events); + + return std::make_pair(args_ev, getrf_ev); +} + +template +struct GetrfContigFactory +{ + fnT get() + { + if constexpr (types::GetrfTypePairSupportFactory::is_defined) { + return getrf_impl; + } + else { + return nullptr; + } + } +}; + +void init_getrf_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder + contig; + contig.populate_dispatch_vector(getrf_dispatch_vector); +} +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/getrf.hpp b/dpnp/backend/extensions/lapack/getrf.hpp new file 
mode 100644 index 00000000000..fee9b209426 --- /dev/null +++ b/dpnp/backend/extensions/lapack/getrf.hpp @@ -0,0 +1,64 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include +#include + +#include + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +extern std::pair + getrf(sycl::queue exec_q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + py::list dev_info, + const std::vector &depends = {}); + +extern std::pair + getrf_batch(sycl::queue exec_q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + py::list dev_info, + std::int64_t n, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size, + const std::vector &depends = {}); + +extern void init_getrf_dispatch_vector(void); +extern void init_getrf_batch_dispatch_vector(void); +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/getrf_batch.cpp b/dpnp/backend/extensions/lapack/getrf_batch.cpp new file mode 100644 index 00000000000..a8993121809 --- /dev/null +++ b/dpnp/backend/extensions/lapack/getrf_batch.cpp @@ -0,0 +1,295 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "getrf.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*getrf_batch_impl_fn_ptr_t)( + sycl::queue, + std::int64_t, + char *, + std::int64_t, + std::int64_t, + std::int64_t *, + std::int64_t, + std::int64_t, + py::list, + std::vector &, + const std::vector &); + +static getrf_batch_impl_fn_ptr_t + getrf_batch_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event getrf_batch_impl(sycl::queue exec_q, + std::int64_t n, + char *in_a, + std::int64_t lda, + std::int64_t stride_a, + std::int64_t *ipiv, + std::int64_t stride_ipiv, + std::int64_t batch_size, + py::list dev_info, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + + 
const std::int64_t scratchpad_size = + mkl_lapack::getrf_batch_scratchpad_size(exec_q, n, n, lda, stride_a, + stride_ipiv, batch_size); + T *scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + bool is_exception_caught = false; + + sycl::event getrf_batch_event; + try { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + getrf_batch_event = mkl_lapack::getrf_batch( + exec_q, + n, // The order of each square matrix in the batch; (0 ≤ n). + // It must be a non-negative integer. + n, // The number of columns in each matrix in the batch; (0 ≤ n). + // It must be a non-negative integer. + a, // Pointer to the batch of square matrices, each of size (n x n). + lda, // The leading dimension of each matrix in the batch. + stride_a, // Stride between consecutive matrices in the batch. + ipiv, // Pointer to the array of pivot indices for each matrix in + // the batch. + stride_ipiv, // Stride between pivot indices: Spacing between pivot + // arrays in 'ipiv'. + batch_size, // Total number of matrices in the batch. + scratchpad, // Pointer to scratchpad memory to be used by MKL + // routine for storing intermediate results. 
+ scratchpad_size, depends); + } catch (mkl_lapack::batch_error const &be) { + // Get the indices of matrices within the batch that encountered an + // error + auto error_matrices_ids = be.ids(); + // Get the indices of the first zero diagonal elements of these matrices + auto error_info = be.exceptions(); + + auto error_matrices_ids_size = error_matrices_ids.size(); + auto dev_info_size = static_cast(py::len(dev_info)); + if (error_matrices_ids_size != dev_info_size) { + throw py::value_error("The size of `dev_info` must be equal to" + + std::to_string(error_matrices_ids_size) + + ", but currently it is " + + std::to_string(dev_info_size) + "."); + } + + for (size_t i = 0; i < error_matrices_ids.size(); ++i) { + // Assign the index of the first zero diagonal element in each + // error matrix to the corresponding index in 'dev_info' + dev_info[error_matrices_ids[i]] = error_info[i]; + } + } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; + info = e.info(); + + if (info < 0) { + error_msg << "Parameter number " << -info + << " had an illegal value."; + } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. 
Required size is at least " + << e.detail(); + } + else { + error_msg << "Unexpected MKL exception caught during getrf_batch() " + "call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + } + } catch (sycl::exception const &e) { + is_exception_caught = true; + error_msg + << "Unexpected SYCL exception caught during getrf_batch() call:\n" + << e.what(); + } + + if (is_exception_caught) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + + throw std::runtime_error(error_msg.str()); + } + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(getrf_batch_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + return getrf_batch_event; +} + +std::pair + getrf_batch(sycl::queue exec_q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + py::list dev_info, + std::int64_t n, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size, + const std::vector &depends) +{ + const int a_array_nd = a_array.get_ndim(); + const int ipiv_array_nd = ipiv_array.get_ndim(); + + if (a_array_nd < 3) { + throw py::value_error( + "The input array has ndim=" + std::to_string(a_array_nd) + + ", but an array with ndim >= 3 is expected."); + } + + if (ipiv_array_nd != 2) { + throw py::value_error("The array of pivot indices has ndim=" + + std::to_string(ipiv_array_nd) + + ", but a 2-dimensional array is expected."); + } + + const int dev_info_size = py::len(dev_info); + if (dev_info_size != batch_size) { + throw py::value_error("The size of 'dev_info' (" + + std::to_string(dev_info_size) + + ") does not match the expected batch size (" + + std::to_string(batch_size) + ")."); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible(exec_q, {a_array, ipiv_array})) { + throw py::value_error( + 
"Execution queue is not compatible with allocation queues"); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(a_array, ipiv_array)) { + throw py::value_error("The input array and the array of pivot indices " + "are overlapping segments of memory"); + } + + bool is_a_array_c_contig = a_array.is_c_contiguous(); + bool is_ipiv_array_c_contig = ipiv_array.is_c_contiguous(); + if (!is_a_array_c_contig) { + throw py::value_error("The input array " + "must be C-contiguous"); + } + if (!is_ipiv_array_c_contig) { + throw py::value_error("The array of pivot indices " + "must be C-contiguous"); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int a_array_type_id = + array_types.typenum_to_lookup_id(a_array.get_typenum()); + + getrf_batch_impl_fn_ptr_t getrf_batch_fn = + getrf_batch_dispatch_vector[a_array_type_id]; + if (getrf_batch_fn == nullptr) { + throw py::value_error( + "No getrf_batch implementation defined for the provided type " + "of the input matrix."); + } + + auto ipiv_types = dpctl_td_ns::usm_ndarray_types(); + int ipiv_array_type_id = + ipiv_types.typenum_to_lookup_id(ipiv_array.get_typenum()); + + if (ipiv_array_type_id != static_cast(dpctl_td_ns::typenum_t::INT64)) { + throw py::value_error("The type of 'ipiv_array' must be int64."); + } + + char *a_array_data = a_array.get_data(); + const std::int64_t lda = std::max(1UL, n); + + char *ipiv_array_data = ipiv_array.get_data(); + std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data); + + std::vector host_task_events; + sycl::event getrf_batch_ev = getrf_batch_fn( + exec_q, n, a_array_data, lda, stride_a, d_ipiv, stride_ipiv, batch_size, + dev_info, host_task_events, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive( + exec_q, {a_array, ipiv_array}, host_task_events); + + return std::make_pair(args_ev, getrf_batch_ev); +} + +template +struct GetrfBatchContigFactory +{ + fnT get() + { + if constexpr 
(types::GetrfBatchTypePairSupportFactory::is_defined) { + return getrf_batch_impl; + } + else { + return nullptr; + } + } +}; + +void init_getrf_batch_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder + contig; + contig.populate_dispatch_vector(getrf_batch_dispatch_vector); +} +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/getri.hpp b/dpnp/backend/extensions/lapack/getri.hpp new file mode 100644 index 00000000000..75e9b16d4ef --- /dev/null +++ b/dpnp/backend/extensions/lapack/getri.hpp @@ -0,0 +1,56 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#pragma once + +#include +#include + +#include + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +extern std::pair + getri_batch(sycl::queue exec_q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + py::list dev_info, + std::int64_t n, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size, + const std::vector &depends = {}); + +extern void init_getri_batch_dispatch_vector(void); +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/getri_batch.cpp b/dpnp/backend/extensions/lapack/getri_batch.cpp new file mode 100644 index 00000000000..c6315e29427 --- /dev/null +++ b/dpnp/backend/extensions/lapack/getri_batch.cpp @@ -0,0 +1,293 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "getri.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*getri_batch_impl_fn_ptr_t)( + sycl::queue, + std::int64_t, + char *, + std::int64_t, + std::int64_t, + std::int64_t *, + std::int64_t, + std::int64_t, + py::list, + std::vector &, + const std::vector &); + +static getri_batch_impl_fn_ptr_t + getri_batch_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event getri_batch_impl(sycl::queue exec_q, + std::int64_t n, + char *in_a, + std::int64_t lda, + std::int64_t stride_a, + std::int64_t *ipiv, + std::int64_t stride_ipiv, + std::int64_t batch_size, + py::list dev_info, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + + const std::int64_t scratchpad_size = + mkl_lapack::getri_batch_scratchpad_size(exec_q, n, lda, stride_a, + stride_ipiv, batch_size); + T *scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + bool is_exception_caught = false; + + sycl::event getri_batch_event; + try { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + getri_batch_event = mkl_lapack::getri_batch( + exec_q, + n, // The order of each square matrix in the batch; (0 ≤ n). + // It must be a non-negative integer. + a, // Pointer to the batch of square matrices, each of size (n x n). + lda, // The leading dimension of each matrix in the batch. + stride_a, // Stride between consecutive matrices in the batch. + ipiv, // Pointer to the array of pivot indices for each matrix in + // the batch. 
+ stride_ipiv, // Stride between pivot indices: Spacing between pivot + // arrays in 'ipiv'. + batch_size, // Total number of matrices in the batch. + scratchpad, // Pointer to scratchpad memory to be used by MKL + // routine for storing intermediate results. + scratchpad_size, depends); + } catch (mkl_lapack::batch_error const &be) { + // Get the indices of matrices within the batch that encountered an + // error + auto error_matrices_ids = be.ids(); + // Get the indices of the first zero diagonal elements of these matrices + auto error_info = be.exceptions(); + + auto error_matrices_ids_size = error_matrices_ids.size(); + auto dev_info_size = static_cast(py::len(dev_info)); + if (error_matrices_ids_size != dev_info_size) { + throw py::value_error("The size of `dev_info` must be equal to" + + std::to_string(error_matrices_ids_size) + + ", but currently it is " + + std::to_string(dev_info_size) + "."); + } + + for (size_t i = 0; i < error_matrices_ids.size(); ++i) { + // Assign the index of the first zero diagonal element in each + // error matrix to the corresponding index in 'dev_info' + dev_info[error_matrices_ids[i]] = error_info[i]; + } + } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; + info = e.info(); + + if (info < 0) { + error_msg << "Parameter number " << -info + << " had an illegal value."; + } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. 
Required size is at least " + << e.detail(); + } + else { + error_msg << "Unexpected MKL exception caught during getri_batch() " + "call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + } + } catch (sycl::exception const &e) { + is_exception_caught = true; + error_msg + << "Unexpected SYCL exception caught during getri_batch() call:\n" + << e.what(); + } + + if (is_exception_caught) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + + throw std::runtime_error(error_msg.str()); + } + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(getri_batch_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + return getri_batch_event; +} + +std::pair + getri_batch(sycl::queue exec_q, + dpctl::tensor::usm_ndarray a_array, + dpctl::tensor::usm_ndarray ipiv_array, + py::list dev_info, + std::int64_t n, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size, + const std::vector &depends) +{ + const int a_array_nd = a_array.get_ndim(); + const int ipiv_array_nd = ipiv_array.get_ndim(); + + if (a_array_nd < 3) { + throw py::value_error( + "The input array has ndim=" + std::to_string(a_array_nd) + + ", but an array with ndim >= 3 is expected."); + } + + if (ipiv_array_nd != 2) { + throw py::value_error("The array of pivot indices has ndim=" + + std::to_string(ipiv_array_nd) + + ", but a 2-dimensional array is expected."); + } + + const int dev_info_size = py::len(dev_info); + if (dev_info_size != batch_size) { + throw py::value_error("The size of 'dev_info' (" + + std::to_string(dev_info_size) + + ") does not match the expected batch size (" + + std::to_string(batch_size) + ")."); + } + + // check compatibility of execution queue and allocation queue + if (!dpctl::utils::queues_are_compatible(exec_q, {a_array, ipiv_array})) { + throw py::value_error( + 
"Execution queue is not compatible with allocation queues"); + } + + auto const &overlap = dpctl::tensor::overlap::MemoryOverlap(); + if (overlap(a_array, ipiv_array)) { + throw py::value_error("The input array and the array of pivot indices " + "are overlapping segments of memory"); + } + + bool is_a_array_c_contig = a_array.is_c_contiguous(); + bool is_ipiv_array_c_contig = ipiv_array.is_c_contiguous(); + if (!is_a_array_c_contig) { + throw py::value_error("The input array " + "must be C-contiguous"); + } + if (!is_ipiv_array_c_contig) { + throw py::value_error("The array of pivot indices " + "must be C-contiguous"); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int a_array_type_id = + array_types.typenum_to_lookup_id(a_array.get_typenum()); + + getri_batch_impl_fn_ptr_t getri_batch_fn = + getri_batch_dispatch_vector[a_array_type_id]; + if (getri_batch_fn == nullptr) { + throw py::value_error( + "No getri_batch implementation defined for the provided type " + "of the input matrix."); + } + + auto ipiv_types = dpctl_td_ns::usm_ndarray_types(); + int ipiv_array_type_id = + ipiv_types.typenum_to_lookup_id(ipiv_array.get_typenum()); + + if (ipiv_array_type_id != static_cast(dpctl_td_ns::typenum_t::INT64)) { + throw py::value_error("The type of 'ipiv_array' must be int64."); + } + + char *a_array_data = a_array.get_data(); + const std::int64_t lda = std::max(1UL, n); + + char *ipiv_array_data = ipiv_array.get_data(); + std::int64_t *d_ipiv = reinterpret_cast(ipiv_array_data); + + std::vector host_task_events; + sycl::event getri_batch_ev = getri_batch_fn( + exec_q, n, a_array_data, lda, stride_a, d_ipiv, stride_ipiv, batch_size, + dev_info, host_task_events, depends); + + sycl::event args_ev = dpctl::utils::keep_args_alive( + exec_q, {a_array, ipiv_array}, host_task_events); + + return std::make_pair(args_ev, getri_batch_ev); +} + +template +struct GetriBatchContigFactory +{ + fnT get() + { + if constexpr 
(types::GetriBatchTypePairSupportFactory::is_defined) { + return getri_batch_impl; + } + else { + return nullptr; + } + } +}; + +void init_getri_batch_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder + contig; + contig.populate_dispatch_vector(getri_batch_dispatch_vector); +} +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/heevd.cpp b/dpnp/backend/extensions/lapack/heevd.cpp index e57d34f10c0..feffb2ec4a3 100644 --- a/dpnp/backend/extensions/lapack/heevd.cpp +++ b/dpnp/backend/extensions/lapack/heevd.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/lapack/heevd.hpp b/dpnp/backend/extensions/lapack/heevd.hpp index d11240cc452..89ecfe466fb 100644 --- a/dpnp/backend/extensions/lapack/heevd.hpp +++ b/dpnp/backend/extensions/lapack/heevd.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/lapack/lapack_py.cpp b/dpnp/backend/extensions/lapack/lapack_py.cpp index c0765be7509..71991be3652 100644 --- a/dpnp/backend/extensions/lapack/lapack_py.cpp +++ b/dpnp/backend/extensions/lapack/lapack_py.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -31,8 +31,11 @@ #include #include "gesv.hpp" +#include "getrf.hpp" +#include "getri.hpp" #include "heevd.hpp" #include "linalg_exceptions.hpp" +#include "potrf.hpp" #include "syevd.hpp" namespace lapack_ext = dpnp::backend::ext::lapack; @@ -42,6 +45,11 @@ namespace py = pybind11; void init_dispatch_vectors(void) { lapack_ext::init_gesv_dispatch_vector(); + lapack_ext::init_getrf_batch_dispatch_vector(); + lapack_ext::init_getrf_dispatch_vector(); + lapack_ext::init_getri_batch_dispatch_vector(); + lapack_ext::init_potrf_batch_dispatch_vector(); + lapack_ext::init_potrf_dispatch_vector(); lapack_ext::init_syevd_dispatch_vector(); } @@ -68,6 +76,28 @@ PYBIND11_MODULE(_lapack_impl, m) py::arg("sycl_queue"), py::arg("coeff_matrix"), py::arg("dependent_vals"), py::arg("depends") = py::list()); + m.def("_getrf", &lapack_ext::getrf, + "Call `getrf` from OneMKL LAPACK library to return " + "the LU factorization of a general n x n matrix", + py::arg("sycl_queue"), py::arg("a_array"), py::arg("ipiv_array"), + py::arg("dev_info"), py::arg("depends") = py::list()); + + m.def("_getrf_batch", &lapack_ext::getrf_batch, + "Call `getrf_batch` from OneMKL LAPACK library to return " + "the LU factorization of a batch of general n x n matrices", + py::arg("sycl_queue"), py::arg("a_array"), py::arg("ipiv_array"), + py::arg("dev_info_array"), py::arg("n"), py::arg("stride_a"), + py::arg("stride_ipiv"), py::arg("batch_size"), + py::arg("depends") = py::list()); + + m.def("_getri_batch", &lapack_ext::getri_batch, + "Call `getri_batch` from OneMKL LAPACK library to return " + "the inverses of a batch of LU-factored matrices", + py::arg("sycl_queue"), py::arg("a_array"), py::arg("ipiv_array"), + py::arg("dev_info"), py::arg("n"), py::arg("stride_a"), + py::arg("stride_ipiv"), py::arg("batch_size"), + py::arg("depends") = py::list()); + m.def("_heevd", &lapack_ext::heevd, "Call `heevd` from OneMKL LAPACK library to 
return " "the eigenvalues and eigenvectors of a complex Hermitian matrix", @@ -75,6 +105,20 @@ PYBIND11_MODULE(_lapack_impl, m) py::arg("eig_vecs"), py::arg("eig_vals"), py::arg("depends") = py::list()); + m.def("_potrf", &lapack_ext::potrf, + "Call `potrf` from OneMKL LAPACK library to return " + "the Cholesky factorization of a symmetric positive-definite matrix", + py::arg("sycl_queue"), py::arg("a_array"), py::arg("upper_lower"), + py::arg("depends") = py::list()); + + m.def("_potrf_batch", &lapack_ext::potrf_batch, + "Call `potrf_batch` from OneMKL LAPACK library to return " + "the Cholesky factorization of a batch of symmetric " + "positive-definite matrix", + py::arg("sycl_queue"), py::arg("a_array"), py::arg("upper_lower"), + py::arg("n"), py::arg("stride_a"), py::arg("batch_size"), + py::arg("depends") = py::list()); + m.def("_syevd", &lapack_ext::syevd, "Call `syevd` from OneMKL LAPACK library to return " "the eigenvalues and eigenvectors of a real symmetric matrix", diff --git a/dpnp/backend/extensions/lapack/linalg_exceptions.hpp b/dpnp/backend/extensions/lapack/linalg_exceptions.hpp index 083be22429c..48f69d70116 100644 --- a/dpnp/backend/extensions/lapack/linalg_exceptions.hpp +++ b/dpnp/backend/extensions/lapack/linalg_exceptions.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/lapack/potrf.cpp b/dpnp/backend/extensions/lapack/potrf.cpp new file mode 100644 index 00000000000..610a629a9eb --- /dev/null +++ b/dpnp/backend/extensions/lapack/potrf.cpp @@ -0,0 +1,221 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "linalg_exceptions.hpp" +#include "potrf.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*potrf_impl_fn_ptr_t)(sycl::queue, + const oneapi::mkl::uplo, + const std::int64_t, + char *, + std::int64_t, + std::vector &, + const std::vector &); + +static potrf_impl_fn_ptr_t potrf_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event potrf_impl(sycl::queue exec_q, + const oneapi::mkl::uplo upper_lower, + const std::int64_t n, + char *in_a, + std::int64_t lda, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + + const std::int64_t scratchpad_size = + mkl_lapack::potrf_scratchpad_size(exec_q, upper_lower, n, lda); + T *scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + bool is_exception_caught = false; + + sycl::event potrf_event; + try { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + potrf_event = mkl_lapack::potrf( + exec_q, + upper_lower, // An enumeration value of type oneapi::mkl::uplo: + // oneapi::mkl::uplo::upper for the upper triangular + // part; oneapi::mkl::uplo::lower for the lower + // triangular part. + n, // Order of the square matrix; (0 ≤ n). + a, // Pointer to the n-by-n matrix. + lda, // The leading dimension of `a`. + scratchpad, // Pointer to scratchpad memory to be used by MKL + // routine for storing intermediate results. 
+ scratchpad_size, depends); + } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; + info = e.info(); + if (info < 0) { + error_msg << "Parameter number " << -info + << " had an illegal value."; + } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. Required size is at least " + << e.detail(); + } + else if (info > 0 && e.detail() == 0) { + sycl::free(scratchpad, exec_q); + throw LinAlgError("Matrix is not positive definite."); + } + else { + error_msg << "Unexpected MKL exception caught during getrf() " + "call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + } + } catch (sycl::exception const &e) { + is_exception_caught = true; + error_msg << "Unexpected SYCL exception caught during potrf() call:\n" + << e.what(); + } + + if (is_exception_caught) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + throw std::runtime_error(error_msg.str()); + } + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(potrf_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + return potrf_event; +} + +std::pair + potrf(sycl::queue q, + dpctl::tensor::usm_ndarray a_array, + const std::int8_t upper_lower, + const std::vector &depends) +{ + const int a_array_nd = a_array.get_ndim(); + + if (a_array_nd != 2) { + throw py::value_error( + "The input array has ndim=" + std::to_string(a_array_nd) + + ", but a 2-dimensional array is expected."); + } + + const py::ssize_t *a_array_shape = a_array.get_shape_raw(); + + if (a_array_shape[0] != a_array_shape[1]) { + throw py::value_error("The input array must be square," + " but got a shape of (" + + std::to_string(a_array_shape[0]) + ", " + + std::to_string(a_array_shape[1]) + ")."); + } + + bool is_a_array_c_contig = a_array.is_c_contiguous(); + if 
(!is_a_array_c_contig) { + throw py::value_error("The input array " + "must be C-contiguous"); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int a_array_type_id = + array_types.typenum_to_lookup_id(a_array.get_typenum()); + + potrf_impl_fn_ptr_t potrf_fn = potrf_dispatch_vector[a_array_type_id]; + if (potrf_fn == nullptr) { + throw py::value_error( + "No potrf implementation defined for the provided type " + "of the input matrix."); + } + + char *a_array_data = a_array.get_data(); + const std::int64_t n = a_array_shape[0]; + const std::int64_t lda = std::max(1UL, n); + const oneapi::mkl::uplo uplo_val = + static_cast(upper_lower); + + std::vector host_task_events; + sycl::event potrf_ev = + potrf_fn(q, uplo_val, n, a_array_data, lda, host_task_events, depends); + + sycl::event args_ev = + dpctl::utils::keep_args_alive(q, {a_array}, host_task_events); + + return std::make_pair(args_ev, potrf_ev); +} + +template +struct PotrfContigFactory +{ + fnT get() + { + if constexpr (types::PotrfTypePairSupportFactory::is_defined) { + return potrf_impl; + } + else { + return nullptr; + } + } +}; + +void init_potrf_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder + contig; + contig.populate_dispatch_vector(potrf_dispatch_vector); +} +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/potrf.hpp b/dpnp/backend/extensions/lapack/potrf.hpp new file mode 100644 index 00000000000..f0850b3fd98 --- /dev/null +++ b/dpnp/backend/extensions/lapack/potrf.hpp @@ -0,0 +1,61 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. 
+//***************************************************************************** + +#pragma once + +#include +#include + +#include + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +extern std::pair + potrf(sycl::queue exec_q, + dpctl::tensor::usm_ndarray a_array, + const std::int8_t upper_lower, + const std::vector &depends = {}); + +extern std::pair + potrf_batch(sycl::queue exec_q, + dpctl::tensor::usm_ndarray a_array, + const std::int8_t upper_lower, + const std::int64_t n, + const std::int64_t stride_a, + const std::int64_t batch_size, + const std::vector &depends = {}); + +extern void init_potrf_dispatch_vector(void); +extern void init_potrf_batch_dispatch_vector(void); +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/potrf_batch.cpp b/dpnp/backend/extensions/lapack/potrf_batch.cpp new file mode 100644 index 00000000000..1a36bae4efd --- /dev/null +++ b/dpnp/backend/extensions/lapack/potrf_batch.cpp @@ -0,0 +1,257 @@ +//***************************************************************************** +// Copyright (c) 2024, Intel Corporation +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// - Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// - Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +// ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +// THE POSSIBILITY OF SUCH DAMAGE. +//***************************************************************************** + +#include + +// dpctl tensor headers +#include "utils/memory_overlap.hpp" +#include "utils/type_utils.hpp" + +#include "linalg_exceptions.hpp" +#include "potrf.hpp" +#include "types_matrix.hpp" + +#include "dpnp_utils.hpp" + +namespace dpnp +{ +namespace backend +{ +namespace ext +{ +namespace lapack +{ +namespace mkl_lapack = oneapi::mkl::lapack; +namespace py = pybind11; +namespace type_utils = dpctl::tensor::type_utils; + +typedef sycl::event (*potrf_batch_impl_fn_ptr_t)( + sycl::queue, + const oneapi::mkl::uplo, + const std::int64_t, + char *, + const std::int64_t, + const std::int64_t, + const std::int64_t, + std::vector &, + const std::vector &); + +static potrf_batch_impl_fn_ptr_t + potrf_batch_dispatch_vector[dpctl_td_ns::num_types]; + +template +static sycl::event potrf_batch_impl(sycl::queue exec_q, + const oneapi::mkl::uplo upper_lower, + const std::int64_t n, + char *in_a, + const std::int64_t lda, + const std::int64_t stride_a, + const std::int64_t batch_size, + std::vector &host_task_events, + const std::vector &depends) +{ + type_utils::validate_type_for_device(exec_q); + + T *a = reinterpret_cast(in_a); + + const std::int64_t scratchpad_size = + mkl_lapack::potrf_batch_scratchpad_size(exec_q, upper_lower, n, lda, + stride_a, batch_size); + T *scratchpad = nullptr; + + std::stringstream error_msg; + std::int64_t info = 0; + bool 
is_exception_caught = false; + + sycl::event potrf_batch_event; + try { + scratchpad = sycl::malloc_device(scratchpad_size, exec_q); + + potrf_batch_event = mkl_lapack::potrf_batch( + exec_q, + upper_lower, // An enumeration value of type oneapi::mkl::uplo: + // oneapi::mkl::uplo::upper for the upper triangular + // part; oneapi::mkl::uplo::lower for the lower + // triangular part. + n, // Order of each square matrix in the batch; (0 ≤ n). + a, // Pointer to the batch of matrices. + lda, // The leading dimension of `a`. + stride_a, // Stride between matrices: Element spacing between + // matrices in `a`. + batch_size, // Total number of matrices in the batch. + scratchpad, // Pointer to scratchpad memory to be used by MKL + // routine for storing intermediate results. + scratchpad_size, depends); + } catch (mkl_lapack::batch_error const &be) { + // Get the indices of matrices within the batch that encountered an + // error + auto error_matrices_ids = be.ids(); + + error_msg + << "Matrix is not positive definite. Errors in matrices with IDs: "; + for (size_t i = 0; i < error_matrices_ids.size(); ++i) { + error_msg << error_matrices_ids[i]; + if (i < error_matrices_ids.size() - 1) { + error_msg << ", "; + } + } + error_msg << "."; + + sycl::free(scratchpad, exec_q); + throw LinAlgError(error_msg.str().c_str()); + } catch (mkl_lapack::exception const &e) { + is_exception_caught = true; + info = e.info(); + + if (info < 0) { + error_msg << "Parameter number " << -info + << " had an illegal value."; + } + else if (info == scratchpad_size && e.detail() != 0) { + error_msg + << "Insufficient scratchpad size. Required size is at least " + << e.detail(); + } + else if (info != 0 && e.detail() == 0) { + error_msg << "Error in batch processing. 
" + "Number of failed calculations: " + << info; + } + else { + error_msg << "Unexpected MKL exception caught during potrf_batch() " + "call:\nreason: " + << e.what() << "\ninfo: " << e.info(); + } + } catch (sycl::exception const &e) { + is_exception_caught = true; + error_msg + << "Unexpected SYCL exception caught during potrf_batch() call:\n" + << e.what(); + } + + if (is_exception_caught) // an unexpected error occurs + { + if (scratchpad != nullptr) { + sycl::free(scratchpad, exec_q); + } + throw std::runtime_error(error_msg.str()); + } + + sycl::event clean_up_event = exec_q.submit([&](sycl::handler &cgh) { + cgh.depends_on(potrf_batch_event); + auto ctx = exec_q.get_context(); + cgh.host_task([ctx, scratchpad]() { sycl::free(scratchpad, ctx); }); + }); + host_task_events.push_back(clean_up_event); + return potrf_batch_event; +} + +std::pair + potrf_batch(sycl::queue q, + dpctl::tensor::usm_ndarray a_array, + const std::int8_t upper_lower, + const std::int64_t n, + const std::int64_t stride_a, + const std::int64_t batch_size, + const std::vector &depends) +{ + const int a_array_nd = a_array.get_ndim(); + + if (a_array_nd < 3) { + throw py::value_error( + "The input array has ndim=" + std::to_string(a_array_nd) + + ", but a 3-dimensional or higher array is expected."); + } + + const py::ssize_t *a_array_shape = a_array.get_shape_raw(); + + if (a_array_shape[a_array_nd - 1] != a_array_shape[a_array_nd - 2]) { + throw py::value_error( + "The last two dimensions of the input array must be square," + " but got a shape of (" + + std::to_string(a_array_shape[a_array_nd - 1]) + ", " + + std::to_string(a_array_shape[a_array_nd - 2]) + ")."); + } + + bool is_a_array_c_contig = a_array.is_c_contiguous(); + if (!is_a_array_c_contig) { + throw py::value_error("The input array " + "must be C-contiguous"); + } + + auto array_types = dpctl_td_ns::usm_ndarray_types(); + int a_array_type_id = + array_types.typenum_to_lookup_id(a_array.get_typenum()); + + 
potrf_batch_impl_fn_ptr_t potrf_batch_fn = + potrf_batch_dispatch_vector[a_array_type_id]; + if (potrf_batch_fn == nullptr) { + throw py::value_error( + "No potrf_batch implementation defined for the provided type " + "of the input matrix."); + } + + char *a_array_data = a_array.get_data(); + const std::int64_t lda = std::max(1UL, n); + const oneapi::mkl::uplo uplo_val = + static_cast(upper_lower); + + std::vector host_task_events; + sycl::event potrf_batch_ev = + potrf_batch_fn(q, uplo_val, n, a_array_data, lda, stride_a, batch_size, + host_task_events, depends); + + sycl::event args_ev = + dpctl::utils::keep_args_alive(q, {a_array}, host_task_events); + + return std::make_pair(args_ev, potrf_batch_ev); +} + +template +struct PotrfBatchContigFactory +{ + fnT get() + { + if constexpr (types::PotrfBatchTypePairSupportFactory::is_defined) { + return potrf_batch_impl; + } + else { + return nullptr; + } + } +}; + +void init_potrf_batch_dispatch_vector(void) +{ + dpctl_td_ns::DispatchVectorBuilder + contig; + contig.populate_dispatch_vector(potrf_batch_dispatch_vector); +} +} // namespace lapack +} // namespace ext +} // namespace backend +} // namespace dpnp diff --git a/dpnp/backend/extensions/lapack/syevd.cpp b/dpnp/backend/extensions/lapack/syevd.cpp index 11c13569bf6..0374e96b8bb 100644 --- a/dpnp/backend/extensions/lapack/syevd.cpp +++ b/dpnp/backend/extensions/lapack/syevd.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/lapack/syevd.hpp b/dpnp/backend/extensions/lapack/syevd.hpp index 56fe49232b6..9dfaba08ae1 100644 --- a/dpnp/backend/extensions/lapack/syevd.hpp +++ b/dpnp/backend/extensions/lapack/syevd.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/lapack/types_matrix.hpp b/dpnp/backend/extensions/lapack/types_matrix.hpp index 60521cb75a3..7e5413b84c8 100644 --- a/dpnp/backend/extensions/lapack/types_matrix.hpp +++ b/dpnp/backend/extensions/lapack/types_matrix.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -69,6 +69,85 @@ struct GesvTypePairSupportFactory // fall-through dpctl_td_ns::NotDefinedEntry>::is_defined; }; + +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::getrf + * function. + * + * @tparam T Type of array containing input matrix, + * as well as the output array for storing the LU factorization. 
+ */ +template +struct GetrfTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::getrf_batch + * function. + * + * @tparam T Type of array containing input matrix, + * as well as the output array for storing the LU factorization. + */ +template +struct GetrfBatchTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::getri_batch + * function. + * + * @tparam T Type of array containing input matrix (LU-factored form), + * as well as the output array for storing the inverse of the matrix. 
+ */ +template +struct GetriBatchTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + /** * @brief A factory to define pairs of supported types for which * MKL LAPACK library provides support in oneapi::mkl::lapack::heevd @@ -89,6 +168,58 @@ struct HeevdTypePairSupportFactory dpctl_td_ns::NotDefinedEntry>::is_defined; }; +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::potrf + * function. + * + * @tparam T Type of array containing input matrix, + * as well as the output array for storing the Cholesky factor L. + */ +template +struct PotrfTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + +/** + * @brief A factory to define pairs of supported types for which + * MKL LAPACK library provides support in oneapi::mkl::lapack::potrf + * function. + * + * @tparam T Type of array containing input matrices, + * as well as the output arrays for storing the Cholesky factor L. 
+ */ +template +struct PotrfBatchTypePairSupportFactory +{ + static constexpr bool is_defined = std::disjunction< + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + dpctl_td_ns::TypePairDefinedEntry, + T, + std::complex>, + // fall-through + dpctl_td_ns::NotDefinedEntry>::is_defined; +}; + /** * @brief A factory to define pairs of supported types for which * MKL LAPACK library provides support in oneapi::mkl::lapack::syevd diff --git a/dpnp/backend/extensions/sycl_ext/CMakeLists.txt b/dpnp/backend/extensions/sycl_ext/CMakeLists.txt index 4aff4d2e766..6b1c76076d5 100644 --- a/dpnp/backend/extensions/sycl_ext/CMakeLists.txt +++ b/dpnp/backend/extensions/sycl_ext/CMakeLists.txt @@ -1,5 +1,5 @@ # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -45,7 +45,7 @@ set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDEN target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) if (WIN32) diff --git a/dpnp/backend/extensions/sycl_ext/dispatcher_utils.hpp b/dpnp/backend/extensions/sycl_ext/dispatcher_utils.hpp index f04472e9c18..49364ad1e90 100644 --- a/dpnp/backend/extensions/sycl_ext/dispatcher_utils.hpp +++ b/dpnp/backend/extensions/sycl_ext/dispatcher_utils.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 
2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/sycl_ext/sum_mean.cpp b/dpnp/backend/extensions/sycl_ext/sum_mean.cpp index e5097cd060a..620fb3a67a5 100644 --- a/dpnp/backend/extensions/sycl_ext/sum_mean.cpp +++ b/dpnp/backend/extensions/sycl_ext/sum_mean.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/sycl_ext/sum_mean.hpp b/dpnp/backend/extensions/sycl_ext/sum_mean.hpp index 69e85eed884..5333456b0c7 100644 --- a/dpnp/backend/extensions/sycl_ext/sum_mean.hpp +++ b/dpnp/backend/extensions/sycl_ext/sum_mean.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/CMakeLists.txt b/dpnp/backend/extensions/vm/CMakeLists.txt index 3f13fb571a6..03a5038ebb8 100644 --- a/dpnp/backend/extensions/vm/CMakeLists.txt +++ b/dpnp/backend/extensions/vm/CMakeLists.txt @@ -1,5 +1,5 @@ # ***************************************************************************** -# Copyright (c) 2023, Intel Corporation +# Copyright (c) 2023-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -45,7 +45,7 @@ set_target_properties(${python_module_name} PROPERTIES CMAKE_POSITION_INDEPENDEN target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../include) target_include_directories(${python_module_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/../../src) -target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIRS}) +target_include_directories(${python_module_name} PUBLIC ${Dpctl_INCLUDE_DIR}) target_include_directories(${python_module_name} PUBLIC ${Dpctl_TENSOR_INCLUDE_DIR}) if (WIN32) diff --git a/dpnp/backend/extensions/vm/abs.hpp b/dpnp/backend/extensions/vm/abs.hpp index 4a2894e7c46..bb5e55010b4 100644 --- a/dpnp/backend/extensions/vm/abs.hpp +++ b/dpnp/backend/extensions/vm/abs.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/acos.hpp b/dpnp/backend/extensions/vm/acos.hpp index 392ee56e172..029a9d9c886 100644 --- a/dpnp/backend/extensions/vm/acos.hpp +++ b/dpnp/backend/extensions/vm/acos.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/acosh.hpp b/dpnp/backend/extensions/vm/acosh.hpp index 9e3d2491382..9f86ae589cf 100644 --- a/dpnp/backend/extensions/vm/acosh.hpp +++ b/dpnp/backend/extensions/vm/acosh.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/add.hpp b/dpnp/backend/extensions/vm/add.hpp index 85cbda9e668..47ff60ed96a 100644 --- a/dpnp/backend/extensions/vm/add.hpp +++ b/dpnp/backend/extensions/vm/add.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/asin.hpp b/dpnp/backend/extensions/vm/asin.hpp index 4ff131c352f..5e44aa5bde6 100644 --- a/dpnp/backend/extensions/vm/asin.hpp +++ b/dpnp/backend/extensions/vm/asin.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/asinh.hpp b/dpnp/backend/extensions/vm/asinh.hpp index 3bc61dc813d..58e2815e3f7 100644 --- a/dpnp/backend/extensions/vm/asinh.hpp +++ b/dpnp/backend/extensions/vm/asinh.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/atan.hpp b/dpnp/backend/extensions/vm/atan.hpp index ed38bdc2e71..b36abc16138 100644 --- a/dpnp/backend/extensions/vm/atan.hpp +++ b/dpnp/backend/extensions/vm/atan.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/atan2.hpp b/dpnp/backend/extensions/vm/atan2.hpp index 09788cd8a41..19a66e877ac 100644 --- a/dpnp/backend/extensions/vm/atan2.hpp +++ b/dpnp/backend/extensions/vm/atan2.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/atanh.hpp b/dpnp/backend/extensions/vm/atanh.hpp index 3f22f7c4283..9764df84ce3 100644 --- a/dpnp/backend/extensions/vm/atanh.hpp +++ b/dpnp/backend/extensions/vm/atanh.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/cbrt.hpp b/dpnp/backend/extensions/vm/cbrt.hpp index 83a44335bcc..5c0a0adc53e 100644 --- a/dpnp/backend/extensions/vm/cbrt.hpp +++ b/dpnp/backend/extensions/vm/cbrt.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/ceil.hpp b/dpnp/backend/extensions/vm/ceil.hpp index 04265342168..fd4f3a8680c 100644 --- a/dpnp/backend/extensions/vm/ceil.hpp +++ b/dpnp/backend/extensions/vm/ceil.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/common.hpp b/dpnp/backend/extensions/vm/common.hpp index 60bfb4e0881..b53b9b0881c 100644 --- a/dpnp/backend/extensions/vm/common.hpp +++ b/dpnp/backend/extensions/vm/common.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/conj.hpp b/dpnp/backend/extensions/vm/conj.hpp index 106c20eb95f..af3acb3466e 100644 --- a/dpnp/backend/extensions/vm/conj.hpp +++ b/dpnp/backend/extensions/vm/conj.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/cos.hpp b/dpnp/backend/extensions/vm/cos.hpp index 58cea706a0b..a085123ca14 100644 --- a/dpnp/backend/extensions/vm/cos.hpp +++ b/dpnp/backend/extensions/vm/cos.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/cosh.hpp b/dpnp/backend/extensions/vm/cosh.hpp index ec1a160c03e..301a2fbeb22 100644 --- a/dpnp/backend/extensions/vm/cosh.hpp +++ b/dpnp/backend/extensions/vm/cosh.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/div.hpp b/dpnp/backend/extensions/vm/div.hpp index 48cbe7f000c..c1306660484 100644 --- a/dpnp/backend/extensions/vm/div.hpp +++ b/dpnp/backend/extensions/vm/div.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/exp.hpp b/dpnp/backend/extensions/vm/exp.hpp index e5de0e1f371..936b6a5a0ce 100644 --- a/dpnp/backend/extensions/vm/exp.hpp +++ b/dpnp/backend/extensions/vm/exp.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/exp2.hpp b/dpnp/backend/extensions/vm/exp2.hpp index 8f80d0c1d50..362897fdbe6 100644 --- a/dpnp/backend/extensions/vm/exp2.hpp +++ b/dpnp/backend/extensions/vm/exp2.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/expm1.hpp b/dpnp/backend/extensions/vm/expm1.hpp index 6f443a99a6b..d0a94bca8e9 100644 --- a/dpnp/backend/extensions/vm/expm1.hpp +++ b/dpnp/backend/extensions/vm/expm1.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/floor.hpp b/dpnp/backend/extensions/vm/floor.hpp index ec4413909ac..c138b8b6678 100644 --- a/dpnp/backend/extensions/vm/floor.hpp +++ b/dpnp/backend/extensions/vm/floor.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/hypot.hpp b/dpnp/backend/extensions/vm/hypot.hpp index 5e729aa7477..19dd4345c36 100644 --- a/dpnp/backend/extensions/vm/hypot.hpp +++ b/dpnp/backend/extensions/vm/hypot.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/ln.hpp b/dpnp/backend/extensions/vm/ln.hpp index c08440c728e..574cc8fa33c 100644 --- a/dpnp/backend/extensions/vm/ln.hpp +++ b/dpnp/backend/extensions/vm/ln.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/log10.hpp b/dpnp/backend/extensions/vm/log10.hpp index 1d2556a1ffc..dc030817cda 100644 --- a/dpnp/backend/extensions/vm/log10.hpp +++ b/dpnp/backend/extensions/vm/log10.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/log1p.hpp b/dpnp/backend/extensions/vm/log1p.hpp index 7b5e46752f1..39ab1b3a21c 100644 --- a/dpnp/backend/extensions/vm/log1p.hpp +++ b/dpnp/backend/extensions/vm/log1p.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/log2.hpp b/dpnp/backend/extensions/vm/log2.hpp index 2d7d359e234..2c419ac8ab2 100644 --- a/dpnp/backend/extensions/vm/log2.hpp +++ b/dpnp/backend/extensions/vm/log2.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/mul.hpp b/dpnp/backend/extensions/vm/mul.hpp index 52ddaae377a..39ea8eec20a 100644 --- a/dpnp/backend/extensions/vm/mul.hpp +++ b/dpnp/backend/extensions/vm/mul.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/pow.hpp b/dpnp/backend/extensions/vm/pow.hpp index 744cbb073a6..f5e946914bf 100644 --- a/dpnp/backend/extensions/vm/pow.hpp +++ b/dpnp/backend/extensions/vm/pow.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/round.hpp b/dpnp/backend/extensions/vm/round.hpp index 8c7ae033f49..a2ae3b3bc52 100644 --- a/dpnp/backend/extensions/vm/round.hpp +++ b/dpnp/backend/extensions/vm/round.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/sin.hpp b/dpnp/backend/extensions/vm/sin.hpp index adfec626885..0af14c68c87 100644 --- a/dpnp/backend/extensions/vm/sin.hpp +++ b/dpnp/backend/extensions/vm/sin.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/sinh.hpp b/dpnp/backend/extensions/vm/sinh.hpp index b48ca06c79f..6fe53423c53 100644 --- a/dpnp/backend/extensions/vm/sinh.hpp +++ b/dpnp/backend/extensions/vm/sinh.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/sqr.hpp b/dpnp/backend/extensions/vm/sqr.hpp index 7dbd96eea42..8f1d4ac44fd 100644 --- a/dpnp/backend/extensions/vm/sqr.hpp +++ b/dpnp/backend/extensions/vm/sqr.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/sqrt.hpp b/dpnp/backend/extensions/vm/sqrt.hpp index f819c49723e..e3984133628 100644 --- a/dpnp/backend/extensions/vm/sqrt.hpp +++ b/dpnp/backend/extensions/vm/sqrt.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/sub.hpp b/dpnp/backend/extensions/vm/sub.hpp index e2b25b4b798..e1a2464b867 100644 --- a/dpnp/backend/extensions/vm/sub.hpp +++ b/dpnp/backend/extensions/vm/sub.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/tan.hpp b/dpnp/backend/extensions/vm/tan.hpp index d9d7a025fd6..d759ea46fe1 100644 --- a/dpnp/backend/extensions/vm/tan.hpp +++ b/dpnp/backend/extensions/vm/tan.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/tanh.hpp b/dpnp/backend/extensions/vm/tanh.hpp index 068f93185d4..98909685ff2 100644 --- a/dpnp/backend/extensions/vm/tanh.hpp +++ b/dpnp/backend/extensions/vm/tanh.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/trunc.hpp b/dpnp/backend/extensions/vm/trunc.hpp index 1950bbdcc57..c06c7cf566f 100644 --- a/dpnp/backend/extensions/vm/trunc.hpp +++ b/dpnp/backend/extensions/vm/trunc.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/types_matrix.hpp b/dpnp/backend/extensions/vm/types_matrix.hpp index 0ddc61884e1..5b4ccb8fdf6 100644 --- a/dpnp/backend/extensions/vm/types_matrix.hpp +++ b/dpnp/backend/extensions/vm/types_matrix.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/extensions/vm/vm_py.cpp b/dpnp/backend/extensions/vm/vm_py.cpp index 09416b00918..74d2ae67794 100644 --- a/dpnp/backend/extensions/vm/vm_py.cpp +++ b/dpnp/backend/extensions/vm/vm_py.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp index 795d3e9c534..ea1c477173f 100644 --- a/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_1arg_1type_tbl.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/include/dpnp_gen_1arg_2type_tbl.hpp b/dpnp/backend/include/dpnp_gen_1arg_2type_tbl.hpp index 019267829f0..3abc54c7212 100644 --- a/dpnp/backend/include/dpnp_gen_1arg_2type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_1arg_2type_tbl.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp index e4ca7bc24fd..130283e5834 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_1type_tbl.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp index bb54c4b152e..d84accb0757 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_2type_tbl.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2023, Intel Corporation +// Copyright (c) 2023-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp index 353d1400320..7423085d659 100644 --- a/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp +++ b/dpnp/backend/include/dpnp_gen_2arg_3type_tbl.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/include/dpnp_iface.hpp b/dpnp/backend/include/dpnp_iface.hpp index 0cbcfab9f96..c3dfceef21a 100644 --- a/dpnp/backend/include/dpnp_iface.hpp +++ b/dpnp/backend/include/dpnp_iface.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/include/dpnp_iface_fft.hpp b/dpnp/backend/include/dpnp_iface_fft.hpp index 993e205095f..308ec7897f2 100644 --- a/dpnp/backend/include/dpnp_iface_fft.hpp +++ b/dpnp/backend/include/dpnp_iface_fft.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/include/dpnp_iface_fptr.hpp b/dpnp/backend/include/dpnp_iface_fptr.hpp index 8b1f4c48a11..2e2ce5ab144 100644 --- a/dpnp/backend/include/dpnp_iface_fptr.hpp +++ b/dpnp/backend/include/dpnp_iface_fptr.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -78,8 +78,6 @@ enum class DPNPFuncName : size_t DPNP_FN_ARGMAX, /**< Used in numpy.argmax() impl */ DPNP_FN_ARGMIN, /**< Used in numpy.argmin() impl */ DPNP_FN_ARGSORT, /**< Used in numpy.argsort() impl */ - DPNP_FN_ARGSORT_EXT, /**< Used in numpy.argsort() impl, requires extra - parameters */ DPNP_FN_AROUND, /**< Used in numpy.around() impl */ DPNP_FN_ASTYPE, /**< Used in numpy.astype() impl */ DPNP_FN_BITWISE_AND, /**< Used in numpy.bitwise_and() impl */ @@ -88,8 +86,6 @@ enum class DPNPFuncName : size_t DPNP_FN_CBRT, /**< Used in numpy.cbrt() impl */ DPNP_FN_CEIL, /**< Used in numpy.ceil() impl */ DPNP_FN_CHOLESKY, /**< Used in numpy.linalg.cholesky() impl */ - DPNP_FN_CHOLESKY_EXT, /**< Used in numpy.linalg.cholesky() impl, requires - extra parameters */ DPNP_FN_CONJUGATE, /**< Used in numpy.conjugate() impl */ DPNP_FN_CHOOSE, /**< Used in numpy.choose() impl */ DPNP_FN_CHOOSE_EXT, /**< Used in numpy.choose() impl, requires extra @@ -121,8 +117,6 @@ enum class DPNPFuncName : size_t DPNP_FN_DEGREES_EXT, /**< Used in numpy.degrees() impl, requires extra parameters */ DPNP_FN_DET, /**< Used in numpy.linalg.det() impl */ - DPNP_FN_DET_EXT, /**< Used in numpy.linalg.det() impl, requires extra - parameters */ DPNP_FN_DIAG, /**< Used in numpy.diag() impl */ DPNP_FN_DIAG_INDICES, /**< Used in numpy.diag_indices() impl */ DPNP_FN_DIAG_INDICES_EXT, /**< Used in numpy.diag_indices() impl, requires @@ -180,8 +174,6 @@ enum class DPNPFuncName : size_t DPNP_FN_INITVAL_EXT, /**< Used in numpy ones, ones_like, zeros, zeros_like impls */ DPNP_FN_INV, /**< Used in numpy.linalg.inv() impl */ - DPNP_FN_INV_EXT, /**< Used in numpy.linalg.inv() impl, requires extra - parameters */ DPNP_FN_INVERT, /**< Used in numpy.invert() impl */ DPNP_FN_KRON, /**< Used in numpy.kron() impl */ DPNP_FN_KRON_EXT, /**< Used in numpy.kron() impl, requires extra parameters @@ -192,8 +184,6 @@ enum class DPNPFuncName : size_t 
DPNP_FN_LOG2, /**< Used in numpy.log2() impl */ DPNP_FN_LOG1P, /**< Used in numpy.log1p() impl */ DPNP_FN_MATMUL, /**< Used in numpy.matmul() impl */ - DPNP_FN_MATMUL_EXT, /**< Used in numpy.matmul() impl, requires extra - parameters */ DPNP_FN_MATRIX_RANK, /**< Used in numpy.linalg.matrix_rank() impl */ DPNP_FN_MATRIX_RANK_EXT, /**< Used in numpy.linalg.matrix_rank() impl, requires extra parameters */ @@ -237,8 +227,6 @@ enum class DPNPFuncName : size_t parameters */ DPNP_FN_REMAINDER, /**< Used in numpy.remainder() impl */ DPNP_FN_RECIP, /**< Used in numpy.recip() impl */ - DPNP_FN_RECIP_EXT, /**< Used in numpy.recip() impl, requires extra - parameters */ DPNP_FN_REPEAT, /**< Used in numpy.repeat() impl */ DPNP_FN_RIGHT_SHIFT, /**< Used in numpy.right_shift() impl */ DPNP_FN_RNG_BETA, /**< Used in numpy.random.beta() impl */ @@ -365,9 +353,7 @@ enum class DPNPFuncName : size_t DPNP_FN_SIN, /**< Used in numpy.sin() impl */ DPNP_FN_SINH, /**< Used in numpy.sinh() impl */ DPNP_FN_SORT, /**< Used in numpy.sort() impl */ - DPNP_FN_SORT_EXT, /**< Used in numpy.sort() impl, requires extra parameters - */ - DPNP_FN_SQRT, /**< Used in numpy.sqrt() impl */ + DPNP_FN_SQRT, /**< Used in numpy.sqrt() impl */ DPNP_FN_SQRT_EXT, /**< Used in numpy.sqrt() impl, requires extra parameters */ DPNP_FN_SQUARE, /**< Used in numpy.square() impl */ @@ -376,29 +362,28 @@ enum class DPNPFuncName : size_t DPNP_FN_SUBTRACT_EXT, /**< Used in numpy.subtract() impl, requires extra parameters */ DPNP_FN_SUM, /**< Used in numpy.sum() impl */ - DPNP_FN_SUM_EXT, /**< Used in numpy.sum() impl, requires extra parameters */ - DPNP_FN_SVD, /**< Used in numpy.linalg.svd() impl */ - DPNP_FN_SVD_EXT, /**< Used in numpy.linalg.svd() impl, requires extra - parameters */ - DPNP_FN_TAKE, /**< Used in numpy.take() impl */ - DPNP_FN_TAN, /**< Used in numpy.tan() impl */ - DPNP_FN_TANH, /**< Used in numpy.tanh() impl */ - DPNP_FN_TRANSPOSE, /**< Used in numpy.transpose() impl */ - DPNP_FN_TRACE, /**< Used in 
numpy.trace() impl */ - DPNP_FN_TRACE_EXT, /**< Used in numpy.trace() impl, requires extra - parameters */ - DPNP_FN_TRAPZ, /**< Used in numpy.trapz() impl */ - DPNP_FN_TRAPZ_EXT, /**< Used in numpy.trapz() impl, requires extra - parameters */ - DPNP_FN_TRI, /**< Used in numpy.tri() impl */ - DPNP_FN_TRIL, /**< Used in numpy.tril() impl */ - DPNP_FN_TRIU, /**< Used in numpy.triu() impl */ - DPNP_FN_TRUNC, /**< Used in numpy.trunc() impl */ - DPNP_FN_VANDER, /**< Used in numpy.vander() impl */ - DPNP_FN_VAR, /**< Used in numpy.var() impl */ - DPNP_FN_ZEROS, /**< Used in numpy.zeros() impl */ - DPNP_FN_ZEROS_LIKE, /**< Used in numpy.zeros_like() impl */ - DPNP_FN_LAST, /**< The latest element of the enumeration */ + DPNP_FN_SVD, /**< Used in numpy.linalg.svd() impl */ + DPNP_FN_SVD_EXT, /**< Used in numpy.linalg.svd() impl, requires extra + parameters */ + DPNP_FN_TAKE, /**< Used in numpy.take() impl */ + DPNP_FN_TAN, /**< Used in numpy.tan() impl */ + DPNP_FN_TANH, /**< Used in numpy.tanh() impl */ + DPNP_FN_TRANSPOSE, /**< Used in numpy.transpose() impl */ + DPNP_FN_TRACE, /**< Used in numpy.trace() impl */ + DPNP_FN_TRACE_EXT, /**< Used in numpy.trace() impl, requires extra + parameters */ + DPNP_FN_TRAPZ, /**< Used in numpy.trapz() impl */ + DPNP_FN_TRAPZ_EXT, /**< Used in numpy.trapz() impl, requires extra + parameters */ + DPNP_FN_TRI, /**< Used in numpy.tri() impl */ + DPNP_FN_TRIL, /**< Used in numpy.tril() impl */ + DPNP_FN_TRIU, /**< Used in numpy.triu() impl */ + DPNP_FN_TRUNC, /**< Used in numpy.trunc() impl */ + DPNP_FN_VANDER, /**< Used in numpy.vander() impl */ + DPNP_FN_VAR, /**< Used in numpy.var() impl */ + DPNP_FN_ZEROS, /**< Used in numpy.zeros() impl */ + DPNP_FN_ZEROS_LIKE, /**< Used in numpy.zeros_like() impl */ + DPNP_FN_LAST, /**< The latest element of the enumeration */ }; /** diff --git a/dpnp/backend/include/dpnp_iface_random.hpp b/dpnp/backend/include/dpnp_iface_random.hpp index 10ba3a2e282..33933046180 100644 --- 
a/dpnp/backend/include/dpnp_iface_random.hpp +++ b/dpnp/backend/include/dpnp_iface_random.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp index 4fdd1a394ed..9db8425f6de 100644 --- a/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_bitwise.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -32,6 +32,12 @@ #include "dpnpc_memory_adapter.hpp" #include "queue_sycl.hpp" +// dpctl tensor headers +#include "kernels/alignment.hpp" + +using dpctl::tensor::kernels::alignment_utils::is_aligned; +using dpctl::tensor::kernels::alignment_utils::required_alignment; + template class dpnp_invert_c_kernel; @@ -67,7 +73,10 @@ DPCTLSyclEventRef dpnp_invert_c(DPCTLSyclQueueRef q_ref, vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); - if (start + static_cast(vec_sz) * max_sg_size < size) { + if (is_aligned(input_data) && + is_aligned(result) && + (start + static_cast(vec_sz) * max_sg_size < size)) + { auto input_multi_ptr = sycl::address_space_cast< sycl::access::address_space::global_space, sycl::access::decorated::yes>(&input_data[start]); diff --git a/dpnp/backend/kernels/dpnp_krnl_common.cpp b/dpnp/backend/kernels/dpnp_krnl_common.cpp index d575f8bdb96..e664c30b848 100644 --- a/dpnp/backend/kernels/dpnp_krnl_common.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_common.cpp @@ -1,5 +1,5 @@ 
//***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -946,26 +946,6 @@ void (*dpnp_matmul_default_c)(void *, const shape_elem_type *) = dpnp_matmul_c<_DataType>; -template -DPCTLSyclEventRef (*dpnp_matmul_ext_c)(DPCTLSyclQueueRef, - void *, - const size_t, - const size_t, - const shape_elem_type *, - const shape_elem_type *, - const void *, - const size_t, - const size_t, - const shape_elem_type *, - const shape_elem_type *, - const void *, - const size_t, - const size_t, - const shape_elem_type *, - const shape_elem_type *, - const DPCTLEventVectorRef) = - dpnp_matmul_c<_DataType>; - void func_map_init_linalg(func_map_t &fmap) { fmap[DPNPFuncName::DPNP_FN_ASTYPE][eft_BLN][eft_BLN] = { @@ -1190,14 +1170,5 @@ void func_map_init_linalg(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_MATMUL][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_matmul_default_c}; - fmap[DPNPFuncName::DPNP_FN_MATMUL_EXT][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_matmul_ext_c}; - fmap[DPNPFuncName::DPNP_FN_MATMUL_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_matmul_ext_c}; - fmap[DPNPFuncName::DPNP_FN_MATMUL_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_matmul_ext_c}; - fmap[DPNPFuncName::DPNP_FN_MATMUL_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_matmul_ext_c}; - return; } diff --git a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp index 9a85510cb01..6d17c7b985a 100644 --- a/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_elemwise.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without @@ -34,6 +34,12 @@ #include "dpnpc_memory_adapter.hpp" #include "queue_sycl.hpp" +// dpctl tensor headers +#include "kernels/alignment.hpp" + +using dpctl::tensor::kernels::alignment_utils::is_aligned; +using dpctl::tensor::kernels::alignment_utils::required_alignment; + #define MACRO_1ARG_2TYPES_OP(__name__, __operation1__, __operation2__) \ template \ @@ -928,15 +934,6 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_RECIP][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_recip_c_default}; - fmap[DPNPFuncName::DPNP_FN_RECIP_EXT][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_recip_c_ext}; - fmap[DPNPFuncName::DPNP_FN_RECIP_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_recip_c_ext}; - fmap[DPNPFuncName::DPNP_FN_RECIP_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_recip_c_ext}; - fmap[DPNPFuncName::DPNP_FN_RECIP_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_recip_c_ext}; - fmap[DPNPFuncName::DPNP_FN_SIGN][eft_INT][eft_INT] = { eft_INT, (void *)dpnp_sign_c_default}; fmap[DPNPFuncName::DPNP_FN_SIGN][eft_LNG][eft_LNG] = { @@ -1198,8 +1195,12 @@ static void func_map_init_elemwise_1arg_1type(func_map_t &fmap) (nd_it.get_group(0) * nd_it.get_local_range(0) + \ sg.get_group_id()[0] * max_sg_size); \ \ - if (start + static_cast(vec_sz) * max_sg_size < \ - result_size) { \ + if (is_aligned(input1_data) && \ + is_aligned(input2_data) && \ + is_aligned(result) && \ + (start + static_cast(vec_sz) * max_sg_size < \ + result_size)) \ + { \ auto input1_multi_ptr = sycl::address_space_cast< \ sycl::access::address_space::global_space, \ sycl::access::decorated::yes>( \ diff --git a/dpnp/backend/kernels/dpnp_krnl_fft.cpp b/dpnp/backend/kernels/dpnp_krnl_fft.cpp index fa88ea92315..027f3343178 100644 --- a/dpnp/backend/kernels/dpnp_krnl_fft.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_fft.cpp @@ -1,5 +1,5 @@ 
//***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp index 7dc35fb5a80..0889358d989 100644 --- a/dpnp/backend/kernels/dpnp_krnl_indexing.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_indexing.cpp @@ -935,6 +935,10 @@ void func_map_init_indexing_func(func_map_t &fmap) eft_FLT, (void *)dpnp_diagonal_ext_c}; fmap[DPNPFuncName::DPNP_FN_DIAGONAL_EXT][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_diagonal_ext_c}; + fmap[DPNPFuncName::DPNP_FN_DIAGONAL_EXT][eft_C64][eft_C64] = { + eft_C64, (void *)dpnp_diagonal_ext_c>}; + fmap[DPNPFuncName::DPNP_FN_DIAGONAL_EXT][eft_C128][eft_C128] = { + eft_C128, (void *)dpnp_diagonal_ext_c>}; fmap[DPNPFuncName::DPNP_FN_FILL_DIAGONAL][eft_INT][eft_INT] = { eft_INT, (void *)dpnp_fill_diagonal_default_c}; diff --git a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp index 5e78a7cda17..e0b6de5b1b6 100644 --- a/dpnp/backend/kernels/dpnp_krnl_linalg.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_linalg.cpp @@ -128,15 +128,6 @@ template void (*dpnp_cholesky_default_c)(void *, void *, const size_t, const size_t) = dpnp_cholesky_c<_DataType>; -template -DPCTLSyclEventRef (*dpnp_cholesky_ext_c)(DPCTLSyclQueueRef, - void *, - void *, - const size_t, - const size_t, - const DPCTLEventVectorRef) = - dpnp_cholesky_c<_DataType>; - template DPCTLSyclEventRef dpnp_det_c(DPCTLSyclQueueRef q_ref, void *array1_in, @@ -387,15 +378,6 @@ template void (*dpnp_inv_default_c)(void *, void *, shape_elem_type *, size_t) = dpnp_inv_c<_DataType, _ResultType>; -template -DPCTLSyclEventRef (*dpnp_inv_ext_c)(DPCTLSyclQueueRef, - void *, - void *, - shape_elem_type *, - size_t, - const DPCTLEventVectorRef) = - dpnp_inv_c<_DataType, 
_ResultType>; - template class dpnp_kron_c_kernel; @@ -860,11 +842,6 @@ void func_map_init_linalg_func(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_CHOLESKY][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_cholesky_default_c}; - fmap[DPNPFuncName::DPNP_FN_CHOLESKY_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_cholesky_ext_c}; - fmap[DPNPFuncName::DPNP_FN_CHOLESKY_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_cholesky_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DET][eft_INT][eft_INT] = { eft_INT, (void *)dpnp_det_default_c}; fmap[DPNPFuncName::DPNP_FN_DET][eft_LNG][eft_LNG] = { @@ -874,15 +851,6 @@ void func_map_init_linalg_func(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_DET][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_det_default_c}; - fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_det_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_det_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_det_ext_c}; - fmap[DPNPFuncName::DPNP_FN_DET_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_det_ext_c}; - fmap[DPNPFuncName::DPNP_FN_INV][eft_INT][eft_INT] = { eft_DBL, (void *)dpnp_inv_default_c}; fmap[DPNPFuncName::DPNP_FN_INV][eft_LNG][eft_LNG] = { @@ -892,27 +860,6 @@ void func_map_init_linalg_func(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_INV][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_inv_default_c}; - fmap[DPNPFuncName::DPNP_FN_INV_EXT][eft_INT][eft_INT] = { - get_default_floating_type(), - (void *)dpnp_inv_ext_c< - int32_t, func_type_map_t::find_type>, - get_default_floating_type(), - (void *)dpnp_inv_ext_c< - int32_t, func_type_map_t::find_type< - get_default_floating_type()>>}; - fmap[DPNPFuncName::DPNP_FN_INV_EXT][eft_LNG][eft_LNG] = { - get_default_floating_type(), - (void *)dpnp_inv_ext_c< - int64_t, func_type_map_t::find_type>, - get_default_floating_type(), - (void *)dpnp_inv_ext_c< - int64_t, func_type_map_t::find_type< - 
get_default_floating_type()>>}; - fmap[DPNPFuncName::DPNP_FN_INV_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_inv_ext_c}; - fmap[DPNPFuncName::DPNP_FN_INV_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_inv_ext_c}; - fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_INT] = { eft_INT, (void *)dpnp_kron_default_c}; fmap[DPNPFuncName::DPNP_FN_KRON][eft_INT][eft_LNG] = { diff --git a/dpnp/backend/kernels/dpnp_krnl_logic.cpp b/dpnp/backend/kernels/dpnp_krnl_logic.cpp index 0674f1387b7..0174b47339a 100644 --- a/dpnp/backend/kernels/dpnp_krnl_logic.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_logic.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -31,6 +31,12 @@ #include "dpnpc_memory_adapter.hpp" #include "queue_sycl.hpp" +// dpctl tensor headers +#include "kernels/alignment.hpp" + +using dpctl::tensor::kernels::alignment_utils::is_aligned; +using dpctl::tensor::kernels::alignment_utils::required_alignment; + template class dpnp_all_c_kernel; @@ -610,8 +616,12 @@ DPCTLSyclEventRef (*dpnp_any_ext_c)(DPCTLSyclQueueRef, vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + \ sg.get_group_id()[0] * max_sg_size); \ \ - if (start + static_cast(vec_sz) * max_sg_size < \ - result_size) { \ + if (is_aligned(input1_data) && \ + is_aligned(input2_data) && \ + is_aligned(result) && \ + (start + static_cast(vec_sz) * max_sg_size < \ + result_size)) \ + { \ auto input1_multi_ptr = sycl::address_space_cast< \ sycl::access::address_space::global_space, \ sycl::access::decorated::yes>(&input1_data[start]); \ diff --git a/dpnp/backend/kernels/dpnp_krnl_manipulation.cpp b/dpnp/backend/kernels/dpnp_krnl_manipulation.cpp index 315e1c211f9..aaaa5a179dd 100644 --- a/dpnp/backend/kernels/dpnp_krnl_manipulation.cpp +++ 
b/dpnp/backend/kernels/dpnp_krnl_manipulation.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp index 975f4b67ca8..d80ccfa186e 100644 --- a/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_mathematical.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -35,6 +35,12 @@ #include "dpnpc_memory_adapter.hpp" #include "queue_sycl.hpp" +// dpctl tensor headers +#include "kernels/alignment.hpp" + +using dpctl::tensor::kernels::alignment_utils::is_aligned; +using dpctl::tensor::kernels::alignment_utils::required_alignment; + static_assert(__SYCL_COMPILER_VERSION >= __SYCL_COMPILER_VECTOR_ABS_CHANGED, "SYCL DPC++ compiler does not meet minimum version requirement"); @@ -163,7 +169,10 @@ DPCTLSyclEventRef vec_sz * (nd_it.get_group(0) * nd_it.get_local_range(0) + sg.get_group_id()[0] * max_sg_size); - if (start + static_cast(vec_sz) * max_sg_size < size) { + if (is_aligned(array1) && + is_aligned(result) && + (start + static_cast(vec_sz) * max_sg_size < size)) + { auto array_multi_ptr = sycl::address_space_cast< sycl::access::address_space::global_space, sycl::access::decorated::yes>(&array1[start]); diff --git a/dpnp/backend/kernels/dpnp_krnl_random.cpp b/dpnp/backend/kernels/dpnp_krnl_random.cpp index 97594f1e7a6..53f37ee5c3c 100644 --- a/dpnp/backend/kernels/dpnp_krnl_random.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_random.cpp @@ -1,5 +1,5 @@ 
//***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/kernels/dpnp_krnl_reduction.cpp b/dpnp/backend/kernels/dpnp_krnl_reduction.cpp index d9534379102..a9383615e06 100644 --- a/dpnp/backend/kernels/dpnp_krnl_reduction.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_reduction.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without @@ -177,19 +177,6 @@ void (*dpnp_sum_default_c)(void *, const long *) = dpnp_sum_c<_DataType_output, _DataType_input>; -template -DPCTLSyclEventRef (*dpnp_sum_ext_c)(DPCTLSyclQueueRef, - void *, - const void *, - const shape_elem_type *, - const size_t, - const shape_elem_type *, - const size_t, - const void *, - const long *, - const DPCTLEventVectorRef) = - dpnp_sum_c<_DataType_output, _DataType_input>; - template class dpnp_prod_c_kernel; @@ -372,41 +359,5 @@ void func_map_init_reduction(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_SUM][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_sum_default_c}; - fmap[DPNPFuncName::DPNP_FN_SUM_EXT][eft_INT][eft_INT] = { - eft_LNG, (void *)dpnp_sum_ext_c}; - fmap[DPNPFuncName::DPNP_FN_SUM_EXT][eft_INT][eft_LNG] = { - eft_LNG, (void *)dpnp_sum_ext_c}; - fmap[DPNPFuncName::DPNP_FN_SUM_EXT][eft_INT][eft_FLT] = { - eft_FLT, (void *)dpnp_sum_ext_c}; - fmap[DPNPFuncName::DPNP_FN_SUM_EXT][eft_INT][eft_DBL] = { - eft_DBL, (void *)dpnp_sum_ext_c}; - - fmap[DPNPFuncName::DPNP_FN_SUM_EXT][eft_LNG][eft_INT] = { - eft_INT, (void *)dpnp_sum_ext_c}; - fmap[DPNPFuncName::DPNP_FN_SUM_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_sum_ext_c}; - 
fmap[DPNPFuncName::DPNP_FN_SUM_EXT][eft_LNG][eft_FLT] = { - eft_FLT, (void *)dpnp_sum_ext_c}; - fmap[DPNPFuncName::DPNP_FN_SUM_EXT][eft_LNG][eft_DBL] = { - eft_DBL, (void *)dpnp_sum_ext_c}; - - fmap[DPNPFuncName::DPNP_FN_SUM_EXT][eft_FLT][eft_INT] = { - eft_INT, (void *)dpnp_sum_ext_c}; - fmap[DPNPFuncName::DPNP_FN_SUM_EXT][eft_FLT][eft_LNG] = { - eft_LNG, (void *)dpnp_sum_ext_c}; - fmap[DPNPFuncName::DPNP_FN_SUM_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_sum_ext_c}; - fmap[DPNPFuncName::DPNP_FN_SUM_EXT][eft_FLT][eft_DBL] = { - eft_DBL, (void *)dpnp_sum_ext_c}; - - fmap[DPNPFuncName::DPNP_FN_SUM_EXT][eft_DBL][eft_INT] = { - eft_INT, (void *)dpnp_sum_ext_c}; - fmap[DPNPFuncName::DPNP_FN_SUM_EXT][eft_DBL][eft_LNG] = { - eft_LNG, (void *)dpnp_sum_ext_c}; - fmap[DPNPFuncName::DPNP_FN_SUM_EXT][eft_DBL][eft_FLT] = { - eft_FLT, (void *)dpnp_sum_ext_c}; - fmap[DPNPFuncName::DPNP_FN_SUM_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_sum_ext_c}; - return; } diff --git a/dpnp/backend/kernels/dpnp_krnl_searching.cpp b/dpnp/backend/kernels/dpnp_krnl_searching.cpp index 045d405056c..96ff470d7b0 100644 --- a/dpnp/backend/kernels/dpnp_krnl_searching.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_searching.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/kernels/dpnp_krnl_sorting.cpp b/dpnp/backend/kernels/dpnp_krnl_sorting.cpp index ac4992466e2..6f33c1af723 100644 --- a/dpnp/backend/kernels/dpnp_krnl_sorting.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_sorting.cpp @@ -97,14 +97,6 @@ template void (*dpnp_argsort_default_c)(void *, void *, size_t) = dpnp_argsort_c<_DataType, _idx_DataType>; -template -DPCTLSyclEventRef (*dpnp_argsort_ext_c)(DPCTLSyclQueueRef, - void *, - void *, - size_t, - const DPCTLEventVectorRef) = - dpnp_argsort_c<_DataType, _idx_DataType>; - // template void dpnp_argsort_c(void* array1_in, void* result1, // size_t size); template void dpnp_argsort_c(void* array1_in, // void* result1, size_t size); template void dpnp_argsort_c(void* @@ -471,14 +463,6 @@ void dpnp_sort_c(void *array1_in, void *result1, size_t size) template void (*dpnp_sort_default_c)(void *, void *, size_t) = dpnp_sort_c<_DataType>; -template -DPCTLSyclEventRef (*dpnp_sort_ext_c)(DPCTLSyclQueueRef, - void *, - void *, - size_t, - const DPCTLEventVectorRef) = - dpnp_sort_c<_DataType>; - void func_map_init_sorting(func_map_t &fmap) { fmap[DPNPFuncName::DPNP_FN_ARGSORT][eft_INT][eft_INT] = { @@ -490,15 +474,6 @@ void func_map_init_sorting(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_ARGSORT][eft_DBL][eft_DBL] = { eft_LNG, (void *)dpnp_argsort_default_c}; - fmap[DPNPFuncName::DPNP_FN_ARGSORT_EXT][eft_INT][eft_INT] = { - eft_LNG, (void *)dpnp_argsort_ext_c}; - fmap[DPNPFuncName::DPNP_FN_ARGSORT_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_argsort_ext_c}; - fmap[DPNPFuncName::DPNP_FN_ARGSORT_EXT][eft_FLT][eft_FLT] = { - eft_LNG, (void *)dpnp_argsort_ext_c}; - fmap[DPNPFuncName::DPNP_FN_ARGSORT_EXT][eft_DBL][eft_DBL] = { - eft_LNG, (void *)dpnp_argsort_ext_c}; - fmap[DPNPFuncName::DPNP_FN_PARTITION][eft_INT][eft_INT] = { eft_INT, (void *)dpnp_partition_default_c}; fmap[DPNPFuncName::DPNP_FN_PARTITION][eft_LNG][eft_LNG] = { @@ -550,14 
+525,5 @@ void func_map_init_sorting(func_map_t &fmap) fmap[DPNPFuncName::DPNP_FN_SORT][eft_DBL][eft_DBL] = { eft_DBL, (void *)dpnp_sort_default_c}; - fmap[DPNPFuncName::DPNP_FN_SORT_EXT][eft_INT][eft_INT] = { - eft_INT, (void *)dpnp_sort_ext_c}; - fmap[DPNPFuncName::DPNP_FN_SORT_EXT][eft_LNG][eft_LNG] = { - eft_LNG, (void *)dpnp_sort_ext_c}; - fmap[DPNPFuncName::DPNP_FN_SORT_EXT][eft_FLT][eft_FLT] = { - eft_FLT, (void *)dpnp_sort_ext_c}; - fmap[DPNPFuncName::DPNP_FN_SORT_EXT][eft_DBL][eft_DBL] = { - eft_DBL, (void *)dpnp_sort_ext_c}; - return; } diff --git a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp index 97df3c2d7f1..a108b520091 100644 --- a/dpnp/backend/kernels/dpnp_krnl_statistics.cpp +++ b/dpnp/backend/kernels/dpnp_krnl_statistics.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/src/constants.cpp b/dpnp/backend/src/constants.cpp index 602636c7f30..01f671882f5 100644 --- a/dpnp/backend/src/constants.cpp +++ b/dpnp/backend/src/constants.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/src/constants.hpp b/dpnp/backend/src/constants.hpp index e64051d4bc6..68069e18a9b 100644 --- a/dpnp/backend/src/constants.hpp +++ b/dpnp/backend/src/constants.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/src/dpnp_fptr.hpp b/dpnp/backend/src/dpnp_fptr.hpp index 459398f7611..a46f3a7d35d 100644 --- a/dpnp/backend/src/dpnp_fptr.hpp +++ b/dpnp/backend/src/dpnp_fptr.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/src/dpnp_iface_fptr.cpp b/dpnp/backend/src/dpnp_iface_fptr.cpp index c19211e929b..a0683d44a96 100644 --- a/dpnp/backend/src/dpnp_iface_fptr.cpp +++ b/dpnp/backend/src/dpnp_iface_fptr.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/src/dpnp_iterator.hpp b/dpnp/backend/src/dpnp_iterator.hpp index 399f2706383..498cb58db03 100644 --- a/dpnp/backend/src/dpnp_iterator.hpp +++ b/dpnp/backend/src/dpnp_iterator.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/src/dpnp_pstl.hpp b/dpnp/backend/src/dpnp_pstl.hpp index 445b4374eb9..6ed9035054b 100644 --- a/dpnp/backend/src/dpnp_pstl.hpp +++ b/dpnp/backend/src/dpnp_pstl.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/src/dpnp_random_state.cpp b/dpnp/backend/src/dpnp_random_state.cpp index d8d3b6bacaa..106ebef8f18 100644 --- a/dpnp/backend/src/dpnp_random_state.cpp +++ b/dpnp/backend/src/dpnp_random_state.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2022-2023, Intel Corporation +// Copyright (c) 2022-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/src/dpnp_random_state.hpp b/dpnp/backend/src/dpnp_random_state.hpp index 862a2cb765c..9f6070be7d1 100644 --- a/dpnp/backend/src/dpnp_random_state.hpp +++ b/dpnp/backend/src/dpnp_random_state.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2022-2023, Intel Corporation +// Copyright (c) 2022-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/src/dpnp_utils.hpp b/dpnp/backend/src/dpnp_utils.hpp index bb939004ee0..88e993a0a20 100644 --- a/dpnp/backend/src/dpnp_utils.hpp +++ b/dpnp/backend/src/dpnp_utils.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/src/dpnpc_memory_adapter.hpp b/dpnp/backend/src/dpnpc_memory_adapter.hpp index d6948dcccc4..f5998b8e734 100644 --- a/dpnp/backend/src/dpnpc_memory_adapter.hpp +++ b/dpnp/backend/src/dpnpc_memory_adapter.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/src/memory_sycl.cpp b/dpnp/backend/src/memory_sycl.cpp index e7922bf798f..2694ed25b4c 100644 --- a/dpnp/backend/src/memory_sycl.cpp +++ b/dpnp/backend/src/memory_sycl.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/src/queue_sycl.cpp b/dpnp/backend/src/queue_sycl.cpp index 067de6f84f3..b24cab1da72 100644 --- a/dpnp/backend/src/queue_sycl.cpp +++ b/dpnp/backend/src/queue_sycl.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/src/queue_sycl.hpp b/dpnp/backend/src/queue_sycl.hpp index 845617be089..b59c028e104 100644 --- a/dpnp/backend/src/queue_sycl.hpp +++ b/dpnp/backend/src/queue_sycl.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/src/verbose.cpp b/dpnp/backend/src/verbose.cpp index c3f7aa70fa7..6a449697c4e 100644 --- a/dpnp/backend/src/verbose.cpp +++ b/dpnp/backend/src/verbose.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/src/verbose.hpp b/dpnp/backend/src/verbose.hpp index f63c6a57ea4..ae67dbe56fa 100644 --- a/dpnp/backend/src/verbose.hpp +++ b/dpnp/backend/src/verbose.hpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/tests/CMakeLists.txt b/dpnp/backend/tests/CMakeLists.txt index e8bd37d2532..00bd726b094 100644 --- a/dpnp/backend/tests/CMakeLists.txt +++ b/dpnp/backend/tests/CMakeLists.txt @@ -1,5 +1,5 @@ # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/tests/test_broadcast_iterator.cpp b/dpnp/backend/tests/test_broadcast_iterator.cpp index 7b4e88ce835..58587b1774f 100644 --- a/dpnp/backend/tests/test_broadcast_iterator.cpp +++ b/dpnp/backend/tests/test_broadcast_iterator.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/tests/test_main.cpp b/dpnp/backend/tests/test_main.cpp index 965caa1ce09..667d2771847 100644 --- a/dpnp/backend/tests/test_main.cpp +++ b/dpnp/backend/tests/test_main.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/tests/test_random.cpp b/dpnp/backend/tests/test_random.cpp index 170014b052e..f5e4c6400c5 100644 --- a/dpnp/backend/tests/test_random.cpp +++ b/dpnp/backend/tests/test_random.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/tests/test_utils.cpp b/dpnp/backend/tests/test_utils.cpp index b68a74122ea..b29bc82af12 100644 --- a/dpnp/backend/tests/test_utils.cpp +++ b/dpnp/backend/tests/test_utils.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. // // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/backend/tests/test_utils_iterator.cpp b/dpnp/backend/tests/test_utils_iterator.cpp index 45af2270a87..f8f6748f8ee 100644 --- a/dpnp/backend/tests/test_utils_iterator.cpp +++ b/dpnp/backend/tests/test_utils_iterator.cpp @@ -1,5 +1,5 @@ //***************************************************************************** -// Copyright (c) 2016-2023, Intel Corporation +// Copyright (c) 2016-2024, Intel Corporation // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without diff --git a/dpnp/config.py b/dpnp/config.py index c0463ffbb96..e0b3e7d5954 100644 --- a/dpnp/config.py +++ b/dpnp/config.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dparray.pxd b/dpnp/dparray.pxd index 95a2963c7d0..62196d86156 100644 --- a/dpnp/dparray.pxd +++ b/dpnp/dparray.pxd @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dparray.pyx b/dpnp/dparray.pyx index 947065b23ef..768b36432e6 100644 --- a/dpnp/dparray.pyx +++ b/dpnp/dparray.pyx @@ -2,7 +2,7 @@ # cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_algo/__init__.pxd b/dpnp/dpnp_algo/__init__.pxd index a366ade85c4..03a85d03fe6 100644 --- a/dpnp/dpnp_algo/__init__.pxd +++ b/dpnp/dpnp_algo/__init__.pxd @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_algo/__init__.py b/dpnp/dpnp_algo/__init__.py index ae617d8ae81..ea10c91a0f4 100644 --- a/dpnp/dpnp_algo/__init__.py +++ b/dpnp/dpnp_algo/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_algo/dpnp_algo.pxd b/dpnp/dpnp_algo/dpnp_algo.pxd index 18813e3e04c..895b393aeff 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pxd +++ b/dpnp/dpnp_algo/dpnp_algo.pxd @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -36,10 +36,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_ALLCLOSE DPNP_FN_ALLCLOSE_EXT DPNP_FN_ARANGE - DPNP_FN_ARGSORT - DPNP_FN_ARGSORT_EXT - DPNP_FN_CHOLESKY - DPNP_FN_CHOLESKY_EXT DPNP_FN_CHOOSE DPNP_FN_CHOOSE_EXT DPNP_FN_COPY @@ -54,8 +50,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_CUMSUM_EXT DPNP_FN_DEGREES DPNP_FN_DEGREES_EXT - DPNP_FN_DET - DPNP_FN_DET_EXT DPNP_FN_DIAG_INDICES DPNP_FN_DIAG_INDICES_EXT DPNP_FN_DIAGONAL @@ -84,12 +78,8 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_FMOD_EXT DPNP_FN_FULL DPNP_FN_FULL_LIKE - DPNP_FN_INV - DPNP_FN_INV_EXT DPNP_FN_KRON DPNP_FN_KRON_EXT - DPNP_FN_MATMUL - DPNP_FN_MATMUL_EXT DPNP_FN_MATRIX_RANK DPNP_FN_MATRIX_RANK_EXT DPNP_FN_MAXIMUM @@ -110,8 +100,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na 
DPNP_FN_QR_EXT DPNP_FN_RADIANS DPNP_FN_RADIANS_EXT - DPNP_FN_RECIP - DPNP_FN_RECIP_EXT DPNP_FN_RNG_BETA DPNP_FN_RNG_BETA_EXT DPNP_FN_RNG_BINOMIAL @@ -183,10 +171,6 @@ cdef extern from "dpnp_iface_fptr.hpp" namespace "DPNPFuncName": # need this na DPNP_FN_RNG_ZIPF_EXT DPNP_FN_SEARCHSORTED DPNP_FN_SEARCHSORTED_EXT - DPNP_FN_SORT - DPNP_FN_SORT_EXT - DPNP_FN_SUM - DPNP_FN_SUM_EXT DPNP_FN_SVD DPNP_FN_SVD_EXT DPNP_FN_TRACE @@ -284,16 +268,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_strides_t)(c_dpctl.DPCTLSyclQu const long * , const c_dpctl.DPCTLEventVectorRef) except + ctypedef void(*fptr_blas_gemm_2in_1out_t)(void *, void * , void * , size_t, size_t, size_t) -ctypedef c_dpctl.DPCTLSyclEventRef(*dpnp_reduction_c_t)(c_dpctl.DPCTLSyclQueueRef, - void *, - const void * , - const shape_elem_type*, - const size_t, - const shape_elem_type*, - const size_t, - const void * , - const long*, - const c_dpctl.DPCTLEventVectorRef) """ @@ -314,8 +288,6 @@ cpdef dpnp_descriptor dpnp_isclose(dpnp_descriptor input1, dpnp_descriptor input Linear algebra """ cpdef dpnp_descriptor dpnp_dot(dpnp_descriptor in_array1, dpnp_descriptor in_array2) -cpdef dpnp_descriptor dpnp_matmul(dpnp_descriptor in_array1, dpnp_descriptor in_array2, dpnp_descriptor out=*) - """ Array creation routines @@ -331,15 +303,8 @@ cpdef dpnp_descriptor dpnp_fmin(dpnp_descriptor x1_obj, dpnp_descriptor x2_obj, dpnp_descriptor out=*, object where=*) -""" -Sorting functions -""" -cpdef dpnp_descriptor dpnp_argsort(dpnp_descriptor array1) -cpdef dpnp_descriptor dpnp_sort(dpnp_descriptor array1) - """ Trigonometric functions """ cpdef dpnp_descriptor dpnp_degrees(dpnp_descriptor array1) cpdef dpnp_descriptor dpnp_radians(dpnp_descriptor array1) -cpdef dpnp_descriptor dpnp_recip(dpnp_descriptor array1) diff --git a/dpnp/dpnp_algo/dpnp_algo.pyx b/dpnp/dpnp_algo/dpnp_algo.pyx index 257c502bfa0..fadba02a032 100644 --- a/dpnp/dpnp_algo/dpnp_algo.pyx +++ b/dpnp/dpnp_algo/dpnp_algo.pyx @@ -2,7 +2,7 @@ # cython: 
linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pxi b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pxi index 1322b1ccea5..4e260f724cd 100644 --- a/dpnp/dpnp_algo/dpnp_algo_arraycreation.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_arraycreation.pxi @@ -2,7 +2,7 @@ # cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_algo/dpnp_algo_indexing.pxi b/dpnp/dpnp_algo/dpnp_algo_indexing.pxi index 25cebe84d18..ab8eceeb415 100644 --- a/dpnp/dpnp_algo/dpnp_algo_indexing.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_indexing.pxi @@ -2,7 +2,7 @@ # cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -40,7 +40,6 @@ __all__ += [ "dpnp_diag_indices", "dpnp_diagonal", "dpnp_fill_diagonal", - "dpnp_indices", "dpnp_putmask", "dpnp_select", "dpnp_tril_indices", @@ -237,40 +236,6 @@ cpdef dpnp_fill_diagonal(dpnp_descriptor x1, val): c_dpctl.DPCTLEvent_Delete(event_ref) -cpdef object dpnp_indices(dimensions): - len_dimensions = len(dimensions) - res_shape = [] - res_shape.append(len_dimensions) - for i in range(len_dimensions): - res_shape.append(dimensions[i]) - - result = [] - if len_dimensions == 1: - res = [] - for i in range(dimensions[0]): - res.append(i) - result.append(res) - else: - res1 = [] - for i in range(dimensions[0]): - res = [] - for j in range(dimensions[1]): - res.append(i) - res1.append(res) - result.append(res1) - - res2 = [] - for i in range(dimensions[0]): - res = [] - for j in range(dimensions[1]): - res.append(j) - res2.append(res) - result.append(res2) - - dpnp_result = dpnp.array(result) - return dpnp_result - - cpdef dpnp_putmask(utils.dpnp_descriptor arr, utils.dpnp_descriptor mask, utils.dpnp_descriptor values): cdef int values_size = values.size diff --git a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi index 6614399f182..9b4faf2a1b5 100644 --- a/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_linearalgebra.pxi @@ -2,7 +2,7 @@ # cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -39,7 +39,6 @@ __all__ += [ "dpnp_dot", "dpnp_inner", "dpnp_kron", - "dpnp_matmul", ] @@ -56,14 +55,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_dot_t)(c_dpctl.DPCTLSyclQueueR void * , const size_t, const size_t, const shape_elem_type *, const shape_elem_type * , const c_dpctl.DPCTLEventVectorRef) except + -ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_2in_1out_matmul_t)(c_dpctl.DPCTLSyclQueueRef, - void * , const size_t, const size_t, - const shape_elem_type *, const shape_elem_type * , - void * , const size_t, const size_t, - const shape_elem_type *, const shape_elem_type * , - void * , const size_t, const size_t, - const shape_elem_type *, const shape_elem_type * , - const c_dpctl.DPCTLEventVectorRef) cpdef utils.dpnp_descriptor dpnp_dot(utils.dpnp_descriptor in_array1, utils.dpnp_descriptor in_array2, @@ -288,92 +279,3 @@ cpdef utils.dpnp_descriptor dpnp_kron(dpnp_descriptor in_array1, dpnp_descriptor c_dpctl.DPCTLEvent_Delete(event_ref) return result - - -cpdef utils.dpnp_descriptor dpnp_matmul(utils.dpnp_descriptor in_array1, utils.dpnp_descriptor in_array2, utils.dpnp_descriptor out=None): - - cdef shape_type_c shape_result - - cdef shape_type_c shape1 = in_array1.shape - cdef shape_type_c shape2 = in_array2.shape - - cdef size_t size_m = 0 - cdef size_t size_n = 0 - cdef size_t size_k = 0 - - # Calling this function on an empty container causes undefined behavior. 
- if not shape1.empty(): - size_m = shape1.front() - if not shape2.empty(): - size_n = shape2.back() - if not shape1.empty(): - size_k = shape1.back() - - cdef size_t ndim_max = max(in_array1.ndim, in_array2.ndim) - - if in_array1.ndim < ndim_max or ndim_max == 1: - """ - shape1(2,), shape2(2,4) - test: pytest tests/test_matmul.py::test_matmul[shape_pair4-types0] -v -s - or - shape1(2,), shape2(2,) - test: pytest tests/test_matmul.py::test_matmul[shape_pair8-types0] -v -s - """ - size_m = 1 - - if in_array2.ndim < ndim_max or ndim_max == 1: - """ - shape1(5,2), shape2(2,) - test: pytest tests/test_matmul.py::test_matmul[shape_pair6-types0] -v -s - or - shape1(3,), shape2(3,) - test: pytest tests/test_matmul.py::test_matmul[shape_pair8-types0] -v -s - """ - size_n = 1 - - shape_result = shape1[:-1] + shape2[-1:] - - # convert string type names (array.dtype) to C enum DPNPFuncType - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(in_array1.dtype) - cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(in_array2.dtype) - - # get the FPTR data structure - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_MATMUL_EXT, param1_type, param2_type) - - # create result array with type given by FPTR data - result_sycl_device, result_usm_type, result_sycl_queue = utils.get_common_usm_allocation(in_array1, in_array2) - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(shape_result, - kernel_data.return_type, - out, - device=result_sycl_device, - usm_type=result_usm_type, - sycl_queue=result_sycl_queue) - if result.size == 0: - return result - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef fptr_2in_1out_matmul_t func = kernel_data.ptr - # call FPTR function - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - result.get_data(), - result.size, - result.ndim, - NULL, # result_shape - NULL, # result_strides - in_array1.get_data(), - in_array1.size, - in_array1.ndim, 
- shape1.data(), - NULL, # in_array1_strides - in_array2.get_data(), - in_array2.size, - in_array2.ndim, - shape2.data(), - NULL, # in_array2_strides - NULL) # dep_event_vec_ref - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result diff --git a/dpnp/dpnp_algo/dpnp_algo_logic.pxi b/dpnp/dpnp_algo/dpnp_algo_logic.pxi index cb97670f477..8810bfac6b1 100644 --- a/dpnp/dpnp_algo/dpnp_algo_logic.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_logic.pxi @@ -2,7 +2,7 @@ # cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi index ce1b0c5f894..85f51e52eee 100644 --- a/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_mathematical.pxi @@ -2,7 +2,7 @@ # cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -48,8 +48,6 @@ __all__ += [ "dpnp_modf", "dpnp_nancumprod", "dpnp_nancumsum", - "dpnp_nansum", - "dpnp_sum", "dpnp_trapz", ] @@ -278,82 +276,6 @@ cpdef utils.dpnp_descriptor dpnp_nancumsum(utils.dpnp_descriptor x1): return dpnp_cumsum(x1_desc) -cpdef utils.dpnp_descriptor dpnp_nansum(utils.dpnp_descriptor x1): - x1_obj = x1.get_array() - cdef utils.dpnp_descriptor result = utils_py.create_output_descriptor_py(x1.shape, - x1.dtype, - None, - device=x1_obj.sycl_device, - usm_type=x1_obj.usm_type, - sycl_queue=x1_obj.sycl_queue) - - for i in range(result.size): - input_elem = x1.get_pyobj().flat[i] - - if dpnp.isnan(input_elem): - result.get_pyobj().flat[i] = 0 - else: - result.get_pyobj().flat[i] = input_elem - - return dpnp_sum(result) - - -cpdef utils.dpnp_descriptor dpnp_sum(utils.dpnp_descriptor x1, - object axis=None, - object dtype=None, - utils.dpnp_descriptor out=None, - cpp_bool keepdims=False, - object initial=None, - object where=True): - - cdef shape_type_c x1_shape = x1.shape - cdef DPNPFuncType x1_c_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) - - cdef shape_type_c axis_shape = utils._object_to_tuple(axis) - - cdef shape_type_c result_shape = utils.get_reduction_output_shape(x1_shape, axis, keepdims) - cdef DPNPFuncType result_c_type = utils.get_output_c_type(DPNP_FN_SUM_EXT, x1_c_type, out, dtype) - - """ select kernel """ - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_SUM_EXT, x1_c_type, result_c_type) - - x1_obj = x1.get_array() - - """ Create result array """ - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, - result_c_type, - out, - device=x1_obj.sycl_device, - usm_type=x1_obj.usm_type, - sycl_queue=x1_obj.sycl_queue) - - if x1.size == 0 and axis is None: - return result - - result_sycl_queue = result.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = 
q.get_queue_ref() - - """ Call FPTR interface function """ - cdef dpnp_reduction_c_t func = kernel_data.ptr - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - result.get_data(), - x1.get_data(), - x1_shape.data(), - x1_shape.size(), - axis_shape.data(), - axis_shape.size(), - NULL, - NULL, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result - - cpdef utils.dpnp_descriptor dpnp_trapz(utils.dpnp_descriptor y1, utils.dpnp_descriptor x1, double dx): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(y1.dtype) diff --git a/dpnp/dpnp_algo/dpnp_algo_sorting.pxi b/dpnp/dpnp_algo/dpnp_algo_sorting.pxi index 4d8b475cce4..069b5335c1c 100644 --- a/dpnp/dpnp_algo/dpnp_algo_sorting.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_sorting.pxi @@ -2,7 +2,7 @@ # cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -36,10 +36,8 @@ and the rest of the library # NO IMPORTs here. 
All imports must be placed into main "dpnp_algo.pyx" file __all__ += [ - "dpnp_argsort", "dpnp_partition", "dpnp_searchsorted", - "dpnp_sort" ] @@ -61,13 +59,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*fptr_dpnp_searchsorted_t)(c_dpctl.DPCTLSyclQ const c_dpctl.DPCTLEventVectorRef) -cpdef utils.dpnp_descriptor dpnp_argsort(utils.dpnp_descriptor x1): - cdef shape_type_c result_shape = x1.shape - if result_shape == (): - result_shape = (1,) - return call_fptr_1in_1out(DPNP_FN_ARGSORT_EXT, x1, result_shape) - - cpdef utils.dpnp_descriptor dpnp_partition(utils.dpnp_descriptor arr, int kth, axis=-1, kind='introselect', order=None): cdef shape_type_c shape1 = arr.shape @@ -148,7 +139,3 @@ cpdef utils.dpnp_descriptor dpnp_searchsorted(utils.dpnp_descriptor arr, utils.d c_dpctl.DPCTLEvent_Delete(event_ref) return result - - -cpdef utils.dpnp_descriptor dpnp_sort(utils.dpnp_descriptor x1): - return call_fptr_1in_1out(DPNP_FN_SORT_EXT, x1, x1.shape) diff --git a/dpnp/dpnp_algo/dpnp_algo_special.pxi b/dpnp/dpnp_algo/dpnp_algo_special.pxi index 17974261384..fe3ce5b8dc5 100644 --- a/dpnp/dpnp_algo/dpnp_algo_special.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_special.pxi @@ -2,7 +2,7 @@ # cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_algo/dpnp_algo_statistics.pxi b/dpnp/dpnp_algo/dpnp_algo_statistics.pxi index 37d51d131ff..bd54f2091c1 100644 --- a/dpnp/dpnp_algo/dpnp_algo_statistics.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_statistics.pxi @@ -2,7 +2,7 @@ # cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -36,7 +36,6 @@ and the rest of the library # NO IMPORTs here. All imports must be placed into main "dpnp_algo.pyx" file __all__ += [ - "dpnp_average", "dpnp_correlate", "dpnp_median", ] @@ -49,15 +48,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*custom_statistic_1in_1out_func_ptr_t)(c_dpct const c_dpctl.DPCTLEventVectorRef) -cpdef dpnp_average(utils.dpnp_descriptor x1): - array_sum = dpnp_sum(x1).get_pyobj() - - """ Numpy interface inconsistency """ - return_type = dpnp.float32 if (x1.dtype == dpnp.float32) else dpnp.float64 - - return (return_type(array_sum / x1.size)) - - cpdef utils.dpnp_descriptor dpnp_correlate(utils.dpnp_descriptor x1, utils.dpnp_descriptor x2): cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(x1.dtype) cdef DPNPFuncType param2_type = dpnp_dtype_to_DPNPFuncType(x2.dtype) diff --git a/dpnp/dpnp_algo/dpnp_algo_trigonometric.pxi b/dpnp/dpnp_algo/dpnp_algo_trigonometric.pxi index 41a29b27557..14348d852dc 100644 --- a/dpnp/dpnp_algo/dpnp_algo_trigonometric.pxi +++ b/dpnp/dpnp_algo/dpnp_algo_trigonometric.pxi @@ -2,7 +2,7 @@ # cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -38,7 +38,6 @@ and the rest of the library __all__ += [ 'dpnp_degrees', 'dpnp_radians', - 'dpnp_recip', 'dpnp_unwrap' ] @@ -47,10 +46,6 @@ cpdef utils.dpnp_descriptor dpnp_degrees(utils.dpnp_descriptor x1): return call_fptr_1in_1out_strides(DPNP_FN_DEGREES_EXT, x1) -cpdef utils.dpnp_descriptor dpnp_recip(utils.dpnp_descriptor x1): - return call_fptr_1in_1out_strides(DPNP_FN_RECIP_EXT, x1) - - cpdef utils.dpnp_descriptor dpnp_radians(utils.dpnp_descriptor x1): return call_fptr_1in_1out_strides(DPNP_FN_RADIANS_EXT, x1) diff --git a/dpnp/dpnp_algo/dpnp_arraycreation.py b/dpnp/dpnp_algo/dpnp_arraycreation.py index b6d3c612068..0399deea254 100644 --- a/dpnp/dpnp_algo/dpnp_arraycreation.py +++ b/dpnp/dpnp_algo/dpnp_arraycreation.py @@ -1,5 +1,7 @@ +import math import operator +import dpctl.utils as dpu import numpy import dpnp @@ -10,6 +12,7 @@ "dpnp_geomspace", "dpnp_linspace", "dpnp_logspace", + "dpnp_nd_grid", ] @@ -256,3 +259,134 @@ def dpnp_logspace( if dtype is None: return dpnp.power(base, res) return dpnp.power(base, res).astype(dtype, copy=False) + + +class dpnp_nd_grid: + """ + Construct a multi-dimensional "meshgrid". + + ``grid = dpnp_nd_grid()`` creates an instance which will return a mesh-grid + when indexed. The dimension and number of the output arrays are equal + to the number of indexing dimensions. If the step length is not a + complex number, then the stop is not inclusive. + + However, if the step length is a complex number (e.g. 5j), then the + integer part of its magnitude is interpreted as specifying the + number of points to create between the start and stop values, where + the stop value is inclusive. + + If instantiated with an argument of ``sparse=True``, the mesh-grid is + open (or not fleshed out) so that only one-dimension of each returned + argument is greater than 1. + + Parameters + ---------- + sparse : bool, optional + Whether the grid is sparse or not. 
Default is False. + + """ + + def __init__( + self, sparse=False, device=None, usm_type="device", sycl_queue=None + ): + dpu.validate_usm_type(usm_type, allow_none=False) + self.sparse = sparse + self.usm_type = usm_type + self.sycl_queue_normalized = dpnp.get_normalized_queue_device( + sycl_queue=sycl_queue, device=device + ) + + def __getitem__(self, key): + if isinstance(key, slice): + step = key.step + stop = key.stop + start = key.start + if start is None: + start = 0 + if isinstance(step, complex): + step = abs(step) + length = int(step) + if step != 1: + step = (stop - start) / float(step - 1) + stop = stop + step + return ( + dpnp.arange( + 0, + length, + 1, + dtype=dpnp.default_float_type(), + usm_type=self.usm_type, + sycl_queue=self.sycl_queue_normalized, + ) + * step + + start + ) + else: + return dpnp.arange( + start, + stop, + step, + usm_type=self.usm_type, + sycl_queue=self.sycl_queue_normalized, + ) + + size = [] + dtype = int + for k in range(len(key)): + step = key[k].step + start = key[k].start + stop = key[k].stop + if start is None: + start = 0 + if step is None: + step = 1 + if isinstance(step, complex): + size.append(int(abs(step))) + dtype = dpnp.default_float_type() + else: + size.append( + int(math.ceil((key[k].stop - start) / (step * 1.0))) + ) + if ( + isinstance(step, float) + or isinstance(start, float) + or isinstance(stop, float) + ): + dtype = dpnp.default_float_type() + if self.sparse: + nn = [ + dpnp.arange( + _x, + dtype=_t, + usm_type=self.usm_type, + sycl_queue=self.sycl_queue_normalized, + ) + for _x, _t in zip(size, (dtype,) * len(size)) + ] + else: + nn = dpnp.indices( + size, + dtype, + usm_type=self.usm_type, + sycl_queue=self.sycl_queue_normalized, + ) + for k in range(len(size)): + step = key[k].step + start = key[k].start + stop = key[k].stop + if start is None: + start = 0 + if step is None: + step = 1 + if isinstance(step, complex): + step = int(abs(step)) + if step != 1: + step = (stop - start) / float(step - 1) + 
nn[k] = nn[k] * step + start + if self.sparse: + slobj = [dpnp.newaxis] * len(size) + for k in range(len(size)): + slobj[k] = slice(None, None) + nn[k] = nn[k][tuple(slobj)] + slobj[k] = dpnp.newaxis + return nn diff --git a/dpnp/dpnp_algo/dpnp_elementwise_common.py b/dpnp/dpnp_algo/dpnp_elementwise_common.py index bd7babbe01d..0bdcd103572 100644 --- a/dpnp/dpnp_algo/dpnp_elementwise_common.py +++ b/dpnp/dpnp_algo/dpnp_elementwise_common.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2023, Intel Corporation +# Copyright (c) 2023-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -38,6 +38,7 @@ "dpnp_acos", "dpnp_acosh", "dpnp_add", + "dpnp_angle", "dpnp_asin", "dpnp_asinh", "dpnp_atan", @@ -88,6 +89,7 @@ "dpnp_power", "dpnp_proj", "dpnp_real", + "dpnp_reciprocal", "dpnp_remainder", "dpnp_right_shift", "dpnp_round", @@ -177,11 +179,19 @@ def check_nd_call_func( ) +def _get_result(res_usm, out=None): + if out is None: + return dpnp_array._create_from_usm_ndarray(res_usm) + else: + return out + + def _make_unary_func( name, dpt_unary_fn, fn_docstring, mkl_fn_to_call=None, mkl_impl_fn=None ): impl_fn = dpt_unary_fn.get_implementation_function() type_resolver_fn = dpt_unary_fn.get_type_result_resolver_function() + acceptance_fn = dpt_unary_fn.get_type_promotion_path_acceptance_function() def _call_func(src, dst, sycl_queue, depends=None): """A callback to register in UnaryElementwiseFunc class of dpctl.tensor""" @@ -195,7 +205,7 @@ def _call_func(src, dst, sycl_queue, depends=None): return impl_fn(src, dst, sycl_queue, depends) func = dpt_unary_fn.__class__( - name, type_resolver_fn, _call_func, fn_docstring + name, type_resolver_fn, _call_func, fn_docstring, acceptance_fn ) return func @@ -273,7 +283,7 @@ def dpnp_abs(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) 
res_usm = abs_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _acos_docstring = """ @@ -314,7 +324,7 @@ def dpnp_acos(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = acos_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _acosh_docstring = """ @@ -355,7 +365,7 @@ def dpnp_acosh(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = acosh_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _add_docstring = """ @@ -401,7 +411,43 @@ def dpnp_add(x1, x2, out=None, order="K"): res_usm = add_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) + + +_angle_docstring = """ +angle(x, out=None, order="K") + +Computes the phase angle (also called the argument) of each element `x_i` for +input array `x`. + +Args: + x (dpnp.ndarray): + Input array, expected to have a complex-valued floating-point data type. + out ({None, dpnp.ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C", "F", "A", "K", optional): + Memory layout of the newly output array, if parameter `out` is ``None``. + Default: "K". +Returns: + dpnp.ndarray: + An array containing the element-wise phase angles. + The returned array has a floating-point data type determined + by the Type Promotion Rules. 
+""" + +angle_func = _make_unary_func("angle", dpt.angle, _angle_docstring) + + +def dpnp_angle(x, out=None, order="K"): + """Invokes angle() from dpctl.tensor implementation for angle() function.""" + + # dpctl.tensor only works with usm_ndarray + x1_usm = dpnp.get_usm_ndarray(x) + out_usm = None if out is None else dpnp.get_usm_ndarray(out) + + res_usm = angle_func(x1_usm, out=out_usm, order=order) + return _get_result(res_usm, out=out) _asin_docstring = """ @@ -442,7 +488,7 @@ def dpnp_asin(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = asin_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _asinh_docstring = """ @@ -483,7 +529,7 @@ def dpnp_asinh(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = asinh_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _atan_docstring = """ @@ -524,7 +570,7 @@ def dpnp_atan(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = atan_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _atan2_docstring = """ @@ -574,7 +620,7 @@ def dpnp_atan2(x1, x2, out=None, order="K"): res_usm = atan2_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _atanh_docstring = """ @@ -615,7 +661,7 @@ def dpnp_atanh(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = atanh_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _bitwise_and_docstring = """ @@ -659,7 +705,7 @@ def dpnp_bitwise_and(x1, x2, out=None, order="K"): res_usm = bitwise_and_func( 
x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _bitwise_or_docstring = """ @@ -703,7 +749,7 @@ def dpnp_bitwise_or(x1, x2, out=None, order="K"): res_usm = bitwise_or_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _bitwise_xor_docstring = """ @@ -747,7 +793,7 @@ def dpnp_bitwise_xor(x1, x2, out=None, order="K"): res_usm = bitwise_xor_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _cbrt_docstring = """ @@ -787,7 +833,7 @@ def dpnp_cbrt(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = cbrt_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _ceil_docstring = """ @@ -827,7 +873,7 @@ def dpnp_ceil(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = ceil_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _conj_docstring = """ @@ -866,7 +912,7 @@ def dpnp_conj(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = conj_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _copysign_docstring = """ @@ -907,7 +953,7 @@ def dpnp_copysign(x1, x2, out=None, order="K"): res_usm = copysign_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _cos_docstring = """ @@ -947,7 +993,7 @@ def dpnp_cos(x, out=None, order="K"): out_usm = None if out is None else 
dpnp.get_usm_ndarray(out) res_usm = cos_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _cosh_docstring = """ @@ -987,7 +1033,7 @@ def dpnp_cosh(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = cosh_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _divide_docstring = """ @@ -1033,7 +1079,7 @@ def dpnp_divide(x1, x2, out=None, order="K"): res_usm = divide_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _equal_docstring = """ @@ -1073,7 +1119,7 @@ def dpnp_equal(x1, x2, out=None, order="K"): res_usm = equal_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _exp_docstring = """ @@ -1114,7 +1160,7 @@ def dpnp_exp(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = exp_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _exp2_docstring = """ @@ -1155,7 +1201,7 @@ def dpnp_exp2(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = exp2_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _expm1_docstring = """ @@ -1198,7 +1244,7 @@ def dpnp_expm1(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = expm1_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _floor_docstring = """ @@ -1238,7 +1284,7 @@ def dpnp_floor(x, out=None, order="K"): out_usm = None if out is None else 
dpnp.get_usm_ndarray(out) res_usm = floor_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _floor_divide_docstring = """ @@ -1282,7 +1328,7 @@ def dpnp_floor_divide(x1, x2, out=None, order="K"): res_usm = floor_divide_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _greater_docstring = """ @@ -1322,7 +1368,7 @@ def dpnp_greater(x1, x2, out=None, order="K"): res_usm = greater_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _greater_equal_docstring = """ @@ -1364,7 +1410,7 @@ def dpnp_greater_equal(x1, x2, out=None, order="K"): res_usm = greater_equal_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _hypot_docstring = """ @@ -1410,7 +1456,7 @@ def dpnp_hypot(x1, x2, out=None, order="K"): res_usm = hypot_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _imag_docstring = """ @@ -1447,7 +1493,7 @@ def dpnp_imag(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = imag_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _invert_docstring = """ @@ -1482,7 +1528,7 @@ def dpnp_invert(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = invert_func(x_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _isfinite_docstring = """ @@ -1517,7 +1563,7 @@ def dpnp_isfinite(x, out=None, order="K"): out_usm = None if out is 
None else dpnp.get_usm_ndarray(out) res_usm = isfinite_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _isinf_docstring = """ @@ -1551,7 +1597,7 @@ def dpnp_isinf(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = isinf_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _isnan_docstring = """ @@ -1585,7 +1631,7 @@ def dpnp_isnan(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = isnan_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _left_shift_docstring = """ @@ -1629,7 +1675,7 @@ def dpnp_left_shift(x1, x2, out=None, order="K"): res_usm = left_shift_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _less_docstring = """ @@ -1669,7 +1715,7 @@ def dpnp_less(x1, x2, out=None, order="K"): res_usm = less_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _less_equal_docstring = """ @@ -1711,7 +1757,7 @@ def dpnp_less_equal(x1, x2, out=None, order="K"): res_usm = less_equal_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _log_docstring = """ @@ -1752,7 +1798,7 @@ def dpnp_log(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = log_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _log10_docstring = """ @@ -1793,7 +1839,7 @@ def dpnp_log10(x, out=None, order="K"): out_usm = None if 
out is None else dpnp.get_usm_ndarray(out) res_usm = log10_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _log1p_docstring = """ @@ -1833,7 +1879,7 @@ def dpnp_log1p(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = log1p_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _log2_docstring = """ @@ -1874,7 +1920,7 @@ def dpnp_log2(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = log2_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _logaddexp_docstring = """ @@ -1922,7 +1968,7 @@ def dpnp_logaddexp(x1, x2, out=None, order="K"): res_usm = logaddexp_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _logical_and_docstring = """ @@ -1963,7 +2009,7 @@ def dpnp_logical_and(x1, x2, out=None, order="K"): res_usm = logical_and_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _logical_not_docstring = """ @@ -1998,7 +2044,7 @@ def dpnp_logical_not(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = logical_not_func(x_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _logical_or_docstring = """ @@ -2039,7 +2085,7 @@ def dpnp_logical_or(x1, x2, out=None, order="K"): res_usm = logical_or_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _logical_xor_docstring = """ @@ -2080,7 +2126,7 @@ def 
dpnp_logical_xor(x1, x2, out=None, order="K"): res_usm = logical_xor_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _maximum_docstring = """ @@ -2120,7 +2166,7 @@ def dpnp_maximum(x1, x2, out=None, order="K"): res_usm = maximum_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _minimum_docstring = """ @@ -2160,7 +2206,7 @@ def dpnp_minimum(x1, x2, out=None, order="K"): res_usm = minimum_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _multiply_docstring = """ @@ -2210,7 +2256,7 @@ def dpnp_multiply(x1, x2, out=None, order="K"): res_usm = multiply_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _negative_docstring = """ @@ -2250,7 +2296,7 @@ def dpnp_negative(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = negative_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _not_equal_docstring = """ @@ -2292,7 +2338,7 @@ def dpnp_not_equal(x1, x2, out=None, order="K"): res_usm = not_equal_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _positive_docstring = """ @@ -2331,7 +2377,7 @@ def dpnp_positive(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = positive_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _power_docstring = """ @@ -2378,7 +2424,7 @@ def 
dpnp_power(x1, x2, out=None, order="K"): res_usm = power_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _proj_docstring = """ @@ -2412,7 +2458,7 @@ def dpnp_proj(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = proj_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _real_docstring = """ @@ -2449,7 +2495,44 @@ def dpnp_real(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = real_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) + + +_reciprocal_docstring = """ +reciprocal(x, out=None, order="K") + +Computes the reciprocal of each element `x_i` for input array `x`. + +Args: + x (dpnp.ndarray): + Input array, expected to have a real-valued floating-point data type. + out ({None, dpnp.ndarray}, optional): + Output array to populate. + Array must have the correct shape and the expected data type. + order ("C", "F", "A", "K", optional): + Memory layout of the newly output array, if parameter `out` is ``None``. + Default: "K". +Returns: + dpnp.ndarray: + An array containing the element-wise reciprocals. + The returned array has a floating-point data type determined + by the Type Promotion Rules. 
+""" + +reciprocal_func = _make_unary_func( + "reciprocal", dpt.reciprocal, _reciprocal_docstring +) + + +def dpnp_reciprocal(x, out=None, order="K"): + """Invokes reciprocal() from dpctl.tensor implementation for reciprocal() function.""" + + # dpctl.tensor only works with usm_ndarray + x1_usm = dpnp.get_usm_ndarray(x) + out_usm = None if out is None else dpnp.get_usm_ndarray(out) + + res_usm = reciprocal_func(x1_usm, out=out_usm, order=order) + return _get_result(res_usm, out=out) _remainder_docstring = """ @@ -2490,7 +2573,7 @@ def dpnp_remainder(x1, x2, out=None, order="K"): res_usm = remainder_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _right_shift_docstring = """ @@ -2533,7 +2616,7 @@ def dpnp_right_shift(x1, x2, out=None, order="K"): res_usm = right_shift_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _round_docstring = """ @@ -2573,7 +2656,7 @@ def dpnp_round(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = round_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _rsqrt_docstring = """ @@ -2608,7 +2691,7 @@ def dpnp_rsqrt(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = rsqrt_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _sign_docstring = """ @@ -2650,7 +2733,7 @@ def dpnp_sign(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = sign_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _signbit_docstring = """ @@ -2685,7 +2768,7 @@ def dpnp_signbit(x, 
out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = signbit_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _sin_docstring = """ @@ -2725,7 +2808,7 @@ def dpnp_sin(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = sin_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _sinh_docstring = """ @@ -2765,7 +2848,7 @@ def dpnp_sinh(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = sinh_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _sqrt_docstring = """ @@ -2804,7 +2887,7 @@ def dpnp_sqrt(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = sqrt_func(x_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _square_docstring = """ @@ -2843,7 +2926,7 @@ def dpnp_square(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = square_func(x_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _subtract_docstring = """ @@ -2904,7 +2987,7 @@ def dpnp_subtract(x1, x2, out=None, order="K"): res_usm = subtract_func( x1_usm_or_scalar, x2_usm_or_scalar, out=out_usm, order=order ) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _tan_docstring = """ @@ -2944,7 +3027,7 @@ def dpnp_tan(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = tan_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _tanh_docstring = """ @@ -2984,7 
+3067,7 @@ def dpnp_tanh(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = tanh_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) _trunc_docstring = """ @@ -3026,4 +3109,4 @@ def dpnp_trunc(x, out=None, order="K"): out_usm = None if out is None else dpnp.get_usm_ndarray(out) res_usm = trunc_func(x1_usm, out=out_usm, order=order) - return dpnp_array._create_from_usm_ndarray(res_usm) + return _get_result(res_usm, out=out) diff --git a/dpnp/dpnp_array.py b/dpnp/dpnp_array.py index 314491fcff5..cf848b50690 100644 --- a/dpnp/dpnp_array.py +++ b/dpnp/dpnp_array.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -510,39 +510,7 @@ def argsort(self, axis=-1, kind=None, order=None): """ Return an ndarray of indices that sort the array along the specified axis. - Parameters - ---------- - axis : int, optional - Axis along which to sort. If None, the default, the flattened array - is used. - .. versionchanged:: 1.13.0 - Previously, the default was documented to be -1, but that was - in error. At some future date, the default will change to -1, as - originally intended. - Until then, the axis should be given explicitly when - ``arr.ndim > 1``, to avoid a FutureWarning. - kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional - The sorting algorithm used. - order : list, optional - When `a` is an array with fields defined, this argument specifies - which fields to compare first, second, etc. Not all fields need be - specified. - - Returns - ------- - index_array : ndarray, int - Array of indices that sort `a` along the specified axis. 
- In other words, ``a[index_array]`` yields a sorted `a`. - - See Also - -------- - MaskedArray.sort : Describes sorting algorithms used. - :obj:`dpnp.lexsort` : Indirect stable sort with multiple keys. - :obj:`numpy.ndarray.sort` : Inplace sort. - - Notes - ----- - See `sort` for notes on the different sorting algorithms. + Refer to :obj:`dpnp.argsort` for full documentation. """ return dpnp.argsort(self, axis, kind, order) @@ -1163,14 +1131,44 @@ def size(self): return self._array_obj.size - # 'sort', + def sort(self, axis=-1, kind=None, order=None): + """ + Sort an array in-place. + + Refer to :obj:`dpnp.sort` for full documentation. + + Note + ---- + `axis` in :obj:`dpnp.sort` could be integer or ``None``. If ``None``, + the array is flattened before sorting. However, `axis` in :obj:`dpnp.ndarray.sort` + can only be integer since it sorts an array in-place. + + Examples + -------- + >>> import dpnp as np + >>> a = np.array([[1,4],[3,1]]) + >>> a.sort(axis=1) + >>> a + array([[1, 4], + [1, 3]]) + >>> a.sort(axis=0) + >>> a + array([[1, 1], + [3, 4]]) + + """ + + if axis is None: + raise TypeError( + "'NoneType' object cannot be interpreted as an integer" + ) + self[...] = dpnp.sort(self, axis=axis, kind=kind, order=order) def squeeze(self, axis=None): """ Remove single-dimensional entries from the shape of an array. - .. seealso:: - :obj:`dpnp.squeeze` for full documentation + Refer to :obj:`dpnp.squeeze` for full documentation """ diff --git a/dpnp/dpnp_container.py b/dpnp/dpnp_container.py index fac883a775b..3aa4478baa0 100644 --- a/dpnp/dpnp_container.py +++ b/dpnp/dpnp_container.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved.
# # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_flatiter.py b/dpnp/dpnp_flatiter.py index 00e6ec44770..bdf62e915a5 100644 --- a/dpnp/dpnp_flatiter.py +++ b/dpnp/dpnp_flatiter.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_iface.py b/dpnp/dpnp_iface.py index 215509c1fc3..9aee27b73bc 100644 --- a/dpnp/dpnp_iface.py +++ b/dpnp/dpnp_iface.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_iface_arraycreation.py b/dpnp/dpnp_iface_arraycreation.py index edcfe7ab3fc..067eb3fbb52 100644 --- a/dpnp/dpnp_iface_arraycreation.py +++ b/dpnp/dpnp_iface_arraycreation.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -52,6 +52,7 @@ dpnp_geomspace, dpnp_linspace, dpnp_logspace, + dpnp_nd_grid, ) __all__ = [ @@ -787,7 +788,7 @@ def empty_like( Limitations ----------- - Parameter `x1` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` + Parameter `x1` is supported as :class:`dpnp.dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray` Parameter `order` is supported with values ``"C"`` or ``"F"``. Parameter `subok` is supported only with default value ``False``. 
Otherwise the function will be executed sequentially on CPU. @@ -1049,7 +1050,7 @@ def full_like( Limitations ----------- - Parameter `x1` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` + Parameter `x1` is supported as :class:`dpnp.dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray` Parameter `order` is supported only with values ``"C"`` and ``"F"``. Parameter `subok` is supported only with default value ``False``. Otherwise the function will be executed sequentially on CPU. @@ -1395,7 +1396,7 @@ def meshgrid(*xi, copy=True, sparse=False, indexing="xy"): Limitations ----------- - Each array instance from `xi` is supported as either :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray`. + Each array instance from `xi` is supported as either :class:`dpnp.dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. Parameter `copy` is supported only with default value ``True``. Parameter `sparse` is supported only with default value ``False``. Otherwise the function will be executed sequentially on CPU. @@ -1452,6 +1453,24 @@ class MGridClass: For full documentation refer to :obj:`numpy.mgrid`. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : one dpnp.ndarray or tuple of dpnp.ndarray + Returns one array of grid indices, grid.shape = (len(dimensions),) + tuple(dimensions). 
+ Examples -------- >>> import dpnp as np @@ -1466,13 +1485,31 @@ class MGridClass: [0, 1, 2, 3, 4], [0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]]) - >>> np.mgrid[-1:1:5j] + + >>> x = np.mgrid[-1:1:5j] + >>> x array([-1. , -0.5, 0. , 0.5, 1. ]) + >>> x.usm_type + 'device' + + >>> y = np.mgrid(usm_type="host")[-1:1:5j] + >>> y + array([-1. , -0.5, 0. , 0.5, 1. ]) + >>> x.usm_type + 'host' """ def __getitem__(self, key): - return dpnp.array(numpy.mgrid[key]) + return dpnp_nd_grid(sparse=False)[key] + + def __call__(self, device=None, usm_type="device", sycl_queue=None): + return dpnp_nd_grid( + sparse=False, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue, + ) mgrid = MGridClass() @@ -1484,23 +1521,56 @@ class OGridClass: For full documentation refer to :obj:`numpy.ogrid`. + Parameters + ---------- + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : one dpnp.ndarray or tuple of dpnp.ndarray + Returns a tuple of arrays, with grid[i].shape = (1, ..., 1, dimensions[i], 1, ..., 1) + with dimensions[i] in the ith place. + Examples -------- >>> import dpnp as np - >>> from numpy import ogrid - >>> ogrid[-1:1:5j] - array([-1. , -0.5, 0. , 0.5, 1. ]) - >>> ogrid[0:5,0:5] + >>> np.ogrid[0:5, 0:5] [array([[0], [1], [2], [3], [4]]), array([[0, 1, 2, 3, 4]])] + >>> x = np.ogrid[-1:1:5j] + >>> x + array([-1. , -0.5, 0. , 0.5, 1. 
]) + >>> x.usm_type + 'device' + + >>> y = np.ogrid(usm_type="host")[-1:1:5j] + >>> y + array([-1. , -0.5, 0. , 0.5, 1. ]) + >>> x.usm_type + 'host' + """ def __getitem__(self, key): - return dpnp.array(numpy.ogrid[key]) + return dpnp_nd_grid(sparse=True)[key] + + def __call__(self, device=None, usm_type="device", sycl_queue=None): + return dpnp_nd_grid( + sparse=True, device=device, usm_type=usm_type, sycl_queue=sycl_queue + ) ogrid = OGridClass() @@ -1583,7 +1653,7 @@ def ones_like( Limitations ----------- - Parameter `x1` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` + Parameter `x1` is supported as :class:`dpnp.dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray` Parameter `order` is supported with values ``"C"`` or ``"F"``. Parameter `subok` is supported only with default value ``False``. Otherwise the function will be executed sequentially on CPU. @@ -1753,7 +1823,7 @@ def tril(x1, /, *, k=0): Limitations ----------- - Parameter `x1` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` with two or more dimensions. + Parameter `x1` is supported as :class:`dpnp.dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray` with two or more dimensions. Parameter `k` is supported only of integer data type. Otherwise the function will be executed sequentially on CPU. @@ -1797,7 +1867,7 @@ def triu(x1, /, *, k=0): Limitations ----------- - Parameter `x1` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` with two or more dimensions. + Parameter `x1` is supported as :class:`dpnp.dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray` with two or more dimensions. Parameter `k` is supported only of integer data type. Otherwise the function will be executed sequentially on CPU. 
@@ -1985,7 +2055,7 @@ def zeros_like( Limitations ----------- - Parameter `x1` is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray` + Parameter `x1` is supported as :class:`dpnp.dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray` Parameter `order` is supported with values ``"C"`` or ``"F"``. Parameter `subok` is supported only with default value ``False``. Otherwise the function will be executed sequentially on CPU. diff --git a/dpnp/dpnp_iface_bitwise.py b/dpnp/dpnp_iface_bitwise.py index bac53b2cbc6..1d7f3432e87 100644 --- a/dpnp/dpnp_iface_bitwise.py +++ b/dpnp/dpnp_iface_bitwise.py @@ -1,8 +1,6 @@ -# cython: language_level=3 -# distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_iface_counting.py b/dpnp/dpnp_iface_counting.py index 35301a6b9db..fe477e272f3 100644 --- a/dpnp/dpnp_iface_counting.py +++ b/dpnp/dpnp_iface_counting.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_iface_indexing.py b/dpnp/dpnp_iface_indexing.py index ef21c3b9b18..8f973ed1f1a 100644 --- a/dpnp/dpnp_iface_indexing.py +++ b/dpnp/dpnp_iface_indexing.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -355,31 +355,117 @@ def fill_diagonal(x1, val, wrap=False): return call_origin(numpy.fill_diagonal, x1, val, wrap, dpnp_inplace=True) -def indices(dimensions, dtype=int, sparse=False): +def indices( + dimensions, + dtype=int, + sparse=False, + device=None, + usm_type="device", + sycl_queue=None, +): """ Return an array representing the indices of a grid. + Compute an array where the subarrays contain index values 0, 1, … + varying only along the corresponding axis. + For full documentation refer to :obj:`numpy.indices`. - Limitations - ----------- - Parameters `dtype` and `sparse` are supported only with default values. - Parameter `dimensions` is supported with len <=2. + Parameters + ---------- + dimensions : sequence of ints + The shape of the grid. + dtype : dtype, optional + Data type of the result. + sparse : boolean, optional + Return a sparse representation of the grid instead of a dense representation. + Default is ``False``. + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where the output array is created. + The `device` can be ``None`` (the default), an OneAPI filter selector string, + an instance of :class:`dpctl.SyclDevice` corresponding to a non-partitioned SYCL device, + an instance of :class:`dpctl.SyclQueue`, or a `Device` object returned by + :obj:`dpnp.dpnp_array.dpnp_array.device` property. + usm_type : {"device", "shared", "host"}, optional + The type of SYCL USM allocation for the output array. + sycl_queue : {None, SyclQueue}, optional + A SYCL queue to use for output array allocation and copying. + + Returns + ------- + out : one dpnp.ndarray or tuple of dpnp.ndarray + If sparse is ``False``: + Returns one array of grid indices, grid.shape = (len(dimensions),) + tuple(dimensions). 
+ + If sparse is ``True``: + Returns a tuple of arrays, with grid[i].shape = (1, ..., 1, dimensions[i], 1, ..., 1) + with dimensions[i] in the ith place. + + Examples + -------- + >>> import dpnp as np + >>> grid = np.indices((2, 3)) + >>> grid.shape + (2, 2, 3) + >>> grid[0] + array([[0, 0, 0], + [1, 1, 1]]) + >>> grid[1] + array([[0, 1, 2], + [0, 1, 2]]) + + The indices can be used as an index into an array. + + >>> x = np.arange(20).reshape(5, 4) + >>> row, col = np.indices((2, 3)) + >>> x[row, col] + array([[0, 1, 2], + [4, 5, 6]]) + + Note that it would be more straightforward in the above example to + extract the required elements directly with ``x[:2, :3]``. + If sparse is set to ``True``, the grid will be returned in a sparse + representation. + + >>> i, j = np.indices((2, 3), sparse=True) + >>> i.shape + (2, 1) + >>> j.shape + (1, 3) + >>> i + array([[0], + [1]]) + >>> j + array([[0, 1, 2]]) """ - if not isinstance(dimensions, (tuple, list)): - pass - elif len(dimensions) > 2 or len(dimensions) == 0: - pass - elif dtype != int: - pass - elif sparse: - pass + dimensions = tuple(dimensions) + N = len(dimensions) + shape = (1,) * N + if sparse: + res = () else: - return dpnp_indices(dimensions) - - return call_origin(numpy.indices, dimensions, dtype, sparse) + res = dpnp.empty( + (N,) + dimensions, + dtype=dtype, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue, + ) + for i, dim in enumerate(dimensions): + idx = dpnp.arange( + dim, + dtype=dtype, + device=device, + usm_type=usm_type, + sycl_queue=sycl_queue, + ).reshape(shape[:i] + (dim,) + shape[i + 1 :]) + if sparse: + res = res + (idx,) + else: + res[i] = idx + return res def nonzero(x, /): @@ -765,6 +851,7 @@ def take_along_axis(a, indices, axis): -------- :obj:`dpnp.take` : Take along an axis, using the same indices for every 1d slice. :obj:`dpnp.put_along_axis` : Put values into the destination array by matching 1d index and data slices. 
+ :obj:`dpnp.argsort` : Return the indices that would sort an array. Examples -------- diff --git a/dpnp/dpnp_iface_libmath.py b/dpnp/dpnp_iface_libmath.py index 3d016e7d1d0..cafbf40ba88 100644 --- a/dpnp/dpnp_iface_libmath.py +++ b/dpnp/dpnp_iface_libmath.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_iface_linearalgebra.py b/dpnp/dpnp_iface_linearalgebra.py index 30b6134da17..d39b84a50ec 100644 --- a/dpnp/dpnp_iface_linearalgebra.py +++ b/dpnp/dpnp_iface_linearalgebra.py @@ -1,8 +1,6 @@ -# cython: language_level=3 -# distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -46,6 +44,7 @@ import dpnp from dpnp.dpnp_algo import * from dpnp.dpnp_utils import * +from dpnp.dpnp_utils.dpnp_utils_linearalgebra import dpnp_matmul __all__ = [ "dot", @@ -246,7 +245,17 @@ def kron(x1, x2): return call_origin(numpy.kron, x1, x2) -def matmul(x1, x2, out=None, **kwargs): +def matmul( + x1, + x2, + /, + out=None, + *, + casting="same_kind", + order="K", + dtype=None, + subok=True, +): """ Matrix product of two arrays. @@ -254,9 +263,9 @@ def matmul(x1, x2, out=None, **kwargs): Limitations ----------- - Input arrays are supported as :obj:`dpnp.ndarray`. - Otherwise the function will be executed sequentially on CPU. - Parameter `out` is supported as :obj:`dpnp.ndarray` and as default value ``None``. 
+ Input arrays and parameter `out` are supported as either :class:`dpnp.ndarray` + or :class:`dpctl.tensor.usm_ndarray`. + Keyword argument `subok` is currently unsupported. Input array data types are limited by supported DPNP :ref:`Data types`. See Also @@ -269,63 +278,65 @@ def matmul(x1, x2, out=None, **kwargs): Examples -------- + For 2-D arrays it is the matrix product: + >>> import dpnp as np - >>> a = np.ones([9, 5, 7, 4]) - >>> c = np.ones([9, 5, 4, 3]) - >>> np.matmul(a, c).shape - (9, 5, 7, 3) >>> a = np.array([[1, 0], [0, 1]]) >>> b = np.array([[4, 1], [2, 2]]) >>> np.matmul(a, b) array([[4, 1], [2, 2]]) - """ + For 2-D mixed with 1-D, the result is the usual. - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - x2_desc = dpnp.get_dpnp_descriptor(x2, copy_when_nondefault_queue=False) - if x1_desc and x2_desc and not kwargs: - if x1_desc.ndim != 2 or x2_desc.ndim != 2: - pass - elif not x1_desc.ndim: - pass - elif not x2_desc.ndim: - pass - elif not x1_desc.size: - pass - elif not x2_desc.size: - pass - else: - if 0: - """ - Cost model checks - """ - - array1_size = x1_desc.size - array2_size = x2_desc.size - cost_size = 4096 # 2D array shape(64, 64) - - if (x1_desc.dtype == dpnp.float64) or ( - x1_desc.dtype == dpnp.float32 - ): - """ - Floating point types are handled via original math library better than SYCL math library - """ - cost_size = 262144 # 2D array shape(512, 512) - - if (array1_size > cost_size) and (array2_size > cost_size): - return dpnp_matmul(x1_desc, x2_desc, out) - else: - out_desc = ( - dpnp.get_dpnp_descriptor( - out, copy_when_nondefault_queue=False - ) - if out is not None - else None - ) - return dpnp_matmul(x1_desc, x2_desc, out_desc).get_pyobj() + >>> a = np.array([[1, 0], [0, 1]]) + >>> b = np.array([1, 2]) + >>> np.matmul(a, b) + array([1, 2]) + >>> np.matmul(b, a) + array([1, 2]) + + Broadcasting is conventional for stacks of arrays - return call_origin(numpy.matmul, x1, x2, out=out, **kwargs) + >>> a 
= np.arange(2 * 2 * 4).reshape((2, 2, 4)) + >>> b = np.arange(2 * 2 * 4).reshape((2, 4, 2)) + >>> np.matmul(a,b).shape + (2, 2, 2) + >>> np.matmul(a, b)[0, 1, 1] + array(98) + >>> np.sum(a[0, 1, :] * b[0 , :, 1]) + array(98) + + Vector, vector returns the scalar inner product, but neither argument is complex-conjugated: + + >>> x1 = np.array([2j, 3j]) + >>> x2 = np.array([2j, 3j]) + >>> np.matmul(x1, x2) + array(-13+0j) + + The ``@`` operator can be used as a shorthand for ``matmul`` on + :class:`dpnp.ndarray`. + + >>> x1 @ x2 + array(-13+0j) + + """ + + dpnp.check_supported_arrays_type(x1) + dpnp.check_supported_arrays_type(x2) + if subok is False: + raise NotImplementedError( + "subok keyword argument is only supported by its default value." + ) + else: + return dpnp_matmul( + x1, + x2, + out=out, + casting=casting, + order=order, + dtype=dtype, + ) def outer(x1, x2, out=None): diff --git a/dpnp/dpnp_iface_logic.py b/dpnp/dpnp_iface_logic.py index 169e0bc607a..4e711f6c5dc 100644 --- a/dpnp/dpnp_iface_logic.py +++ b/dpnp/dpnp_iface_logic.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_iface_manipulation.py b/dpnp/dpnp_iface_manipulation.py index 9efb2aa04f1..6a48e4d1e10 100644 --- a/dpnp/dpnp_iface_manipulation.py +++ b/dpnp/dpnp_iface_manipulation.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -1174,7 +1174,7 @@ def ravel(a, order="C"): Parameters ---------- - x : {dpnp_array, usm_ndarray} + x : {dpnp.ndarray, usm_ndarray} Input array. The elements in `a` are read in the order specified by order, and packed as a 1-D array. order : {'C', 'F'}, optional @@ -1187,7 +1187,7 @@ def ravel(a, order="C"): Returns ------- - out : dpnp_array + out : dpnp.ndarray A contiguous 1-D array of the same subtype as `a`, with shape (a.size,). See Also @@ -1220,7 +1220,7 @@ def repeat(a, repeats, axis=None): Parameters ---------- - x : {dpnp_array, usm_ndarray} + x : {dpnp.ndarray, usm_ndarray} Input array. repeat : int or array of int The number of repetitions for each element. `repeats` is broadcasted to fit @@ -1231,7 +1231,7 @@ def repeat(a, repeats, axis=None): Returns ------- - out : dpnp_array + out : dpnp.ndarray Output array which has the same shape as `a`, except along the given axis. See Also @@ -1380,7 +1380,9 @@ def result_type(*arrays_and_dtypes): """ usm_arrays_and_dtypes = [ - X.dtype if isinstance(X, (dpnp_array, dpt.usm_ndarray)) else X + dpnp.get_usm_ndarray(X) + if isinstance(X, (dpnp_array, dpt.usm_ndarray)) + else X for X in arrays_and_dtypes ] return dpt.result_type(*usm_arrays_and_dtypes) diff --git a/dpnp/dpnp_iface_mathematical.py b/dpnp/dpnp_iface_mathematical.py index 20c5c922dbe..7ec846f770c 100644 --- a/dpnp/dpnp_iface_mathematical.py +++ b/dpnp/dpnp_iface_mathematical.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -54,6 +54,7 @@ check_nd_call_func, dpnp_abs, dpnp_add, + dpnp_angle, dpnp_ceil, dpnp_conj, dpnp_copysign, @@ -82,6 +83,7 @@ "abs", "absolute", "add", + "angle", "around", "ceil", "clip", @@ -291,6 +293,56 @@ def add( ) +def angle(z, deg=False): + """ + Return the angle of the complex argument. + + For full documentation refer to :obj:`numpy.angle`. + + Parameters + ---------- + x : {dpnp.ndarray, usm_ndarray} + Input array, expected to have a complex-valued floating-point data type. + deg : bool, optional + Return angle in degrees if True, radians if False (default). + + Returns + ------- + out : dpnp.ndarray + The counterclockwise angle from the positive real axis on + the complex plane in the range `(-pi, pi]`. + The returned array has a floating-point data type determined + by the Type Promotion Rules. + + Notes + ----- + Although the angle of the complex number 0 is undefined, `dpnp.angle(0)` returns the value 0. + + See Also + -------- + :obj:`dpnp.arctan2` : Element-wise arc tangent of `x1/x2` choosing the quadrant correctly. + :obj:`dpnp.arctan` : Trigonometric inverse tangent, element-wise. + :obj:`dpnp.absolute` : Calculate the absolute value element-wise. + + Examples + -------- + >>> import dpnp as np + >>> a = np.array([1.0, 1.0j, 1+1j]) + >>> np.angle(a) # in radians + array([0. , 1.57079633, 0.78539816]) # may vary + + >>> np.angle(a, deg=True) # in degrees + array([ 0., 90., 45.]) + + """ + + dpnp.check_supported_arrays_type(z) + res = dpnp_angle(z) + if deg is True: + res = res * (180 / dpnp.pi) + return res + + def around(x, /, decimals=0, out=None): """ Round an array to the given number of decimals. @@ -390,12 +442,12 @@ def clip(a, a_min, a_max, *, out=None, order="K", **kwargs): Parameters ---------- - a : {dpnp_array, usm_ndarray} + a : {dpnp.ndarray, usm_ndarray} Array containing elements to clip. 
- a_min, a_max : {dpnp_array, usm_ndarray, None} + a_min, a_max : {dpnp.ndarray, usm_ndarray, None} Minimum and maximum value. If ``None``, clipping is not performed on the corresponding edge. Only one of `a_min` and `a_max` may be ``None``. Both are broadcast against `a`. - out : {dpnp_array, usm_ndarray}, optional + out : {dpnp.ndarray, usm_ndarray}, optional The results will be placed in this array. It may be the input array for in-place clipping. `out` must be of the right shape to hold the output. Its type is preserved. order : {"C", "F", "A", "K", None}, optional @@ -404,7 +456,7 @@ def clip(a, a_min, a_max, *, out=None, order="K", **kwargs): Returns ------- - out : dpnp_array + out : dpnp.ndarray An array with the elements of `a`, but where values < `a_min` are replaced with `a_min`, and those > `a_max` with `a_max`. @@ -439,6 +491,8 @@ def clip(a, a_min, a_max, *, out=None, order="K", **kwargs): if kwargs: raise NotImplementedError(f"kwargs={kwargs} is currently not supported") + elif a_min is None and a_max is None: + raise ValueError("One of max or min must be given") if order is None: order = "K" @@ -574,7 +628,7 @@ def copysign( Parameters `where`, `dtype` and `subok` are supported with their default values. Keyword argument `kwargs` is currently unsupported. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported real data types. + Input array data types are limited by supported real-valued data types. See Also -------- @@ -1478,21 +1532,23 @@ def gradient(x1, *varargs, **kwargs): return call_origin(numpy.gradient, x1, *varargs, **kwargs) -def imag(x): +def imag(val): """ Return the imaginary part of the complex argument. For full documentation refer to :obj:`numpy.imag`. + Parameters + ---------- + x : {dpnp.ndarray, usm_ndarray} + Input array. + Returns ------- out : dpnp.ndarray - The imaginary component of the complex argument. 
- - Limitations - ----------- - Parameter `x` is only supported as either :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. - Input array data types are limited by supported DPNP :ref:`Data types`. + The imaginary component of the complex argument. If `val` is real, + the type of `val` is used for the output. If `val` has complex + elements, the returned type is float. See Also -------- @@ -1516,12 +1572,8 @@ def imag(x): """ - if dpnp.isscalar(x): - # input has to be an array - pass - else: - return dpnp_imag(x) - return call_origin(numpy.imag, x) + dpnp.check_supported_arrays_type(val) + return dpnp_imag(val) def maximum( @@ -2210,21 +2262,23 @@ def proj( ) -def real(x): +def real(val): """ Return the real part of the complex argument. For full documentation refer to :obj:`numpy.real`. + Parameters + ---------- + x : {dpnp.ndarray, usm_ndarray} + Input array. + Returns ------- out : dpnp.ndarray - The real component of the complex argument. - - Limitations - ----------- - Parameter `x` is only supported as either :class:`dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. - Input array data types are limited by supported DPNP :ref:`Data types`. + The real component of the complex argument. If `val` is real, + the type of `val` is used for the output. If `val` has complex + elements, the returned type is float. See Also -------- @@ -2251,15 +2305,12 @@ def real(x): array(1.) """ - if dpnp.isscalar(x): - # input has to be an array - pass + + dpnp.check_supported_arrays_type(val) + if dpnp.issubsctype(val.dtype, dpnp.complexfloating): + return dpnp_real(val) else: - if dpnp.issubsctype(x.dtype, dpnp.complexfloating): - return dpnp_real(x) - else: - return x - return call_origin(numpy.real, x) + return val def remainder( @@ -2643,7 +2694,7 @@ def subtract( def sum( - x, + a, /, *, axis=None, @@ -2658,31 +2709,86 @@ def sum( For full documentation refer to :obj:`numpy.sum`. + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + Input array. 
+ axis : int or tuple of ints, optional + Axis or axes along which sums must be computed. If a tuple + of unique integers, sums are computed over multiple axes. + If ``None``, the sum is computed over the entire array. + Default: ``None``. + dtype : dtype, optional + Data type of the returned array. If ``None``, the default data + type is inferred from the "kind" of the input array data type. + * If `a` has a real-valued floating-point data type, + the returned array will have the default real-valued + floating-point data type for the device where input + array `a` is allocated. + * If `a` has signed integral data type, the returned array + will have the default signed integral type for the device + where input array `a` is allocated. + * If `a` has unsigned integral data type, the returned array + will have the default unsigned integral type for the device + where input array `a` is allocated. + * If `a` has a complex-valued floating-point data type, + the returned array will have the default complex-valued + floating-pointer data type for the device where input + array `a` is allocated. + * If `a` has a boolean data type, the returned array will + have the default signed integral type for the device + where input array `a` is allocated. + If the data type (either specified or resolved) differs from the + data type of `a`, the input array elements are cast to the + specified data type before computing the sum. + Default: ``None``. + out : {dpnp.ndarray, usm_ndarray}, optional + Alternative output array in which to place the result. It must + have the same shape as the expected output, but the type of + the output values will be cast if necessary. + Default: ``None``. + keepdims : bool, optional + If ``True``, the reduced axes (dimensions) are included in the result + as singleton dimensions, so that the returned array remains + compatible with the input array according to Array Broadcasting + rules. 
Otherwise, if ``False``, the reduced axes are not included in + the returned array. Default: ``False``. + Returns ------- out : dpnp.ndarray - an array containing the sums. If the sum was computed over the + An array containing the sums. If the sum is computed over the entire array, a zero-dimensional array is returned. The returned array has the data type as described in the `dtype` parameter - of the Python Array API standard for the `sum` function. + description above. Limitations ----------- - Parameters `x` is supported as either :class:`dpnp.ndarray` - or :class:`dpctl.tensor.usm_ndarray`. - Parameters `initial` and `where` are supported with their default values. + Parameters `initial` and `where` are only supported with their default values. Otherwise ``NotImplementedError`` exception will be raised. - Input array data types are limited by supported DPNP :ref:`Data types`. + + See Also + -------- + :obj:`dpnp.ndarray.sum` : Equivalent method. + :obj:`dpnp.cumsum` : Cumulative sum of array elements. + :obj:`dpnp.trapz` : Integration of array values using the composite trapezoidal rule. + :obj:`dpnp.mean` : Compute the arithmetic mean. + :obj:`dpnp.average` : Compute the weighted average. Examples -------- >>> import dpnp as np - >>> np.sum(np.array([1, 2, 3, 4, 5])) - array(15) - >>> np.sum(np.array(5)) - array(5) - >>> result = np.sum(np.array([[0, 1], [0, 5]]), axis=0) + >>> np.sum(np.array([0.5, 1.5])) + array(2.) 
+ >>> np.sum(np.array([0.5, 0.7, 0.2, 1.5]), dtype=np.int32) + array(1) + >>> a = np.array([[0, 1], [0, 5]]) + >>> np.sum(a) + array(6) + >>> np.sum(a, axis=0) array([0, 6]) + >>> np.sum(a, axis=1) + array([1, 5]) """ @@ -2690,7 +2796,7 @@ def sum( if not isinstance(axis, (tuple, list)): axis = (axis,) - axis = normalize_axis_tuple(axis, x.ndim, "axis") + axis = normalize_axis_tuple(axis, a.ndim, "axis") if initial != 0: raise NotImplementedError( @@ -2702,20 +2808,20 @@ def sum( ) else: if ( - len(x.shape) == 2 - and x.itemsize == 4 + len(a.shape) == 2 + and a.itemsize == 4 and ( ( axis == (0,) - and x.flags.c_contiguous - and 32 <= x.shape[1] <= 1024 - and x.shape[0] > x.shape[1] + and a.flags.c_contiguous + and 32 <= a.shape[1] <= 1024 + and a.shape[0] > a.shape[1] ) or ( axis == (1,) - and x.flags.f_contiguous - and 32 <= x.shape[0] <= 1024 - and x.shape[1] > x.shape[0] + and a.flags.f_contiguous + and 32 <= a.shape[0] <= 1024 + and a.shape[1] > a.shape[0] ) ) ): @@ -2723,7 +2829,7 @@ def sum( from dpnp.backend.extensions.sycl_ext import _sycl_ext_impl - input = x + input = a if axis == (1,): input = input.T input = dpnp.get_usm_ndarray(input) @@ -2755,7 +2861,7 @@ def sum( return result y = dpt.sum( - dpnp.get_usm_ndarray(x), axis=axis, dtype=dtype, keepdims=keepdims + dpnp.get_usm_ndarray(a), axis=axis, dtype=dtype, keepdims=keepdims ) result = dpnp_array._create_from_usm_ndarray(y) return dpnp.get_result_array(result, out, casting="same_kind") diff --git a/dpnp/dpnp_iface_nanfunctions.py b/dpnp/dpnp_iface_nanfunctions.py index a16583fa0c9..fbaa06603d5 100644 --- a/dpnp/dpnp_iface_nanfunctions.py +++ b/dpnp/dpnp_iface_nanfunctions.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2023-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -52,8 +52,10 @@ "nancumprod", "nancumsum", "nanmax", + "nanmean", "nanmin", "nanprod", + "nanstd", "nansum", "nanvar", ] @@ -70,14 +72,14 @@ def _replace_nan(a, val): Parameters ---------- - a : {dpnp_array, usm_ndarray} + a : {dpnp.ndarray, usm_ndarray} Input array. val : float NaN values are set to `val` before doing the operation. Returns ------- - out : {dpnp_array} + out : {dpnp.ndarray} If `a` is of inexact type, return a copy of `a` with the NaNs replaced by the fill value, otherwise return `a`. mask: {bool, None} @@ -108,13 +110,13 @@ def nanargmax(a, axis=None, out=None, *, keepdims=False): Parameters ---------- - a : {dpnp_array, usm_ndarray} + a : {dpnp.ndarray, usm_ndarray} Input array. axis : int, optional Axis along which to search. If ``None``, the function must return the index of the maximum value of the flattened array. Default: ``None``. - out : {dpnp_array, usm_ndarray}, optional + out : {dpnp.ndarray, usm_ndarray}, optional If provided, the result will be inserted into this array. It should be of the appropriate shape and dtype. keepdims : bool @@ -176,13 +178,13 @@ def nanargmin(a, axis=None, out=None, *, keepdims=False): Parameters ---------- - a : {dpnp_array, usm_ndarray} + a : {dpnp.ndarray, usm_ndarray} Input array. axis : int, optional Axis along which to search. If ``None``, the function must return the index of the minimum value of the flattened array. Default: ``None``. - out : {dpnp_array, usm_ndarray}, optional + out : {dpnp.ndarray, usm_ndarray}, optional If provided, the result will be inserted into this array. It should be of the appropriate shape and dtype. keepdims : bool @@ -318,14 +320,14 @@ def nanmax(a, axis=None, out=None, keepdims=False, initial=None, where=True): Parameters ---------- - a : {dpnp_array, usm_ndarray} + a : {dpnp.ndarray, usm_ndarray} Input array. 
axis : int or tuple of ints, optional Axis or axes along which maximum values must be computed. By default, the maximum value must be computed over the entire array. If a tuple of integers, maximum values must be computed over multiple axes. Default: ``None``. - out : {dpnp_array, usm_ndarray}, optional + out : {dpnp.ndarray, usm_ndarray}, optional If provided, the result will be inserted into this array. It should be of the appropriate shape and dtype. keepdims : bool @@ -405,6 +407,122 @@ def nanmax(a, axis=None, out=None, keepdims=False, initial=None, where=True): return res +def nanmean(a, axis=None, dtype=None, out=None, keepdims=False, *, where=True): + """ + Compute the arithmetic mean along the specified axis, ignoring NaNs. + + For full documentation refer to :obj:`numpy.nanmean`. + + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + Input array. + axis : int or tuple of ints, optional + Axis or axes along which the arithmetic means must be computed. If + a tuple of unique integers, the means are computed over multiple + axes. If ``None``, the mean is computed over the entire array. + Default: ``None``. + dtype : dtype, optional + Type to use in computing the mean. By default, if `a` has a + floating-point data type, the returned array will have + the same data type as `a`. + If `a` has a boolean or integral data type, the returned array + will have the default floating point data type for the device + where input array `a` is allocated. + out : {dpnp.ndarray, usm_ndarray}, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output but the type (of the calculated + values) will be cast if necessary. Default: ``None``. + keepdims : bool, optional + If ``True``, the reduced axes (dimensions) are included in the result + as singleton dimensions, so that the returned array remains + compatible with the input array according to Array Broadcasting + rules. 
Otherwise, if ``False``, the reduced axes are not included in + the returned array. Default: ``False``. + + Returns + ------- + out : dpnp.ndarray + An array containing the arithmetic means along the specified axis(axes). + If the input is a zero-size array, an array containing NaN values is returned. + In addition, NaN is returned for slices that contain only NaNs. + + Limitations + ----------- + Parameter `where` is only supported with its default value. + Otherwise ``NotImplementedError`` exception will be raised. + + See Also + -------- + :obj:`dpnp.average` : Weighted average. + :obj:`dpnp.mean` : Compute the arithmetic mean along the specified axis. + :obj:`dpnp.var` : Compute the variance along the specified axis. + :obj:`dpnp.nanvar` : Compute the variance along the specified axis, + while ignoring NaNs. + :obj:`dpnp.std` : Compute the standard deviation along the specified axis. + :obj:`dpnp.nanstd` : Compute the standard deviation along the specified axis, + while ignoring NaNs. + + Examples + -------- + >>> import dpnp as np + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.nanmean(a) + array(2.6666666666666665) + >>> np.nanmean(a, axis=0) + array([2., 4.]) + >>> np.nanmean(a, axis=1) + array([1., 3.5]) # may vary + + """ + + if where is not True: + raise NotImplementedError( + "where keyword argument is only supported with its default value." + ) + else: + arr, mask = _replace_nan(a, 0) + if mask is None: + return dpnp.mean( + arr, + axis=axis, + dtype=dtype, + out=out, + keepdims=keepdims, + where=where, + ) + + if dtype is not None: + dtype = dpnp.dtype(dtype) + if not dpnp.issubdtype(dtype, dpnp.inexact): + raise TypeError( + "If input is inexact, then dtype must be inexact." + ) + if out is not None: + dpnp.check_supported_arrays_type(out) + if not dpnp.issubdtype(out.dtype, dpnp.inexact): + raise TypeError( + "If input is inexact, then out must be inexact." 
+ ) + + cnt_dtype = a.real.dtype if dtype is None else dtype + cnt = dpnp.sum( + ~mask, axis=axis, dtype=cnt_dtype, keepdims=keepdims, where=where + ) + var_dtype = a.dtype if dtype is None else dtype + avg = dpnp.sum( + arr, + axis=axis, + dtype=var_dtype, + out=out, + keepdims=keepdims, + where=where, + ) + dpnp.divide(avg, cnt, out=avg) + + return avg + + def nanmin(a, axis=None, out=None, keepdims=False, initial=None, where=True): """ Return the minimum of an array or minimum along an axis, ignoring any NaNs. @@ -413,14 +531,14 @@ def nanmin(a, axis=None, out=None, keepdims=False, initial=None, where=True): Parameters ---------- - a : {dpnp_array, usm_ndarray} + a : {dpnp.ndarray, usm_ndarray} Input array. axis : int or tuple of ints, optional Axis or axes along which minimum values must be computed. By default, the minimum value must be computed over the entire array. If a tuple of integers, minimum values must be computed over multiple axes. Default: ``None``. - out : {dpnp_array, usm_ndarray}, optional + out : {dpnp.ndarray, usm_ndarray}, optional If provided, the result will be inserted into this array. It should be of the appropriate shape and dtype. keepdims : bool, optional @@ -548,8 +666,7 @@ def nanprod( """ - a, mask = _replace_nan(a, 1) - + a, _ = _replace_nan(a, 1) return dpnp.prod( a, axis=axis, @@ -561,36 +678,224 @@ def nanprod( ) -def nansum(x1, **kwargs): +def nansum( + a, + /, + *, + axis=None, + dtype=None, + keepdims=False, + out=None, + initial=0, + where=True, +): """ - Calculate sum() function treating 'Not a Numbers' (NaN) as zero. + Return the sum of array elements over a given axis treating Not a Numbers (NaNs) as zero. For full documentation refer to :obj:`numpy.nansum`. + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + Input array. + axis : int or tuple of ints, optional + Axis or axes along which sums must be computed. If a tuple + of unique integers, sums are computed over multiple axes. 
+ If ``None``, the sum is computed over the entire array. + Default: ``None``. + dtype : dtype, optional + Data type of the returned array. If ``None``, the default data + type is inferred from the "kind" of the input array data type. + * If `a` has a real-valued floating-point data type, + the returned array will have the default real-valued + floating-point data type for the device where input + array `a` is allocated. + * If `a` has signed integral data type, the returned array + will have the default signed integral type for the device + where input array `a` is allocated. + * If `a` has unsigned integral data type, the returned array + will have the default unsigned integral type for the device + where input array `a` is allocated. + * If `a` has a complex-valued floating-point data type, + the returned array will have the default complex-valued + floating-pointer data type for the device where input + array `a` is allocated. + * If `a` has a boolean data type, the returned array will + have the default signed integral type for the device + where input array `a` is allocated. + If the data type (either specified or resolved) differs from the + data type of `a`, the input array elements are cast to the + specified data type before computing the sum. + Default: ``None``. + out : {dpnp.ndarray, usm_ndarray}, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output but the type (of the calculated + values) will be cast if necessary. Default: ``None``. + keepdims : bool, optional + If ``True``, the reduced axes (dimensions) are included in the result + as singleton dimensions, so that the returned array remains + compatible with the input array according to Array Broadcasting + rules. Otherwise, if ``False``, the reduced axes are not included in + the returned array. Default: ``False``. + + Returns + ------- + out : dpnp.ndarray + An array containing the sums. 
If the sum is computed over the + entire array, a zero-dimensional array is returned. The returned + array has the data type as described in the `dtype` parameter + description above. Zero is returned for slices that are all-NaN + or empty. + Limitations ----------- - Parameter `x1` is supported as :class:`dpnp.ndarray`. - Keyword argument `kwargs` is currently unsupported. - Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + Parameters `initial` and `where` are supported with their default values. + Otherwise ``NotImplementedError`` exception will be raised. + + See Also + -------- + :obj:`dpnp.sum` : Sum across array propagating NaNs. + :obj:`dpnp.isnan` : Show which elements are NaN. + :obj:`dpnp.isfinite` : Show which elements are not NaN or +/-inf. + + Notes + ----- + If both positive and negative infinity are present, the sum will be Not + A Number (NaN). Examples -------- >>> import dpnp as np - >>> np.nansum(np.array([1, 2])) - 3 - >>> np.nansum(np.array([[1, 2], [3, 4]])) - 10 + >>> np.nansum(np.array([1])) + array(1) + >>> np.nansum(np.array([1, np.nan])) + array(1.) + >>> a = np.array([[1, 1], [1, np.nan]]) + >>> np.nansum(a) + array(3.) 
+ >>> np.nansum(a, axis=0) + array([2., 1.]) + >>> np.nansum(np.array([1, np.nan, np.inf])) + array(inf) + >>> np.nansum(np.array([1, np.nan, np.NINF])) + array(-inf) + >>> np.nansum(np.array([1, np.nan, np.inf, -np.inf])) # both +/- infinity present + array(nan) """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc and not kwargs: - result_obj = dpnp_nansum(x1_desc).get_pyobj() - result = dpnp.convert_single_elem_array_to_scalar(result_obj) - return result + a, _ = _replace_nan(a, 0) + return dpnp.sum( + a, + axis=axis, + dtype=dtype, + out=out, + keepdims=keepdims, + initial=initial, + where=where, + ) - return call_origin(numpy.nansum, x1, **kwargs) + +def nanstd( + a, axis=None, dtype=None, out=None, ddof=0, keepdims=False, *, where=True +): + """ + Compute the standard deviation along the specified axis, while ignoring NaNs. + + For full documentation refer to :obj:`numpy.nanstd`. + + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + Input array. + axis : int or tuple of ints, optional + Axis or axes along which the standard deviations must be computed. + If a tuple of unique integers is given, the standard deviations + are computed over multiple axes. If ``None``, the standard deviation + is computed over the entire array. + Default: ``None``. + dtype : dtype, optional + Type to use in computing the standard deviation. By default, + if `a` has a floating-point data type, the returned array + will have the same data type as `a`. + If `a` has a boolean or integral data type, the returned array + will have the default floating point data type for the device + where input array `a` is allocated. + out : {dpnp.ndarray, usm_ndarray}, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output but the type (of the calculated + values) will be cast if necessary. + ddof : {int, float}, optional + Means Delta Degrees of Freedom. 
The divisor used in calculations + is ``N - ddof``, where ``N`` the number of non-NaN elements. + Default: `0.0`. + keepdims : bool, optional + If ``True``, the reduced axes (dimensions) are included in the result + as singleton dimensions, so that the returned array remains + compatible with the input array according to Array Broadcasting + rules. Otherwise, if ``False``, the reduced axes are not included in + the returned array. Default: ``False``. + + Returns + ------- + out : dpnp.ndarray + An array containing the standard deviations. If the standard + deviation was computed over the entire array, a zero-dimensional + array is returned. If ddof is >= the number of non-NaN elements + in a slice or the slice contains only NaNs, then the result for + that slice is NaN. + + Limitations + ----------- + Parameters `where` is only supported with its default value. + Otherwise ``NotImplementedError`` exception will be raised. + + Notes + ----- + Note that, for complex numbers, the absolute value is taken before squaring, + so that the result is always real and nonnegative. + + See Also + -------- + :obj:`dpnp.var` : Compute the variance along the specified axis. + :obj:`dpnp.mean` : Compute the arithmetic mean along the specified axis. + :obj:`dpnp.std` : Compute the standard deviation along the specified axis. + :obj:`dpnp.nanmean` : Compute the arithmetic mean along the specified axis, ignoring NaNs. + :obj:`dpnp.nanvar` : Compute the variance along the specified axis, while ignoring NaNs. + + Examples + -------- + >>> import dpnp as np + >>> a = np.array([[1, np.nan], [3, 4]]) + >>> np.nanstd(a) + array(1.247219128924647) + >>> np.nanstd(a, axis=0) + array([1., 0.]) + >>> np.nanstd(a, axis=1) + array([0., 0.5]) # may vary + + """ + + if where is not True: + raise NotImplementedError( + "where keyword argument is only supported with its default value." 
+ ) + elif not isinstance(ddof, (int, float)): + raise TypeError( + "An integer or float is required, but got {}".format(type(ddof)) + ) + else: + res = nanvar( + a, + axis=axis, + dtype=dtype, + out=out, + ddof=ddof, + keepdims=keepdims, + where=where, + ) + dpnp.sqrt(res, out=res) + return res def nanvar( @@ -603,25 +908,27 @@ def nanvar( Parameters ---------- - a : {dpnp_array, usm_ndarray}: + a : {dpnp_array, usm_ndarray} Input array. axis : int or tuple of ints, optional axis or axes along which the variances must be computed. If a tuple of unique integers is given, the variances are computed over multiple axes. If ``None``, the variance is computed over the entire array. - Default: `None`. + Default: ``None``. dtype : dtype, optional - Type to use in computing the standard deviation. For arrays of - integer type the default real-valued floating-point data type is used, - for arrays of float types it is the same as the array type. + Type to use in computing the variance. By default, if `a` has a + floating-point data type, the returned array will have + the same data type as `a`. + If `a` has a boolean or integral data type, the returned array + will have the default floating point data type for the device + where input array `a` is allocated. out : {dpnp_array, usm_ndarray}, optional Alternative output array in which to place the result. It must have the same shape as the expected output but the type (of the calculated values) will be cast if necessary. ddof : {int, float}, optional Means Delta Degrees of Freedom. The divisor used in calculations - is ``N - ddof``, where ``N`` corresponds to the total - number of elements over which the variance is calculated. + is ``N - ddof``, where ``N`` represents the number of non-NaN elements. Default: `0.0`. keepdims : bool, optional If ``True``, the reduced axes (dimensions) are included in the result @@ -633,20 +940,20 @@ def nanvar( Returns ------- out : dpnp.ndarray - an array containing the variances. 
If the variance was computed + An array containing the variances. If the variance was computed over the entire array, a zero-dimensional array is returned. - - If `a` has a real-valued floating-point data type, the returned - array will have the same data type as `a`. - If `a` has a boolean or integral data type, the returned array - will have the default floating point data type for the device - where input array `a` is allocated. + If ddof is >= the number of non-NaN elements in a slice or the + slice contains only NaNs, then the result for that slice is NaN. Limitations ----------- Parameters `where` is only supported with its default value. Otherwise ``NotImplementedError`` exception will be raised. - Input array data types are limited by real valued data types. + + Notes + ----- + Note that, for complex numbers, the absolute value is taken before squaring, + so that the result is always real and nonnegative. See Also -------- @@ -670,7 +977,6 @@ def nanvar( """ - dpnp.check_supported_arrays_type(a) if where is not True: raise NotImplementedError( "where keyword argument is only supported with its default value." @@ -694,7 +1000,7 @@ def nanvar( if dtype is not None: dtype = dpnp.dtype(dtype) - if not issubclass(dtype.type, dpnp.inexact): + if not dpnp.issubdtype(dtype, dpnp.inexact): raise TypeError( "If input is inexact, then dtype must be inexact." ) @@ -710,9 +1016,7 @@ def nanvar( cnt = dpnp.sum( ~mask, axis=axis, dtype=var_dtype, keepdims=True, where=where ) - avg = dpnp.sum( - arr, axis=axis, dtype=var_dtype, keepdims=True, where=where - ) + avg = dpnp.sum(arr, axis=axis, dtype=dtype, keepdims=True, where=where) avg = dpnp.divide(avg, cnt, out=avg) # Compute squared deviation from mean. 
diff --git a/dpnp/dpnp_iface_searching.py b/dpnp/dpnp_iface_searching.py index 5187a280d5c..4df311ad45e 100644 --- a/dpnp/dpnp_iface_searching.py +++ b/dpnp/dpnp_iface_searching.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -57,13 +57,13 @@ def argmax(a, axis=None, out=None, *, keepdims=False): Parameters ---------- - a : {dpnp_array, usm_ndarray} + a : {dpnp.ndarray, usm_ndarray} Input array. axis : int, optional Axis along which to search. If ``None``, the function must return the index of the maximum value of the flattened array. Default: ``None``. - out : {dpnp_array, usm_ndarray}, optional + out : {dpnp.ndarray, usm_ndarray}, optional If provided, the result will be inserted into this array. It should be of the appropriate shape and dtype. keepdims : bool @@ -141,13 +141,13 @@ def argmin(a, axis=None, out=None, *, keepdims=False): Parameters ---------- - a : {dpnp_array, usm_ndarray} + a : {dpnp.ndarray, usm_ndarray} Input array. axis : int, optional Axis along which to search. If ``None``, the function must return the index of the minimum value of the flattened array. Default: ``None``. - out : {dpnp_array, usm_ndarray}, optional + out : {dpnp.ndarray, usm_ndarray}, optional If provided, the result will be inserted into this array. It should be of the appropriate shape and dtype. 
keepdims : bool, optional diff --git a/dpnp/dpnp_iface_sorting.py b/dpnp/dpnp_iface_sorting.py index 6e7c116a913..6a3db20e74c 100644 --- a/dpnp/dpnp_iface_sorting.py +++ b/dpnp/dpnp_iface_sorting.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -40,33 +40,60 @@ """ +import dpctl.tensor as dpt import numpy +from numpy.core.numeric import normalize_axis_index import dpnp from dpnp.dpnp_algo import * +from dpnp.dpnp_array import dpnp_array from dpnp.dpnp_utils import * __all__ = ["argsort", "partition", "searchsorted", "sort"] -def argsort(in_array1, axis=-1, kind=None, order=None): +def argsort(a, axis=-1, kind=None, order=None): """ Returns the indices that would sort an array. For full documentation refer to :obj:`numpy.argsort`. + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + Array to be sorted. + axis : int or None, optional + Axis along which to sort. If ``None``, the array is flattened before + sorting. The default is -1, which sorts along the last axis. + kind : {None, "stable"}, optional + Default is ``None``, which is equivalent to `"stable"`. + Unlike in NumPy any other options are not accepted here. + + Returns + ------- + out : dpnp.ndarray + Array of indices that sort `a` along the specified `axis`. + If `a` is one-dimensional, ``a[index_array]`` yields a sorted `a`. + More generally, ``dpnp.take_along_axis(a, index_array, axis=axis)`` + always yields the sorted `a`, irrespective of dimensionality. + The return array has default array index data type. + + Notes + ----- + For zero-dimensional arrays, if `axis=None`, output is a one-dimensional + array with a single zero element. Otherwise, an ``AxisError`` is raised. 
+ Limitations ----------- - Input array is supported as :obj:`dpnp.ndarray`. - Otherwise the function will be executed sequentially on CPU. - Parameter `axis` is supported only with default value ``-1``. - Parameter `kind` is supported only with default value ``None``. - Parameter `order` is supported only with default value ``None``. - Input array data types are limited by supported DPNP :ref:`Data types`. + Parameters `order` is only supported with its default value. + Parameters `kind` can only be ``None`` or ``"stable"`` which + are equivalent. + Otherwise ``NotImplementedError`` exception will be raised. See Also -------- - :obj:`dpnp.sort` : Describes sorting algorithms used. + :obj:`dpnp.ndarray.argsort` : Equivalent method. + :obj:`dpnp.sort` : Return a sorted copy of an array. :obj:`dpnp.lexsort` : Indirect stable sort with multiple keys. :obj:`dpnp.argpartition` : Indirect partial sort. :obj:`dpnp.take_along_axis` : Apply ``index_array`` from argsort to @@ -76,26 +103,50 @@ def argsort(in_array1, axis=-1, kind=None, order=None): -------- >>> import dpnp as np >>> x = np.array([3, 1, 2]) - >>> out = np.argsort(x) - >>> [i for i in out] - [1, 2, 0] + >>> np.argsort(x) + array([1, 2, 0]) + + >>> x = np.array([[0, 3], [2, 2]]) + >>> x + array([[0, 3], + [2, 2]]) + + >>> ind = np.argsort(x, axis=0) # sorts along first axis + >>> ind + array([[0, 1], + [1, 0]]) + >>> np.take_along_axis(x, ind, axis=0) # same as np.sort(x, axis=0) + array([[0, 2], + [2, 3]]) + + >>> ind = np.argsort(x, axis=1) # sorts along last axis + >>> ind + array([[0, 1], + [0, 1]]) + >>> np.take_along_axis(x, ind, axis=1) # same as np.sort(x, axis=1) + array([[0, 3], + [2, 2]]) """ - x1_desc = dpnp.get_dpnp_descriptor( - in_array1, copy_when_nondefault_queue=False - ) - if x1_desc: - if axis != -1: - pass - elif kind is not None: - pass - elif order is not None: - pass - else: - return dpnp_argsort(x1_desc).get_pyobj() - - return call_origin(numpy.argsort, in_array1, axis, kind, order) + 
if order is not None: + raise NotImplementedError( + "order keyword argument is only supported with its default value." + ) + elif kind is not None and kind != "stable": + raise NotImplementedError( + "kind keyword argument can only be None or 'stable'." + ) + else: + dpnp.check_supported_arrays_type(a) + if axis is None: + a = a.flatten() + axis = -1 + + axis = normalize_axis_index(axis, ndim=a.ndim) + return dpnp_array._create_from_usm_ndarray( + dpt.argsort(dpnp.get_usm_ndarray(a), axis=axis) + ) def partition(x1, kth, axis=-1, kind="introselect", order=None): @@ -166,23 +217,44 @@ def searchsorted(x1, x2, side="left", sorter=None): return call_origin(numpy.searchsorted, x1, x2, side=side, sorter=sorter) -def sort(x1, **kwargs): +def sort(a, axis=-1, kind=None, order=None): """ Return a sorted copy of an array. For full documentation refer to :obj:`numpy.sort`. + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + Array to be sorted. + axis : int or None, optional + Axis along which to sort. If ``None``, the array is flattened before + sorting. The default is -1, which sorts along the last axis. + kind : {None, "stable"}, optional + Default is ``None``, which is equivalent to `"stable"`. + Unlike in NumPy any other options are not accepted here. + + Returns + ------- + out : dpnp.ndarray + Sorted array with the same type and shape as `a`. + + Notes + ----- + For zero-dimensional arrays, if `axis=None`, output is the input array + returned as a one-dimensional array. Otherwise, an ``AxisError`` is raised. + Limitations ----------- - Input array is supported as :obj:`dpnp.ndarray`. - Keyword arguments ``kwargs`` are currently unsupported. - Dimension of input array is supported to be equal to ``1``. - Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + Parameters `order` is only supported with its default value. 
+ Parameters `kind` can only be ``None`` or ``"stable"`` which + are equivalent. + Otherwise ``NotImplementedError`` exception will be raised. See Also -------- - :obj:`dpnp.argsort` : Indirect sort. + :obj:`dpnp.ndarray.sort` : Sort an array in-place. + :obj:`dpnp.argsort` : Return the indices that would sort an array. :obj:`dpnp.lexsort` : Indirect stable sort on multiple keys. :obj:`dpnp.searchsorted` : Find elements in a sorted array. :obj:`dpnp.partition` : Partial sort. @@ -190,18 +262,33 @@ def sort(x1, **kwargs): Examples -------- >>> import dpnp as np - >>> a = np.array([1, 4, 3, 1]) - >>> out = np.sort(a) - >>> [i for i in out] - [1, 1, 3, 4] + >>> a = np.array([[1,4],[3,1]]) + >>> np.sort(a) # sort along the last axis + array([[1, 4], + [1, 3]]) + >>> np.sort(a, axis=None) # sort the flattened array + array([1, 1, 3, 4]) + >>> np.sort(a, axis=0) # sort along the first axis + array([[1, 1], + [3, 4]]) """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc and not kwargs: - if x1_desc.ndim != 1: - pass - else: - return dpnp_sort(x1_desc).get_pyobj() - - return call_origin(numpy.sort, x1, **kwargs) + if order is not None: + raise NotImplementedError( + "order keyword argument is only supported with its default value." + ) + elif kind is not None and kind != "stable": + raise NotImplementedError( + "kind keyword argument can only be None or 'stable'." 
+ ) + else: + dpnp.check_supported_arrays_type(a) + if axis is None: + a = a.flatten() + axis = -1 + + axis = normalize_axis_index(axis, ndim=a.ndim) + return dpnp_array._create_from_usm_ndarray( + dpt.sort(dpnp.get_usm_ndarray(a), axis=axis) + ) diff --git a/dpnp/dpnp_iface_statistics.py b/dpnp/dpnp_iface_statistics.py index 4e4201c97cd..903de2d69e9 100644 --- a/dpnp/dpnp_iface_statistics.py +++ b/dpnp/dpnp_iface_statistics.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -72,7 +72,7 @@ def _count_reduce_items(arr, axis, where=True): Parameters ---------- - arr : {dpnp_array, usm_ndarray} + arr : {dpnp.ndarray, usm_ndarray} Input array. axis : int or tuple of ints, optional axis or axes along which the number of items used in a reduction operation must be counted. @@ -143,51 +143,160 @@ def amin(a, axis=None, out=None, keepdims=False, initial=None, where=True): ) -def average(x1, axis=None, weights=None, returned=False): +def average(a, axis=None, weights=None, returned=False, *, keepdims=False): """ Compute the weighted average along the specified axis. For full documentation refer to :obj:`numpy.average`. - Limitations - ----------- - Input array is supported as :obj:`dpnp.ndarray`. - Parameter `axis` is supported only with default value ``None``. - Parameter `weights` is supported only with default value ``None``. - Parameter `returned` is supported only with default value ``False``. - Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + Input array. + axis : int or tuple of ints, optional + Axis or axes along which the averages must be computed. 
If + a tuple of unique integers, the averages are computed over multiple + axes. If ``None``, the average is computed over the entire array. + Default: ``None``. + weights : array_like, optional + An array of weights associated with the values in `a`. Each value in + `a` contributes to the average according to its associated weight. + The weights array can either be 1-D (in which case its length must be + the size of `a` along the given axis) or of the same shape as `a`. + If `weights=None`, then all data in `a` are assumed to have a + weight equal to one. The 1-D calculation is:: + + avg = sum(a * weights) / sum(weights) + + The only constraint on `weights` is that `sum(weights)` must not be 0. + returned : bool, optional + Default is ``False``. If ``True``, the tuple (`average`, `sum_of_weights`) + is returned, otherwise only the average is returned. + If `weights=None`, `sum_of_weights` is equivalent to the number of + elements over which the average is taken. + keepdims : bool, optional + If ``True``, the reduced axes (dimensions) are included in the result + as singleton dimensions, so that the returned array remains + compatible with the input array according to Array Broadcasting + rules. Otherwise, if ``False``, the reduced axes are not included in + the returned array. Default: ``False``. + + Returns + ------- + out, [sum_of_weights] : dpnp.ndarray, dpnp.ndarray + Return the average along the specified axis. When `returned` is ``True``, + return a tuple with the average as the first element and the sum of the + weights as the second element. `sum_of_weights` is of the same type as + `out`. The result dtype follows a general pattern. If `weights` is + ``None``, the result dtype will be that of `a` , or default floating point + data type for the device where input array `a` is allocated. 
Otherwise, + if `weights` is not ``None`` and `a` is non-integral, the result type + will be the type of lowest precision capable of representing values of + both `a` and `weights`. If `a` happens to be integral, the previous rules + still apply but the result dtype will at least be default floating point + data type for the device where input array `a` is allocated. See Also -------- :obj:`dpnp.mean` : Compute the arithmetic mean along the specified axis. + :obj:`dpnp.sum` : Sum of array elements over a given axis. Examples -------- >>> import dpnp as np >>> data = np.arange(1, 5) - >>> [i for i in data] - [1, 2, 3, 4] + >>> data + array([1, 2, 3, 4]) >>> np.average(data) - 2.5 + array(2.5) + >>> np.average(np.arange(1, 11), weights=np.arange(10, 0, -1)) + array(4.0) + + >>> data = np.arange(6).reshape((3, 2)) + >>> data + array([[0, 1], + [2, 3], + [4, 5]]) + >>> np.average(data, axis=1, weights=[1./4, 3./4]) + array([0.75, 2.75, 4.75]) + >>> np.average(data, weights=[1./4, 3./4]) + TypeError: Axis must be specified when shapes of a and weights differ. + + With ``keepdims=True``, the following result has shape (3, 1).
+ + >>> np.average(data, axis=1, keepdims=True) + array([[0.5], + [2.5], + [4.5]]) + + >>> a = np.ones(5, dtype=np.float64) + >>> w = np.ones(5, dtype=np.complex64) + >>> avg = np.average(a, weights=w) + >>> print(avg.dtype) + complex128 """ - x1_desc = dpnp.get_dpnp_descriptor(x1, copy_when_nondefault_queue=False) - if x1_desc: - if axis is not None: - pass - elif weights is not None: - pass - elif returned: - pass + dpnp.check_supported_arrays_type(a) + if weights is None: + avg = dpnp.mean(a, axis=axis, keepdims=keepdims) + scl = dpnp.asanyarray( + avg.dtype.type(a.size / avg.size), + usm_type=a.usm_type, + sycl_queue=a.sycl_queue, + ) + else: + if not isinstance(weights, (dpnp_array, dpt.usm_ndarray)): + wgt = dpnp.asanyarray( + weights, usm_type=a.usm_type, sycl_queue=a.sycl_queue + ) else: - result_obj = dpnp_average(x1_desc) - result = dpnp.convert_single_elem_array_to_scalar(result_obj) + get_usm_allocations([a, weights]) + wgt = weights - return result + if not dpnp.issubdtype(a.dtype, dpnp.inexact): + default_dtype = dpnp.default_float_type(a.device) + result_dtype = dpnp.result_type(a.dtype, wgt.dtype, default_dtype) + else: + result_dtype = dpnp.result_type(a.dtype, wgt.dtype) - return call_origin(numpy.average, x1, axis, weights, returned) + # Sanity checks + if a.shape != wgt.shape: + if axis is None: + raise TypeError( + "Axis must be specified when shapes of input array and weights differ." + ) + if wgt.ndim != 1: + raise TypeError( + "1D weights expected when shapes of input array and weights differ." + ) + if wgt.shape[0] != a.shape[axis]: + raise ValueError( + "Length of weights not compatible with specified axis." 
+ ) + + # setup wgt to broadcast along axis + wgt = dpnp.broadcast_to(wgt, (a.ndim - 1) * (1,) + wgt.shape) + wgt = wgt.swapaxes(-1, axis) + + scl = wgt.sum(axis=axis, dtype=result_dtype, keepdims=keepdims) + if dpnp.any(scl == 0.0): + raise ZeroDivisionError("Weights sum to zero, can't be normalized") + + # result_datatype + avg = ( + dpnp.multiply(a, wgt).sum( + axis=axis, dtype=result_dtype, keepdims=keepdims + ) + / scl + ) + + if returned: + if scl.shape != avg.shape: + scl = dpnp.broadcast_to(scl, avg.shape).copy() + return avg, scl + else: + return avg def bincount(x1, weights=None, minlength=0): @@ -376,14 +485,14 @@ def max(a, axis=None, out=None, keepdims=False, initial=None, where=True): Parameters ---------- - a : {dpnp_array, usm_ndarray} + a : {dpnp.ndarray, usm_ndarray} Input array. axis : int or tuple of ints, optional Axis or axes along which maximum values must be computed. By default, the maximum value must be computed over the entire array. If a tuple of integers, maximum values must be computed over multiple axes. Default: ``None``. - out : {dpnp_array, usm_ndarray}, optional + out : {dpnp.ndarray, usm_ndarray}, optional If provided, the result will be inserted into this array. It should be of the appropriate shape and dtype. keepdims : bool @@ -462,19 +571,43 @@ def mean(a, /, axis=None, dtype=None, out=None, keepdims=False, *, where=True): For full documentation refer to :obj:`numpy.mean`. + Parameters + ---------- + a : {dpnp.ndarray, usm_ndarray} + Input array. + axis : int or tuple of ints, optional + Axis or axes along which the arithmetic means must be computed. If + a tuple of unique integers, the means are computed over multiple + axes. If ``None``, the mean is computed over the entire array. + Default: ``None``. + dtype : dtype, optional + Type to use in computing the mean. By default, if `a` has a + floating-point data type, the returned array will have + the same data type as `a`. 
+ If `a` has a boolean or integral data type, the returned array + will have the default floating point data type for the device + where input array `a` is allocated. + out : {dpnp.ndarray, usm_ndarray}, optional + Alternative output array in which to place the result. It must have + the same shape as the expected output but the type (of the calculated + values) will be cast if necessary. Default: ``None``. + keepdims : bool, optional + If ``True``, the reduced axes (dimensions) are included in the result + as singleton dimensions, so that the returned array remains + compatible with the input array according to Array Broadcasting + rules. Otherwise, if ``False``, the reduced axes are not included in + the returned array. Default: ``False``. + Returns ------- out : dpnp.ndarray - an array containing the mean values of the elements along the specified axis(axes). + An array containing the arithmetic means along the specified axis(axes). If the input is a zero-size array, an array containing NaN values is returned. Limitations ----------- - Parameters `a` is supported as either :class:`dpnp.ndarray` - or :class:`dpctl.tensor.usm_ndarray`. Parameter `where` is only supported with its default value. Otherwise ``NotImplementedError`` exception will be raised. - Input array data types are limited by supported DPNP :ref:`Data types`. See Also -------- @@ -512,7 +645,7 @@ def mean(a, /, axis=None, dtype=None, out=None, keepdims=False, *, where=True): ) result = result.astype(dtype) if dtype is not None else result - return dpnp.get_result_array(result, out) + return dpnp.get_result_array(result, out, casting="same_kind") def median(x1, axis=None, out=None, overwrite_input=False, keepdims=False): @@ -573,14 +706,14 @@ def min(a, axis=None, out=None, keepdims=False, initial=None, where=True): Parameters ---------- - a : {dpnp_array, usm_ndarray} + a : {dpnp.ndarray, usm_ndarray} Input array. 
axis : int or tuple of ints, optional Axis or axes along which minimum values must be computed. By default, the minimum value must be computed over the entire array. If a tuple of integers, minimum values must be computed over multiple axes. Default: ``None``. - out : {dpnp_array, usm_ndarray}, optional + out : {dpnp.ndarray, usm_ndarray}, optional If provided, the result will be inserted into this array. It should be of the appropriate shape and dtype. keepdims : bool, optional @@ -672,7 +805,7 @@ def ptp( Limitations ----------- - Input array is supported as :class:`dpnp.dpnp_array` or :class:`dpctl.tensor.usm_ndarray`. + Input array is supported as :class:`dpnp.dpnp.ndarray` or :class:`dpctl.tensor.usm_ndarray`. Examples -------- @@ -706,17 +839,21 @@ def std( Parameters ---------- - a : {dpnp_array, usm_ndarray}: - nput array. + a : {dpnp_array, usm_ndarray} + Input array. axis : int or tuple of ints, optional - Axis or axes along which the variances must be computed. If a tuple - of unique integers is given, the variances are computed over multiple axes. - If ``None``, the variance is computed over the entire array. - Default: `None`. + Axis or axes along which the standard deviations must be computed. + If a tuple of unique integers is given, the standard deviations + are computed over multiple axes. If ``None``, the standard deviation + is computed over the entire array. + Default: ``None``. dtype : dtype, optional - Type to use in computing the standard deviation. For arrays of - integer type the default real-valued floating-point data type is used, - for arrays of float types it is the same as the array type. + Type to use in computing the standard deviation. By default, + if `a` has a floating-point data type, the returned array + will have the same data type as `a`. + If `a` has a boolean or integral data type, the returned array + will have the default floating point data type for the device + where input array `a` is allocated. 
out : {dpnp_array, usm_ndarray}, optional Alternative output array in which to place the result. It must have the same shape as the expected output but the type (of the calculated @@ -724,7 +861,7 @@ def std( ddof : {int, float}, optional Means Delta Degrees of Freedom. The divisor used in calculations is ``N - ddof``, where ``N`` corresponds to the total - number of elements over which the variance is calculated. + number of elements over which the standard deviation is calculated. Default: `0.0`. keepdims : bool, optional If ``True``, the reduced axes (dimensions) are included in the result @@ -736,21 +873,14 @@ def std( Returns ------- out : dpnp.ndarray - an array containing the standard deviations. If the standard + An array containing the standard deviations. If the standard deviation was computed over the entire array, a zero-dimensional array is returned. - If `a` has a real-valued floating-point data type, the returned - array will have the same data type as `a`. - If `a` has a boolean or integral data type, the returned array - will have the default floating point data type for the device - where input array `a` is allocated. - Limitations ----------- Parameters `where` is only supported with its default value. Otherwise ``NotImplementedError`` exception will be raised. - Input array data types are limited by supported DPNP :ref:`Data types`. Notes ----- @@ -828,17 +958,20 @@ def var( Parameters ---------- - a : {dpnp_array, usm_ndarray}: + a : {dpnp_array, usm_ndarray} Input array. axis : int or tuple of ints, optional axis or axes along which the variances must be computed. If a tuple of unique integers is given, the variances are computed over multiple axes. If ``None``, the variance is computed over the entire array. - Default: `None`. + Default: ``None``. dtype : dtype, optional - Type to use in computing the variance. 
For arrays of integer type - the default real-valued floating-point data type is used, - for arrays of float types it is the same as the array type. + Type to use in computing the variance. By default, if `a` has a + floating-point data type, the returned array will have + the same data type as `a`. + If `a` has a boolean or integral data type, the returned array + will have the default floating point data type for the device + where input array `a` is allocated. out : {dpnp_array, usm_ndarray}, optional Alternative output array in which to place the result. It must have the same shape as the expected output but the type (of the calculated @@ -858,20 +991,13 @@ def var( Returns ------- out : dpnp.ndarray - an array containing the variances. If the variance was computed + An array containing the variances. If the variance was computed over the entire array, a zero-dimensional array is returned. - If `a` has a real-valued floating-point data type, the returned - array will have the same data type as `a`. - If `a` has a boolean or integral data type, the returned array - will have the default floating point data type for the device - where input array `a` is allocated. - Limitations ----------- Parameters `where` is only supported with its default value. Otherwise ``NotImplementedError`` exception will be raised. - Input array data types are limited by supported DPNP :ref:`Data types`. Notes ----- diff --git a/dpnp/dpnp_iface_trigonometric.py b/dpnp/dpnp_iface_trigonometric.py index fd5e4c1407e..2be5c96217a 100644 --- a/dpnp/dpnp_iface_trigonometric.py +++ b/dpnp/dpnp_iface_trigonometric.py @@ -1,8 +1,6 @@ -# cython: language_level=3 -# distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -69,6 +67,7 @@ dpnp_log2, dpnp_log10, dpnp_logaddexp, + dpnp_reciprocal, dpnp_rsqrt, dpnp_sin, dpnp_sinh, @@ -437,7 +436,7 @@ def arctan2( Parameters `where`, `dtype` and `subok` are supported with their default values. Keyword arguments `kwargs` are currently unsupported. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + Input array data types are limited by real-valued data types. See Also -------- @@ -1305,7 +1304,7 @@ def logaddexp( Parameters `where`, `dtype` and `subok` are supported with their default values. Keyword arguments `kwargs` are currently unsupported. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + Input array data types are limited by real-valued data types. See Also -------- @@ -1346,14 +1345,14 @@ def logsumexp(x, axis=None, out=None, dtype=None, keepdims=False): Parameters ---------- - x : {dpnp_array, usm_ndarray} + x : {dpnp.ndarray, usm_ndarray} Input array, expected to have a real-valued data type. axis : int or tuple of ints, optional Axis or axes along which values must be computed. If a tuple of unique integers, values are computed over multiple axes. If ``None``, the result is computed over the entire array. Default: ``None``. - out : {dpnp_array, usm_ndarray}, optional + out : {dpnp.ndarray, usm_ndarray}, optional If provided, the result will be inserted into this array. It should be of the appropriate shape and dtype. 
dtype : data type, optional @@ -1415,36 +1414,75 @@ def logsumexp(x, axis=None, out=None, dtype=None, keepdims=False): return dpnp.get_result_array(result, out, casting="same_kind") -def reciprocal(x1, **kwargs): +def reciprocal( + x, + /, + out=None, + *, + order="K", + where=True, + dtype=None, + subok=True, + **kwargs, +): """ Return the reciprocal of the argument, element-wise. For full documentation refer to :obj:`numpy.reciprocal`. + Parameters + ---------- + x : {dpnp.ndarray, usm_ndarray} + Input array. + out : {None, dpnp.ndarray}, optional + Output array to populate. + Array must have the correct shape and the expected data type. + order : {"C", "F", "A", "K"}, optional + Memory layout of the newly output array, if parameter `out` is ``None``. + Default: "K". + + Returns + ------- + out : dpnp.ndarray + An array containing the element-wise reciprocals. + + Notes + ----- + If `x` has a integral data type, the output will have the default + real-valued floating-point data type for the device where + input array `x` is allocated. If `x` has a floating-point + data type, the output will have the same data type. + Limitations ----------- - Input array is supported as :obj:`dpnp.ndarray`. + Parameters `where`, `dtype` and `subok` are supported with their default values. Keyword argument `kwargs` is currently unsupported. Otherwise the function will be executed sequentially on CPU. - Input array data types are limited by supported DPNP :ref:`Data types`. + + See Also + -------- + :obj:`dpnp.rsqrt` : Return the reciprocal square-root of an array, element-wise. 
Examples -------- >>> import dpnp as np >>> x = np.array([1, 2., 3.33]) - >>> out = np.reciprocal(x) - >>> [i for i in out] - [1.0, 0.5, 0.3003003] + >>> np.reciprocal(x) + array([1.0, 0.5, 0.3003003]) """ - x1_desc = dpnp.get_dpnp_descriptor( - x1, copy_when_strides=False, copy_when_nondefault_queue=False + return check_nd_call_func( + numpy.reciprocal, + dpnp_reciprocal, + x, + out=out, + where=where, + order=order, + dtype=dtype, + subok=subok, + **kwargs, ) - if x1_desc and not kwargs: - return dpnp_recip(x1_desc).get_pyobj() - - return call_origin(numpy.reciprocal, x1, **kwargs) def reduce_hypot(x, axis=None, out=None, dtype=None, keepdims=False): @@ -1453,14 +1491,14 @@ def reduce_hypot(x, axis=None, out=None, dtype=None, keepdims=False): Parameters ---------- - x : {dpnp_array, usm_ndarray} + x : {dpnp.ndarray, usm_ndarray} Input array, expected to have a real-valued data type. axis : int or tuple of ints, optional Axis or axes along which values must be computed. If a tuple of unique integers, values are computed over multiple axes. If ``None``, the result is computed over the entire array. Default: ``None``. - out : {dpnp_array, usm_ndarray}, optional + out : {dpnp.ndarray, usm_ndarray}, optional If provided, the result will be inserted into this array. It should be of the appropriate shape and dtype. dtype : data type, optional diff --git a/dpnp/dpnp_iface_types.py b/dpnp/dpnp_iface_types.py index 69a713f26d4..5767d466775 100644 --- a/dpnp/dpnp_iface_types.py +++ b/dpnp/dpnp_iface_types.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_utils/__init__.pxd b/dpnp/dpnp_utils/__init__.pxd index e0da397b168..b75421b748a 100644 --- a/dpnp/dpnp_utils/__init__.pxd +++ b/dpnp/dpnp_utils/__init__.pxd @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_utils/__init__.py b/dpnp/dpnp_utils/__init__.py index e198f23aff3..8dbf9017fab 100644 --- a/dpnp/dpnp_utils/__init__.py +++ b/dpnp/dpnp_utils/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pxd b/dpnp/dpnp_utils/dpnp_algo_utils.pxd index b7c739dcfbb..cd5415f6867 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pxd +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pxd @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_utils/dpnp_algo_utils.pyx b/dpnp/dpnp_utils/dpnp_algo_utils.pyx index aca73463a91..15451cda500 100644 --- a/dpnp/dpnp_utils/dpnp_algo_utils.pyx +++ b/dpnp/dpnp_utils/dpnp_algo_utils.pyx @@ -2,7 +2,7 @@ # cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py new file mode 100644 index 00000000000..d0add55eee3 --- /dev/null +++ b/dpnp/dpnp_utils/dpnp_utils_linearalgebra.py @@ -0,0 +1,363 @@ +# ***************************************************************************** +# Copyright (c) 2024, Intel Corporation +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# - Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE +# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. +# ***************************************************************************** + +import dpctl +import dpctl.tensor._tensor_impl as ti +import numpy + +import dpnp +import dpnp.backend.extensions.blas._blas_impl as bi +from dpnp.dpnp_utils import get_usm_allocations + +__all__ = ["dpnp_matmul"] + + +def _gemm_res_dtype(*arrays, dtype, casting, sycl_queue): + """ + Determines the output array data type and the intermediate data type. + + If dtype is ``None``, the output array data type is determined based on + the Promotion Type Rule and device capabilities. Otherwise, `dtype` is + used as output array dtype if input arrays can cast to it according to + the casting rule determined. If casting cannot be done, a ``TypeError`` + is raised. + The intermediate data type is the data type used for performing matmul + operation calculations. If output array dtype is a floating-point data type, + it is also used for the intermediate data type. If output array dtype is an + integral data type, the default floating point data type of the device where + input arrays are allocated on are used for intermediate data type. + + Parameters + ---------- + arrays : {dpnp.ndarray, usm_ndarray} + Input arrays. + dtype : dtype + If not ``None``, data type of the output array. + casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional + Controls what kind of data casting may occur. 
+ sycl_queue : {SyclQueue} + A SYCL queue to use for determining default floating point datat type. + + Returns + ------- + gemm_dtype, res_dtype : + `gemm_dtype` is the data type used in performing matmul calculations. + The input arrays of matmul function are cast to `gemm_dtype` and then + the calculations are performed. + `res_dtype` is the output data type. When the result is obtained, it is cast + to `res_dtype`. + + """ + + res_dtype = dpnp.result_type(*arrays) + default_dtype = dpnp.default_float_type(sycl_queue=sycl_queue) + + if dtype is not None: + if dpnp.can_cast(res_dtype, dtype, casting=casting): + res_dtype = dtype + else: + raise TypeError( + f"Cannot cast ufunc 'matmul' output from dtype({res_dtype}) to dtype({dtype}) with casting rule {casting}" + ) + + gemm_dtype = ( + res_dtype if dpnp.issubdtype(res_dtype, dpnp.inexact) else default_dtype + ) + + return gemm_dtype, res_dtype + + +def _gemm_batch_matmul(exec_q, x1, x2, res, x1_is_2D, x2_is_2D, dev_tasks_list): + # If input array is F-contiguous, we need to change the order to C-contiguous. + # because mkl::gemm_bacth needs each 2D array to be F-contiguous but + # when the input array is F-contiguous, the data of 2D array + # that needs to be called in mkl::gemm_batch are not contiguous. + ht_tasks_list = [] + x1 = _get_gemm_contig_array(x1, dev_tasks_list, ht_tasks_list) + x2 = _get_gemm_contig_array(x2, dev_tasks_list, ht_tasks_list) + + x1_strides = x1.strides + x2_strides = x2.strides + res_strides = res.strides + + # when shape along any particular dimension is 1, + # the stride along that dimension is not a + # meaningful number and is undefined. 
Here, we + # standardizing strides before continuing, + # setting stride to 0 if the shape along that axis is <=1 + if x1_is_2D: + x1_strides = tuple( + str_i if sh_i > 1 else 0 + for sh_i, str_i in zip(x1.shape, x1_strides) + ) + if x2_is_2D: + x2_strides = tuple( + str_i if sh_i > 1 else 0 + for sh_i, str_i in zip(x2.shape, x2_strides) + ) + + batch_size = res.shape[:-2][0] + stridea = x1_strides[0] + strideb = x2_strides[0] + stridec = res_strides[-3] + + if x1.ndim > 3: + iter = ti._contract_iter2( + res.shape[:-2], x1_strides[:-2], x2_strides[:-2] + ) + + if len(iter[0]) != 1: + raise ValueError("Input arrays cannot be used in gemm_batch") + batch_size = iter[0][0] + stridea = iter[1][0] + strideb = iter[3][0] + + ht_blas_ev, _ = bi._gemm_batch( + exec_q, + dpnp.get_usm_ndarray(x1), + dpnp.get_usm_ndarray(x2), + dpnp.get_usm_ndarray(res), + batch_size, + stridea, + strideb, + stridec, + dev_tasks_list, + ) + + return ht_blas_ev, ht_tasks_list, res + + +def _get_gemm_contig_array(x, dep_events, host_events, dtype=None): + """ + Creating a copy of input array if needed. + + This function has two use cases. In the first use case, which is more general, + if the input array is not c-contiguous or f-contiguous, we ensure it becomes + c-contiguous. Additionally, if the input array has an integral dtype, we + convert it to an appropriate floating-point data type specified by `dtype`. + In the second use case, which is for N-dimensional arrays with N>2, we need + to ensure c-contiguity. This is crucial because the implementation of the + `gemm_batch` function in dpnp only works for C-contiguous arrays. This use case + is essential when the input array is f-contiguous with floating point dtype for + which the array is not modified in the first use case. 
+ + """ + + if dtype is None: + copy = not x.flags.c_contiguous + else: + copy = ( + not (x.flags.c_contiguous or x.flags.f_contiguous) + or x.dtype != dtype + ) + + if copy: + x_copy = dpnp.empty_like(x, dtype=dtype, order="C") + ht_copy_ev, copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=dpnp.get_usm_ndarray(x), + dst=x_copy.get_array(), + sycl_queue=x.sycl_queue, + ) + dep_events.append(copy_ev) + host_events.append(ht_copy_ev) + return x_copy + return x + + +def dpnp_matmul( + x1, + x2, + /, + out=None, + *, + casting="same_kind", + order="K", + dtype=None, +): + """ + dpnp_matmul(x1, x2, out=None, casting="same_kind", order="K", dtype=None) + + Return the matrix product of two arrays. + + The main calculation is done by calling an extension function + for BLAS library of OneMKL. Depending on dimension of `x1` and `x2` arrays, + it will be either ``gemm`` (for one- and two-dimentional arrays) or + ``gemm_batch``(for others). + + """ + + x1_ndim = x1.ndim + x2_ndim = x2.ndim + + if x1_ndim == 0: + raise ValueError( + "input array 0 does not have enough dimensions (has 0, but requires at least 1)" + ) + if x2_ndim == 0: + raise ValueError( + "input array 1 does not have enough dimensions (has 0, but requires at least 1)" + ) + + res_usm_type, exec_q = get_usm_allocations([x1, x2]) + + squeeze_flag = x1_ndim == 1 or x2_ndim == 1 + if x1_ndim == 1: + x1 = x1[dpnp.newaxis, :] + x1_ndim = x1.ndim + + if x2_ndim == 1: + x2 = x2[:, dpnp.newaxis] + x2_ndim = x2.ndim + + x1_shape = x1.shape + x2_shape = x2.shape + if x1_shape[-1] != x2_shape[-2]: + raise ValueError( + "Input arrays have a mismatch in their core dimensions. " + "The core dimensions should follow this signature: (n?,k),(k,m?)->(n?,m?) 
" + f"(size {x1_shape[-1]} is different from {x2_shape[-2]})" + ) + + # Determine the appropriate data types + gemm_dtype, res_dtype = _gemm_res_dtype( + x1, x2, dtype=dtype, casting=casting, sycl_queue=exec_q + ) + + x1_is_2D = x1_ndim == 2 or numpy.prod(x1_shape[:-2]) == 1 # inherently 2D + x2_is_2D = x2_ndim == 2 or numpy.prod(x2_shape[:-2]) == 1 + + # find the result shape + if x1_is_2D and x2_is_2D: + x1 = x1.reshape(x1.shape[-2], x1.shape[-1]) + x2 = x2.reshape(x2.shape[-2], x2.shape[-1]) + res_shape = (x1.shape[-2], x2.shape[-1]) + else: + # makes the dimension of input the same by adding new axis + if x1_ndim != x2_ndim: + diff = abs(x1_ndim - x2_ndim) + if x1_ndim < x2_ndim: + x1 = x1.reshape((1,) * diff + x1.shape) + x1_ndim = x1.ndim + x1_shape = x1.shape + else: + x2 = x2.reshape((1,) * diff + x2.shape) + x2_ndim = x2.ndim + x2_shape = x2.shape + + # examining the option to align inputs + # when their shapes differ but they are 1-D in some dimensions. + tmp_shape = list(x1_shape[:-2]) + for i in range(x1_ndim - 2): + if x1_shape[i] != x2_shape[i]: + if x1_shape[i] == 1: + tmp_shape[i] = x2_shape[i] + # If the `x1` array is inherently 2D, there's no need to + # duplicate the data for the 1-D dimension; + # GEMM handles it automatically. + if not x1_is_2D: + x1 = dpnp.repeat(x1, x2_shape[i], axis=i) + elif x2_shape[i] == 1: + tmp_shape[i] = x1_shape[i] + if not x2_is_2D: + x2 = dpnp.repeat(x2, x1_shape[i], axis=i) + else: + raise ValueError( + "arrays could not be broadcast together with remapped shapes." 
+ ) + x1_shape = x1.shape + x2_shape = x2.shape + res_shape = tuple(tmp_shape) + (x1_shape[-2], x2_shape[-1]) + + # calculate results + result = dpnp.empty( + res_shape, + dtype=gemm_dtype, + usm_type=res_usm_type, + sycl_queue=exec_q, + ) + if result.size == 0: + pass + elif x1.size == 0 or x2.size == 0: + result.fill(0) + else: + # input arrays should have the proper data type + # and be C_CONTIGUOUS or F_CONTIGUOUS + dep_events_list = [] + host_tasks_list = [] + x1 = _get_gemm_contig_array( + x1, dep_events_list, host_tasks_list, gemm_dtype + ) + x2 = _get_gemm_contig_array( + x2, dep_events_list, host_tasks_list, gemm_dtype + ) + + if x1_is_2D and x2_is_2D: + ht_blas_ev, _ = bi._gemm( + exec_q, + dpnp.get_usm_ndarray(x1), + dpnp.get_usm_ndarray(x2), + dpnp.get_usm_ndarray(result), + dep_events_list, + ) + else: + ( + ht_blas_ev, + ht_copy_ev, + result, + ) = _gemm_batch_matmul( + exec_q, + x1, + x2, + result, + x1_is_2D, + x2_is_2D, + dep_events_list, + ) + host_tasks_list += ht_copy_ev + + host_tasks_list.append(ht_blas_ev) + dpctl.SyclEvent.wait_for(host_tasks_list) + + if squeeze_flag: + result = dpnp.squeeze(result) + + if x1_is_2D and x2_is_2D: + # add new axes only if one of the input arrays + # was inehrently 2D + new_size = max(x1_ndim, x2_ndim) + for _ in range(new_size - 2): + result = result[dpnp.newaxis, :] + + if gemm_dtype != res_dtype: + result = dpnp.astype(result, res_dtype, copy=False) + if out is None: + # If `order` was not passed as default + # we need to update it to match the passed `order`. 
+ if order not in ["k", "K"]: + return dpnp.array(result, copy=False, order=order) + else: + return result + else: + return dpnp.get_result_array(result, out, casting=casting) diff --git a/dpnp/dpnp_utils/dpnp_utils_statistics.py b/dpnp/dpnp_utils/dpnp_utils_statistics.py index 7ed82953541..36f74cf79a2 100644 --- a/dpnp/dpnp_utils/dpnp_utils_statistics.py +++ b/dpnp/dpnp_utils/dpnp_utils_statistics.py @@ -1,8 +1,5 @@ -# cython: language_level=3 -# distutils: language = c++ -# -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2023, Intel Corporation +# Copyright (c) 2023-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/fft/__init__.py b/dpnp/fft/__init__.py index 0c980d00bcf..1b743518d79 100644 --- a/dpnp/fft/__init__.py +++ b/dpnp/fft/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/fft/dpnp_algo_fft.pyx b/dpnp/fft/dpnp_algo_fft.pyx index 0ffb4b032ce..5fe7e09414b 100644 --- a/dpnp/fft/dpnp_algo_fft.pyx +++ b/dpnp/fft/dpnp_algo_fft.pyx @@ -2,7 +2,7 @@ # cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/fft/dpnp_iface_fft.py b/dpnp/fft/dpnp_iface_fft.py index c82b3609649..f8609293701 100644 --- a/dpnp/fft/dpnp_iface_fft.py +++ b/dpnp/fft/dpnp_iface_fft.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/linalg/__init__.py b/dpnp/linalg/__init__.py index 40d9338cf8b..58959481bea 100644 --- a/dpnp/linalg/__init__.py +++ b/dpnp/linalg/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/linalg/dpnp_algo_linalg.pyx b/dpnp/linalg/dpnp_algo_linalg.pyx index c86b869acd3..1d94a893fff 100644 --- a/dpnp/linalg/dpnp_algo_linalg.pyx +++ b/dpnp/linalg/dpnp_algo_linalg.pyx @@ -2,7 +2,7 @@ # cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -45,12 +45,9 @@ cimport numpy cimport dpnp.dpnp_utils as utils __all__ = [ - "dpnp_cholesky", "dpnp_cond", - "dpnp_det", "dpnp_eig", "dpnp_eigvals", - "dpnp_inv", "dpnp_matrix_rank", "dpnp_norm", "dpnp_qr", @@ -68,9 +65,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*custom_linalg_1in_1out_func_ptr_t_)(c_dpctl. 
ctypedef c_dpctl.DPCTLSyclEventRef(*custom_linalg_1in_1out_with_size_func_ptr_t_)(c_dpctl.DPCTLSyclQueueRef, void *, void * , size_t, const c_dpctl.DPCTLEventVectorRef) -ctypedef c_dpctl.DPCTLSyclEventRef(*custom_linalg_1in_1out_with_2size_func_ptr_t_)(c_dpctl.DPCTLSyclQueueRef, - void *, void * , size_t, size_t, - const c_dpctl.DPCTLEventVectorRef) ctypedef c_dpctl.DPCTLSyclEventRef(*custom_linalg_1in_3out_shape_t)(c_dpctl.DPCTLSyclQueueRef, void *, void * , void * , void * , size_t , size_t, const c_dpctl.DPCTLEventVectorRef) @@ -79,43 +73,6 @@ ctypedef c_dpctl.DPCTLSyclEventRef(*custom_linalg_2in_1out_func_ptr_t)(c_dpctl.D const c_dpctl.DPCTLEventVectorRef) -cpdef utils.dpnp_descriptor dpnp_cholesky(utils.dpnp_descriptor input_): - size_ = input_.shape[-1] - - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input_.dtype) - - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_CHOLESKY_EXT, param1_type, param1_type) - - input_obj = input_.get_array() - - # create result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(input_.shape, - kernel_data.return_type, - None, - device=input_obj.sycl_device, - usm_type=input_obj.usm_type, - sycl_queue=input_obj.sycl_queue) - - result_sycl_queue = result.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef custom_linalg_1in_1out_with_2size_func_ptr_t_ func = kernel_data.ptr - - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - input_.get_data(), - result.get_data(), - input_.size, - size_, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result - - cpdef object dpnp_cond(object input, object p): if p in ('f', 'fro'): # TODO: change order='K' when support is implemented @@ -140,47 +97,6 @@ cpdef object dpnp_cond(object input, object p): return ret -cpdef utils.dpnp_descriptor 
dpnp_det(utils.dpnp_descriptor input): - cdef shape_type_c input_shape = input.shape - cdef size_t n = input.shape[-1] - cdef shape_type_c result_shape = (1,) - if input.ndim != 2: - result_shape = tuple((list(input.shape))[:-2]) - - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) - - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_DET_EXT, param1_type, param1_type) - - input_obj = input.get_array() - - # create result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(result_shape, - kernel_data.return_type, - None, - device=input_obj.sycl_device, - usm_type=input_obj.usm_type, - sycl_queue=input_obj.sycl_queue) - - result_sycl_queue = result.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef custom_linalg_1in_1out_func_ptr_t func = kernel_data.ptr - - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - input.get_data(), - result.get_data(), - input_shape.data(), - input.ndim, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result - - cpdef tuple dpnp_eig(utils.dpnp_descriptor x1): cdef shape_type_c x1_shape = x1.shape @@ -269,46 +185,6 @@ cpdef utils.dpnp_descriptor dpnp_eigvals(utils.dpnp_descriptor input): return res_val -cpdef utils.dpnp_descriptor dpnp_inv(utils.dpnp_descriptor input): - cdef shape_type_c input_shape = input.shape - - cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) - - cdef DPNPFuncData kernel_data = get_dpnp_function_ptr(DPNP_FN_INV_EXT, param1_type, param1_type) - - input_obj = input.get_array() - - cdef (DPNPFuncType, void *) ret_type_and_func = utils.get_ret_type_and_func(kernel_data, - input_obj.sycl_device.has_aspect_fp64) - cdef DPNPFuncType return_type = ret_type_and_func[0] - cdef custom_linalg_1in_1out_func_ptr_t func = < 
custom_linalg_1in_1out_func_ptr_t > ret_type_and_func[1] - - # create result array with type given by FPTR data - cdef utils.dpnp_descriptor result = utils.create_output_descriptor(input_shape, - return_type, - None, - device=input_obj.sycl_device, - usm_type=input_obj.usm_type, - sycl_queue=input_obj.sycl_queue) - - result_sycl_queue = result.get_array().sycl_queue - - cdef c_dpctl.SyclQueue q = result_sycl_queue - cdef c_dpctl.DPCTLSyclQueueRef q_ref = q.get_queue_ref() - - cdef c_dpctl.DPCTLSyclEventRef event_ref = func(q_ref, - input.get_data(), - result.get_data(), - input_shape.data(), - input.ndim, - NULL) # dep_events_ref - - with nogil: c_dpctl.DPCTLEvent_WaitAndThrow(event_ref) - c_dpctl.DPCTLEvent_Delete(event_ref) - - return result - - cpdef utils.dpnp_descriptor dpnp_matrix_rank(utils.dpnp_descriptor input): cdef shape_type_c input_shape = input.shape cdef DPNPFuncType param1_type = dpnp_dtype_to_DPNPFuncType(input.dtype) diff --git a/dpnp/linalg/dpnp_iface_linalg.py b/dpnp/linalg/dpnp_iface_linalg.py index c9c9c855728..800aa8de1bb 100644 --- a/dpnp/linalg/dpnp_iface_linalg.py +++ b/dpnp/linalg/dpnp_iface_linalg.py @@ -1,8 +1,5 @@ -# cython: language_level=3 -# distutils: language = c++ -# -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -50,7 +47,11 @@ from .dpnp_utils_linalg import ( check_stacked_2d, check_stacked_square, + dpnp_cholesky, + dpnp_det, dpnp_eigh, + dpnp_inv, + dpnp_slogdet, dpnp_solve, ) @@ -69,55 +70,68 @@ "qr", "solve", "svd", + "slogdet", ] -def cholesky(input): +def cholesky(a, upper=False): """ Cholesky decomposition. 
- Return the Cholesky decomposition, `L * L.H`, of the square matrix `input`, - where `L` is lower-triangular and .H is the conjugate transpose operator - (which is the ordinary transpose if `input` is real-valued). `input` must be - Hermitian (symmetric if real-valued) and positive-definite. No - checking is performed to verify whether `a` is Hermitian or not. - In addition, only the lower-triangular and diagonal elements of `input` - are used. Only `L` is actually returned. + Return the lower or upper Cholesky decomposition, ``L * L.H`` or + ``U.H * U``, of the square matrix ``a``, where ``L`` is lower-triangular, + ``U`` is upper-triangular, and ``.H`` is the conjugate transpose operator + (which is the ordinary transpose if ``a`` is real-valued). ``a`` must be + Hermitian (symmetric if real-valued) and positive-definite. No checking is + performed to verify whether ``a`` is Hermitian or not. In addition, only + the lower or upper-triangular and diagonal elements of ``a`` are used. + Only ``L`` or ``U`` is actually returned. + + For full documentation refer to :obj:`numpy.linalg.cholesky`. Parameters ---------- - input : (..., M, M) array_like + a : (..., M, M) {dpnp.ndarray, usm_ndarray} Hermitian (symmetric if all elements are real), positive-definite input matrix. + upper : bool, optional + If ``True``, the result must be the upper-triangular Cholesky factor. + If ``False``, the result must be the lower-triangular Cholesky factor. + Default: ``False``. Returns ------- - L : (..., M, M) array_like - Upper or lower-triangular Cholesky factor of `input`. Returns a - matrix object if `input` is a matrix object. + L : (..., M, M) dpnp.ndarray + Lower or upper-triangular Cholesky factor of `a`. 
+ + Examples + -------- + >>> import dpnp as np + >>> A = np.array([[1.0, 2.0],[2.0, 5.0]]) + >>> A + array([[1., 2.], + [2., 5.]]) + >>> L = np.linalg.cholesky(A) + >>> L + array([[1., 0.], + [2., 1.]]) + >>> np.dot(L, L.T.conj()) # verify that L * L.H = A + array([[1., 2.], + [2., 5.]]) + + The upper-triangular Cholesky factor can also be obtained: + + >>> np.linalg.cholesky(A, upper=True) + array([[ 1.+0.j, -0.-2.j], + [ 0.+0.j, 1.+0.j]] + """ - x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) - if x1_desc: - if x1_desc.shape[-1] != x1_desc.shape[-2]: - pass - else: - if input.dtype == dpnp.int32 or input.dtype == dpnp.int64: - dev = x1_desc.get_array().sycl_device - if dev.has_aspect_fp64: - dtype = dpnp.float64 - else: - dtype = dpnp.float32 - # TODO memory copy. needs to move into DPNPC - input_ = dpnp.get_dpnp_descriptor( - dpnp.astype(input, dtype=dtype), - copy_when_nondefault_queue=False, - ) - else: - input_ = x1_desc - return dpnp_cholesky(input_).get_pyobj() - - return call_origin(numpy.linalg.cholesky, input) + dpnp.check_supported_arrays_type(a) + check_stacked_2d(a) + check_stacked_square(a) + + return dpnp_cholesky(a, upper=upper) def cond(input, p=None): @@ -148,32 +162,50 @@ def cond(input, p=None): return call_origin(numpy.linalg.cond, input, p) -def det(input): +def det(a): """ Compute the determinant of an array. + For full documentation refer to :obj:`numpy.linalg.det`. + Parameters ---------- - input : (..., M, M) array_like + a : (..., M, M) {dpnp.ndarray, usm_ndarray} Input array to compute determinants for. Returns ------- - det : (...) array_like - Determinant of `input`. - """ + det : (...) dpnp.ndarray + Determinant of `a`. 
- x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) - if x1_desc: - if x1_desc.ndim < 2: - pass - elif x1_desc.shape[-1] == x1_desc.shape[-2]: - result_obj = dpnp_det(x1_desc).get_pyobj() - result = dpnp.convert_single_elem_array_to_scalar(result_obj) + See Also + -------- + :obj:`dpnp.linalg.slogdet` : Returns sign and logarithm of the determinant of an array. - return result + Examples + -------- + The determinant of a 2-D array [[a, b], [c, d]] is ad - bc: + + >>> import dpnp as dp + >>> a = dp.array([[1, 2], [3, 4]]) + >>> dp.linalg.det(a) + array(-2.) + + Computing determinants for a stack of matrices: + + >>> a = dp.array([ [[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]] ]) + >>> a.shape + (3, 2, 2) + >>> dp.linalg.det(a) + array([-2., -3., -8.]) + + """ + + dpnp.check_supported_arrays_type(a) + check_stacked_2d(a) + check_stacked_square(a) - return call_origin(numpy.linalg.det, input) + return dpnp_det(a) def eig(x1): @@ -285,30 +317,51 @@ def eigvals(input): return call_origin(numpy.linalg.eigvals, input) -def inv(input): +def inv(a): """ - Divide arguments element-wise. + Compute the (multiplicative) inverse of a matrix. + + Given a square matrix a, return the matrix ainv + satisfying ``dot(a, ainv) = dot(ainv, a) = eye(a.shape[0])``. For full documentation refer to :obj:`numpy.linalg.inv`. - Limitations - ----------- - Input array is supported as :obj:`dpnp.ndarray`. - Dimension of input array is supported to be equal to ``2``. - Shape of input array is limited by ``input.shape[0] == input.shape[1]``, ``input.shape[0] >= 2``. - Otherwise the function will be executed sequentially on CPU. + Parameters + ---------- + a : (..., M, M) {dpnp.ndarray, usm_ndarray} + Matrix to be inverted. + + Returns + ------- + out : (..., M, M) dpnp.ndarray + (Multiplicative) inverse of the matrix a. 
+ + Examples + -------- + >>> import dpnp as np + >>> a = np.array([[1., 2.], [3., 4.]]) + >>> ainv = np.linalg.inv(a) + >>> np.allclose(np.dot(a, ainv), np.eye(2)) + array([ True]) + >>> np.allclose(np.dot(ainv, a), np.eye(2)) + array([ True]) + + Inverses of several matrices can be computed at once: + >>> a = np.array([[[1., 2.], [3., 4.]], [[1, 3], [3, 5]]]) + >>> np.linalg.inv(a) + array([[[-2. , 1. ], + [ 1.5 , -0.5 ]], + + [[-1.25, 0.75], + [ 0.75, -0.25]]]) + """ - x1_desc = dpnp.get_dpnp_descriptor(input, copy_when_nondefault_queue=False) - if x1_desc: - if ( - x1_desc.ndim == 2 - and x1_desc.shape[0] == x1_desc.shape[1] - and x1_desc.shape[0] >= 2 - ): - return dpnp_inv(x1_desc).get_pyobj() + dpnp.check_supported_arrays_type(a) + check_stacked_2d(a) + check_stacked_square(a) - return call_origin(numpy.linalg.inv, input) + return dpnp_inv(a) def matrix_power(input, count): @@ -633,3 +686,59 @@ def svd(x1, full_matrices=True, compute_uv=True, hermitian=False): return call_origin( numpy.linalg.svd, x1, full_matrices, compute_uv, hermitian ) + + +def slogdet(a): + """ + Compute the sign and (natural) logarithm of the determinant of an array. + + For full documentation refer to :obj:`numpy.linalg.slogdet`. + + Parameters + ---------- + a : (..., M, M) {dpnp.ndarray, usm_ndarray} + Input array, has to be a square 2-D array. + + Returns + ------- + sign : (...) dpnp.ndarray + A number representing the sign of the determinant. For a real matrix, + this is 1, 0, or -1. For a complex matrix, this is a complex number + with absolute value 1 (i.e., it is on the unit circle), or else 0. + logabsdet : (...) dpnp.ndarray + The natural log of the absolute value of the determinant. + + See Also + -------- + :obj:`dpnp.det` : Returns the determinant of an array. 
+ + Examples + -------- + The determinant of a 2-D array [[a, b], [c, d]] is ad - bc: + + >>> import dpnp as dp + >>> a = dp.array([[1, 2], [3, 4]]) + >>> (sign, logabsdet) = dp.linalg.slogdet(a) + >>> (sign, logabsdet) + (array(-1.), array(0.69314718)) + >>> sign * dp.exp(logabsdet) + array(-2.) + + Computing log-determinants for a stack of matrices: + + >>> a = dp.array([ [[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]] ]) + >>> a.shape + (3, 2, 2) + >>> sign, logabsdet = dp.linalg.slogdet(a) + >>> (sign, logabsdet) + (array([-1., -1., -1.]), array([0.69314718, 1.09861229, 2.07944154])) + >>> sign * dp.exp(logabsdet) + array([-2., -3., -8.]) + + """ + + dpnp.check_supported_arrays_type(a) + check_stacked_2d(a) + check_stacked_square(a) + + return dpnp_slogdet(a) diff --git a/dpnp/linalg/dpnp_utils_linalg.py b/dpnp/linalg/dpnp_utils_linalg.py index a3a2802072c..f2632b5b6a4 100644 --- a/dpnp/linalg/dpnp_utils_linalg.py +++ b/dpnp/linalg/dpnp_utils_linalg.py @@ -1,8 +1,5 @@ -# cython: language_level=3 -# distutils: language = c++ -# -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2023, Intel Corporation +# Copyright (c) 2023-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -37,13 +34,118 @@ __all__ = [ "check_stacked_2d", "check_stacked_square", + "dpnp_cholesky", + "dpnp_det", "dpnp_eigh", + "dpnp_inv", + "dpnp_slogdet", "dpnp_solve", ] _jobz = {"N": 0, "V": 1} _upper_lower = {"U": 0, "L": 1} +_real_types_map = { + "float32": "float32", # single : single + "float64": "float64", # double : double + "complex64": "float32", # csingle : csingle + "complex128": "float64", # cdouble : cdouble +} + + +def _calculate_determinant_sign(ipiv, diag, res_type, n): + """ + Calculate the sign of the determinant based on row exchanges and diagonal values. 
+ + Parameters + ----------- + ipiv : {dpnp.ndarray, usm_ndarray} + The pivot indices from LU decomposition. + diag : {dpnp.ndarray, usm_ndarray} + The diagonal elements of the LU decomposition matrix. + res_type : dpnp.dtype + The common data type for linalg operations. + n : int + The size of the last two dimensions of the array. + + Returns + ------- + sign : {dpnp_array, usm_ndarray} + The sign of the determinant. + + """ + + # Checks for row exchanges in LU decomposition affecting determinant sign. + ipiv_diff = ipiv != dpnp.arange( + 1, n + 1, usm_type=ipiv.usm_type, sycl_queue=ipiv.sycl_queue + ) + + # Counts row exchanges from 'ipiv_diff'. + non_zero = dpnp.count_nonzero(ipiv_diff, axis=-1) + + # For floating types, adds count of negative diagonal elements + # to determine determinant sign. + if dpnp.issubdtype(res_type, dpnp.floating): + non_zero += dpnp.count_nonzero(diag < 0, axis=-1) + + sign = (non_zero % 2) * -2 + 1 + + # For complex types, compute sign from the phase of diagonal elements. + if dpnp.issubdtype(res_type, dpnp.complexfloating): + sign = sign * dpnp.prod(diag / dpnp.abs(diag), axis=-1) + + return sign.astype(res_type) + + +def _check_lapack_dev_info(dev_info, error_msg=None): + """ + Check `dev_info` from OneMKL LAPACK routines, raising an error for failures. + + Parameters + ---------- + dev_info : list of ints + Each element of the list indicates the status of OneMKL LAPACK routine calls. + A non-zero value signifies a failure. + + error_message : str, optional + Custom error message for detected LAPACK errors. + Default: `Singular matrix` + + Raises + ------ + dpnp.linalg.LinAlgError + On non-zero elements in dev_info, indicating LAPACK errors. + + """ + + if any(dev_info): + error_msg = error_msg or "Singular matrix" + + raise dpnp.linalg.LinAlgError(error_msg) + + +def _real_type(dtype, device=None): + """ + Returns the real data type corresponding to a given dpnp data type. 
+ + Parameters + ---------- + dtype : dpnp.dtype + The dtype for which to find the corresponding real data type. + device : {None, string, SyclDevice, SyclQueue}, optional + An array API concept of device where an array of default floating type might be created. + + Returns + ------- + out : str + The name of the real data type. + + """ + + default = dpnp.default_float_type(device) + real_type = _real_types_map.get(dtype.name, default) + return dpnp.dtype(real_type) + def check_stacked_2d(*arrays): """ @@ -53,7 +155,7 @@ def check_stacked_2d(*arrays): Parameters ---------- - arrays : {dpnp_array, usm_ndarray} + arrays : {dpnp.ndarray, usm_ndarray} A sequence of input arrays to check for dimensionality. Returns @@ -92,7 +194,7 @@ def check_stacked_square(*arrays): Parameters ---------- - arrays : {dpnp_array, usm_ndarray} + arrays : {dpnp.ndarray, usm_ndarray} A sequence of input arrays to check for square matrix shape. Returns @@ -117,8 +219,6 @@ def check_stacked_square(*arrays): def _common_type(*arrays): """ - _common_type(*arrays) - Common type for linear algebra operations. This function determines the common data type for linalg operations. @@ -129,12 +229,15 @@ def _common_type(*arrays): - The default floating-point data type is determined by the capabilities of the device on which `arrays` are created, as indicated by `dpnp.default_float_type()`. - Args: - *arrays (dpnp.ndarray): Input arrays. - - Returns: - dtype_common (dtype): The common data type for linalg operations. + Parameters + ---------- + arrays : {dpnp.ndarray, usm_ndarray} + A sequence of input arrays. + Returns + ------- + dtype_common : dpnp.dtype + The common data type for linalg operations. This returned value is applicable both as the precision to be used in linalg calls and as the dtype of (possibly complex) output(s). 
@@ -150,30 +253,393 @@ def _common_type(*arrays): def _common_inexact_type(default_dtype, *dtypes): """ - _common_inexact_type(default_dtype, *dtypes) - Determines the common 'inexact' data type for linear algebra operations. This function selects an 'inexact' data type appropriate for the device's capabilities. It defaults to `default_dtype` when provided types are not 'inexact'. - Args: - default_dtype: The default data type. This is determined by the capabilities of + Parameters + ---------- + default_dtype : dpnp.dtype + The default data type. This is determined by the capabilities of the device and is used when none of the provided types are 'inexact'. *dtypes: A variable number of data types to be evaluated to find the common 'inexact' type. - Returns: - dpnp.result_type (dtype) : The resultant 'inexact' data type for linalg operations, + Returns + ------- + dpnp.result_type : dpnp.dtype + The resultant 'inexact' data type for linalg operations, ensuring computational compatibility. """ + inexact_dtypes = [ dt if issubdtype(dt, dpnp.inexact) else default_dtype for dt in dtypes ] return dpnp.result_type(*inexact_dtypes) +def _lu_factor(a, res_type): + """ + Compute pivoted LU decomposition. + + Decompose a given batch of square matrices. Inputs and outputs are + transposed. + + Parameters + ---------- + a : (..., M, M) {dpnp.ndarray, usm_ndarray} + Input array containing the matrices to be decomposed. + res_type : dpnp.dtype + Specifies the data type of the result. + Acceptable data types are float32, float64, complex64, or complex128. + + Returns + ------- + tuple: + lu_t : (..., N, N) {dpnp.ndarray, usm_ndarray} + Combined 'L' and 'U' matrices from LU decomposition + excluding the diagonal of 'L'. + piv : (..., N) {dpnp.ndarray, usm_ndarray} + 1-origin pivot indices indicating row permutations during decomposition. + dev_info : (...) {dpnp.ndarray, usm_ndarray} + Information on `getrf` or `getrf_batch` computation success (0 for success). 
+ + """ + + n = a.shape[-2] + + a_sycl_queue = a.sycl_queue + a_usm_type = a.usm_type + + # TODO: Find out at which array sizes the best performance is obtained + # getrf_batch implementation shows slow results with large arrays on GPU. + # Use getrf_batch only on CPU. + # On GPU call getrf for each two-dimensional array by loop + use_batch = a.sycl_device.has_aspect_cpu + + if a.ndim > 2: + orig_shape = a.shape + # get 3d input arrays by reshape + a = a.reshape(-1, n, n) + batch_size = a.shape[0] + a_usm_arr = dpnp.get_usm_ndarray(a) + + if use_batch: + # `a` must be copied because getrf_batch destroys the input matrix + a_h = dpnp.empty_like(a, order="C", dtype=res_type) + ipiv_h = dpnp.empty( + (batch_size, n), + dtype=dpnp.int64, + order="C", + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + dev_info_h = [0] * batch_size + + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a_sycl_queue + ) + + ipiv_stride = n + a_stride = a_h.strides[0] + + # Call the LAPACK extension function _getrf_batch + # to perform LU decomposition of a batch of general matrices + ht_lapack_ev, _ = li._getrf_batch( + a_sycl_queue, + a_h.get_array(), + ipiv_h.get_array(), + dev_info_h, + n, + a_stride, + ipiv_stride, + batch_size, + [a_copy_ev], + ) + + ht_lapack_ev.wait() + a_ht_copy_ev.wait() + + dev_info_array = dpnp.array( + dev_info_h, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + + # Reshape the results back to their original shape + a_h = a_h.reshape(orig_shape) + ipiv_h = ipiv_h.reshape(orig_shape[:-1]) + dev_info_array = dev_info_array.reshape(orig_shape[:-2]) + + return (a_h, ipiv_h, dev_info_array) + + else: + # Initialize lists for storing arrays and events for each batch + a_vecs = [None] * batch_size + ipiv_vecs = [None] * batch_size + dev_info_vecs = [None] * batch_size + a_ht_copy_ev = [None] * batch_size + ht_lapack_ev = [None] * batch_size + + # Process each batch + for i in range(batch_size): + # 
Copy each 2D slice to a new array as getrf destroys the input matrix + a_vecs[i] = dpnp.empty_like(a[i], order="C", dtype=res_type) + ( + a_ht_copy_ev[i], + a_copy_ev, + ) = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr[i], + dst=a_vecs[i].get_array(), + sycl_queue=a_sycl_queue, + ) + ipiv_vecs[i] = dpnp.empty( + (n,), + dtype=dpnp.int64, + order="C", + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + dev_info_vecs[i] = [0] + + # Call the LAPACK extension function _getrf + # to perform LU decomposition on each batch in 'a_vecs[i]' + ht_lapack_ev[i], _ = li._getrf( + a_sycl_queue, + a_vecs[i].get_array(), + ipiv_vecs[i].get_array(), + dev_info_vecs[i], + [a_copy_ev], + ) + + for i in range(batch_size): + ht_lapack_ev[i].wait() + a_ht_copy_ev[i].wait() + + # Reshape the results back to their original shape + out_a = dpnp.array(a_vecs, order="C").reshape(orig_shape) + out_ipiv = dpnp.array(ipiv_vecs).reshape(orig_shape[:-1]) + out_dev_info = dpnp.array( + dev_info_vecs, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ).reshape(orig_shape[:-2]) + + return (out_a, out_ipiv, out_dev_info) + + else: + a_usm_arr = dpnp.get_usm_ndarray(a) + + # `a` must be copied because getrf destroys the input matrix + a_h = dpnp.empty_like(a, order="C", dtype=res_type) + + # use DPCTL tensor function to fill the сopy of the input array + # from the input array + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a_sycl_queue + ) + + ipiv_h = dpnp.empty( + n, + dtype=dpnp.int64, + order="C", + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + dev_info_h = [0] + + # Call the LAPACK extension function _getrf + # to perform LU decomposition on the input matrix + ht_lapack_ev, _ = li._getrf( + a_sycl_queue, + a_h.get_array(), + ipiv_h.get_array(), + dev_info_h, + [a_copy_ev], + ) + + ht_lapack_ev.wait() + a_ht_copy_ev.wait() + + dev_info_array = dpnp.array( + dev_info_h, usm_type=a_usm_type, 
sycl_queue=a_sycl_queue + ) + + # Return a tuple containing the factorized matrix 'a_h', + # pivot indices 'ipiv_h' + # and the status 'dev_info_h' from the LAPACK getrf call + return (a_h, ipiv_h, dev_info_array) + + +def dpnp_cholesky_batch(a, upper_lower, res_type): + """ + dpnp_cholesky_batch(a, upper_lower, res_type) + + Return the batched Cholesky decomposition of `a` array. + + """ + + a_sycl_queue = a.sycl_queue + a_usm_type = a.usm_type + + n = a.shape[-2] + + orig_shape = a.shape + # get 3d input arrays by reshape + a = a.reshape(-1, n, n) + batch_size = a.shape[0] + a_usm_arr = dpnp.get_usm_ndarray(a) + + # `a` must be copied because potrf_batch destroys the input matrix + a_h = dpnp.empty_like(a, order="C", dtype=res_type, usm_type=a_usm_type) + + # use DPCTL tensor function to fill the сopy of the input array + # from the input array + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a_sycl_queue + ) + + a_stride = a_h.strides[0] + + # Call the LAPACK extension function _potrf_batch + # to computes the Cholesky decomposition of a batch of + # symmetric positive-definite matrices + ht_lapack_ev, _ = li._potrf_batch( + a_sycl_queue, + a_h.get_array(), + upper_lower, + n, + a_stride, + batch_size, + [a_copy_ev], + ) + + ht_lapack_ev.wait() + a_ht_copy_ev.wait() + + # Get upper or lower-triangular matrix part as per `upper_lower` value + # upper_lower is 0 (lower) or 1 (upper) + if upper_lower: + a_h = dpnp.triu(a_h.reshape(orig_shape)) + else: + a_h = dpnp.tril(a_h.reshape(orig_shape)) + + return a_h + + +def dpnp_cholesky(a, upper): + """ + dpnp_cholesky(a, upper) + + Return the Cholesky decomposition of `a` array. 
+ + """ + + a_sycl_queue = a.sycl_queue + a_usm_type = a.usm_type + + res_type = _common_type(a) + + a_shape = a.shape + + if a.size == 0: + return dpnp.empty( + a_shape, + dtype=res_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + + # Set `uplo` value for `potrf` and `potrf_batch` function based on the boolean input `upper`. + # In oneMKL, `uplo` value of 1 is equivalent to oneapi::mkl::uplo::lower + # and `uplo` value of 0 is equivalent to oneapi::mkl::uplo::upper. + # However, we adjust this logic based on the array's memory layout. + # Note: lower for row-major (which is used here) is upper for column-major layout. + # Reference: comment from tbmkl/tests/lapack/unit/dpcpp/potrf_usm/potrf_usm.cpp + # This means that if `upper` is False (lower triangular), + # we actually use oneapi::mkl::uplo::upper (0) for the row-major layout, and vice versa. + upper_lower = int(upper) + + if a.ndim > 2: + return dpnp_cholesky_batch(a, upper_lower, res_type) + + a_usm_arr = dpnp.get_usm_ndarray(a) + + # `a` must be copied because potrf destroys the input matrix + a_h = dpnp.empty_like(a, order="C", dtype=res_type, usm_type=a_usm_type) + + # use DPCTL tensor function to fill the сopy of the input array + # from the input array + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a_sycl_queue + ) + + # Call the LAPACK extension function _potrf + # to computes the Cholesky decomposition + ht_lapack_ev, _ = li._potrf( + a_sycl_queue, + a_h.get_array(), + upper_lower, + [a_copy_ev], + ) + + ht_lapack_ev.wait() + a_ht_copy_ev.wait() + + # Get upper or lower-triangular matrix part as per `upper` value + if upper: + a_h = dpnp.triu(a_h) + else: + a_h = dpnp.tril(a_h) + + return a_h + + +def dpnp_det(a): + """ + dpnp_det(a) + + Returns the determinant of `a` array. 
+ + """ + + a_usm_type = a.usm_type + a_sycl_queue = a.sycl_queue + + res_type = _common_type(a) + + a_shape = a.shape + shape = a_shape[:-2] + n = a_shape[-2] + + if a.size == 0: + # empty batch (result is empty, too) or empty matrices det([[]]) == 1 + det = dpnp.ones( + shape, + dtype=res_type, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + return det + + lu, ipiv, dev_info = _lu_factor(a, res_type) + + # Transposing 'lu' to swap the last two axes for compatibility + # with 'dpnp.diagonal' as it does not support 'axis1' and 'axis2' arguments. + # TODO: Replace with 'dpnp.diagonal(lu, axis1=-2, axis2=-1)' when supported. + lu_transposed = lu.transpose(-2, -1, *range(lu.ndim - 2)) + diag = dpnp.diagonal(lu_transposed) + + det = dpnp.prod(dpnp.abs(diag), axis=-1) + + sign = _calculate_determinant_sign(ipiv, diag, res_type, n) + + det = sign * det + det = det.astype(res_type, copy=False) + singular = dev_info > 0 + det = dpnp.where(singular, res_type.type(0), det) + + return det.reshape(shape) + + def dpnp_eigh(a, UPLO): """ dpnp_eigh(a, UPLO) @@ -302,6 +768,145 @@ def dpnp_eigh(a, UPLO): return w, out_v +def dpnp_inv_batched(a, res_type): + """ + dpnp_inv_batched(a, res_type) + + Return the inverses of each matrix in a batch of matrices `a`. + + The inverse of a matrix is such that if it is multiplied by the original matrix, + it results in the identity matrix. This function computes the inverses of a batch + of square matrices. 
+ """ + + orig_shape = a.shape + # get 3d input arrays by reshape + a = a.reshape(-1, orig_shape[-2], orig_shape[-1]) + batch_size = a.shape[0] + a_usm_arr = dpnp.get_usm_ndarray(a) + a_sycl_queue = a.sycl_queue + a_usm_type = a.usm_type + n = a.shape[1] + + # oneMKL LAPACK getri_batch overwrites `a` + a_h = dpnp.empty_like(a, order="C", dtype=res_type, usm_type=a_usm_type) + ipiv_h = dpnp.empty( + (batch_size, n), + dtype=dpnp.int64, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + dev_info = [0] * batch_size + + # use DPCTL tensor function to fill the matrix array + # with content from the input array `a` + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_h.get_array(), sycl_queue=a.sycl_queue + ) + + ipiv_stride = n + a_stride = a_h.strides[0] + + # Call the LAPACK extension function _getrf_batch + # to perform LU decomposition of a batch of general matrices + ht_getrf_ev, getrf_ev = li._getrf_batch( + a_sycl_queue, + a_h.get_array(), + ipiv_h.get_array(), + dev_info, + n, + a_stride, + ipiv_stride, + batch_size, + [a_copy_ev], + ) + + _check_lapack_dev_info(dev_info) + + # Call the LAPACK extension function _getri_batch + # to compute the inverse of a batch of matrices using the results + # from the LU decomposition performed by _getrf_batch + ht_getri_ev, _ = li._getri_batch( + a_sycl_queue, + a_h.get_array(), + ipiv_h.get_array(), + dev_info, + n, + a_stride, + ipiv_stride, + batch_size, + [getrf_ev], + ) + + _check_lapack_dev_info(dev_info) + + ht_getri_ev.wait() + ht_getrf_ev.wait() + a_ht_copy_ev.wait() + + return a_h.reshape(orig_shape) + + +def dpnp_inv(a): + """ + dpnp_inv(a) + + Return the inverse of `a` matrix. + + The inverse of a matrix is such that if it is multiplied by the original matrix, + it results in the identity matrix. This function computes the inverse of a single + square matrix. 
+ + """ + + res_type = _common_type(a) + if a.size == 0: + return dpnp.empty_like(a, dtype=res_type) + + if a.ndim >= 3: + return dpnp_inv_batched(a, res_type) + + a_usm_arr = dpnp.get_usm_ndarray(a) + a_sycl_queue = a.sycl_queue + a_usm_type = a.usm_type + + a_order = "C" if a.flags.c_contiguous else "F" + a_shape = a.shape + + # oneMKL LAPACK gesv overwrites `a` and assumes fortran-like array as input. + # To use C-contiguous arrays, we transpose them before passing to gesv. + # This transposition is effective because the input array `a` is square. + a_f = dpnp.empty_like(a, order=a_order, dtype=res_type) + + # use DPCTL tensor function to fill the coefficient matrix array + # with content from the input array `a` + a_ht_copy_ev, a_copy_ev = ti._copy_usm_ndarray_into_usm_ndarray( + src=a_usm_arr, dst=a_f.get_array(), sycl_queue=a_sycl_queue + ) + + b_f = dpnp.eye( + a_shape[0], + dtype=res_type, + order=a_order, + sycl_queue=a_sycl_queue, + usm_type=a_usm_type, + ) + + if a_order == "F": + ht_lapack_ev, _ = li._gesv( + a_sycl_queue, a_f.get_array(), b_f.get_array(), [a_copy_ev] + ) + else: + ht_lapack_ev, _ = li._gesv( + a_sycl_queue, a_f.T.get_array(), b_f.T.get_array(), [a_copy_ev] + ) + + ht_lapack_ev.wait() + a_ht_copy_ev.wait() + + return b_f + + def dpnp_solve(a, b): """ dpnp_solve(a, b) @@ -432,3 +1037,54 @@ def dpnp_solve(a, b): a_ht_copy_ev.wait() return b_f + + +def dpnp_slogdet(a): + """ + dpnp_slogdet(a) + + Returns sign and logarithm of the determinant of `a` array. 
+ + """ + + a_usm_type = a.usm_type + a_sycl_queue = a.sycl_queue + + res_type = _common_type(a) + logdet_dtype = _real_type(res_type) + + a_shape = a.shape + shape = a_shape[:-2] + n = a_shape[-2] + + if a.size == 0: + # empty batch (result is empty, too) or empty matrices det([[]]) == 1 + sign = dpnp.ones( + shape, dtype=res_type, usm_type=a_usm_type, sycl_queue=a_sycl_queue + ) + logdet = dpnp.zeros( + shape, + dtype=logdet_dtype, + usm_type=a_usm_type, + sycl_queue=a_sycl_queue, + ) + return sign, logdet + + lu, ipiv, dev_info = _lu_factor(a, res_type) + + # Transposing 'lu' to swap the last two axes for compatibility + # with 'dpnp.diagonal' as it does not support 'axis1' and 'axis2' arguments. + # TODO: Replace with 'dpnp.diagonal(lu, axis1=-2, axis2=-1)' when supported. + lu_transposed = lu.transpose(-2, -1, *range(lu.ndim - 2)) + diag = dpnp.diagonal(lu_transposed) + + logdet = dpnp.log(dpnp.abs(diag)).sum(axis=-1) + + sign = _calculate_determinant_sign(ipiv, diag, res_type, n) + + logdet = logdet.astype(logdet_dtype, copy=False) + singular = dev_info > 0 + return ( + dpnp.where(singular, res_type.type(0), sign).reshape(shape), + dpnp.where(singular, logdet_dtype.type("-inf"), logdet).reshape(shape), + ) diff --git a/dpnp/random/__init__.py b/dpnp/random/__init__.py index f0ab4e5d9f2..452f4db6bf2 100644 --- a/dpnp/random/__init__.py +++ b/dpnp/random/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/random/dpnp_algo_random.pyx b/dpnp/random/dpnp_algo_random.pyx index 5fb1e4ae00d..432d3a3294c 100644 --- a/dpnp/random/dpnp_algo_random.pyx +++ b/dpnp/random/dpnp_algo_random.pyx @@ -2,7 +2,7 @@ # cython: linetrace=True # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/random/dpnp_iface_random.py b/dpnp/random/dpnp_iface_random.py index 1f2137a0807..827d1127ec7 100644 --- a/dpnp/random/dpnp_iface_random.py +++ b/dpnp/random/dpnp_iface_random.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/random/dpnp_random_state.py b/dpnp/random/dpnp_random_state.py index c53ae620b2c..42ed0566ec9 100644 --- a/dpnp/random/dpnp_random_state.py +++ b/dpnp/random/dpnp_random_state.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/to_numba/__init__.py b/dpnp/to_numba/__init__.py index aee2bba3ee8..15ab331d9b9 100644 --- a/dpnp/to_numba/__init__.py +++ b/dpnp/to_numba/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/dpnp/to_numba/dpnp_iface_to_numba.py b/dpnp/to_numba/dpnp_iface_to_numba.py index fc1ee2dbc10..5c1d07aea05 100644 --- a/dpnp/to_numba/dpnp_iface_to_numba.py +++ b/dpnp/to_numba/dpnp_iface_to_numba.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/examples/example1.py b/examples/example1.py index e078a02090d..c96a68a2b49 100644 --- a/examples/example1.py +++ b/examples/example1.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/examples/example10.py b/examples/example10.py index e2518423678..655d5d41c00 100644 --- a/examples/example10.py +++ b/examples/example10.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/examples/example2.py b/examples/example2.py index 9f0539be2dd..d093bce90b7 100644 --- a/examples/example2.py +++ b/examples/example2.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/examples/example4.py b/examples/example4.py index 19adde92bfc..9ee8292077f 100644 --- a/examples/example4.py +++ b/examples/example4.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/examples/example6.py b/examples/example6.py index 5cd19651046..183b3e1b199 100644 --- a/examples/example6.py +++ b/examples/example6.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/examples/example7.py b/examples/example7.py index a0df2e1eef3..39f1deba92d 100644 --- a/examples/example7.py +++ b/examples/example7.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/examples/example_bs.py b/examples/example_bs.py index 488f495303f..dc265a384f9 100644 --- a/examples/example_bs.py +++ b/examples/example_bs.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/examples/example_cfd.py b/examples/example_cfd.py index db309065759..92079b3d019 100644 --- a/examples/example_cfd.py +++ b/examples/example_cfd.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2023, Intel Corporation +# Copyright (c) 2023-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/examples/example_sum.py b/examples/example_sum.py index 478194ccbe3..fc7d1da22d5 100644 --- a/examples/example_sum.py +++ b/examples/example_sum.py @@ -1,7 +1,7 @@ # cython: language_level=3 # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2023, Intel Corporation +# Copyright (c) 2023-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/scripts/build_locally.py b/scripts/build_locally.py index e3e21915178..c8ff30d34ce 100644 --- a/scripts/build_locally.py +++ b/scripts/build_locally.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without @@ -28,8 +28,6 @@ import subprocess import sys -import dpctl - def run( use_oneapi=True, @@ -60,16 +58,12 @@ def run( cmake_args += [ "--cmake-executable=" + cmake_executable, ] - dpctl_module_path = os.path.join( - dpctl.get_include(), "..", "resources", "cmake" - ) cmake_args += [ "--build-type=" + build_type, "--generator=" + build_system, "--", "-DCMAKE_C_COMPILER:PATH=" + c_compiler, "-DCMAKE_CXX_COMPILER:PATH=" + cxx_compiler, - "-DDPCTL_MODULE_PATH:PATH=" + dpctl_module_path, ] if verbose: cmake_args += [ diff --git a/scripts/gen_coverage.py b/scripts/gen_coverage.py index 5af2ccb3fc6..ad47556c12f 100644 --- a/scripts/gen_coverage.py +++ b/scripts/gen_coverage.py @@ -24,9 +24,6 @@ def run( ) setup_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - dpctl_cmake_dir = subprocess.check_output( - [sys.executable, "-m", "dpctl", "--cmakedir"] - ) cmake_args = [ sys.executable, @@ -36,7 +33,6 @@ def run( "--", "-DCMAKE_C_COMPILER:PATH=" + c_compiler, "-DCMAKE_CXX_COMPILER:PATH=" + cxx_compiler, - "-DDPCTL_MODULE_PATH=" + dpctl_cmake_dir.decode().rstrip(), "-DCMAKE_VERBOSE_MAKEFILE=ON", "-DDPNP_GENERATE_COVERAGE=ON", ] diff --git a/tests/conftest.py b/tests/conftest.py index 6b9d01691a3..f37ad9177a6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/tests/skipped_tests.tbl b/tests/skipped_tests.tbl index 15572947eee..8eb46d3c983 100644 --- a/tests/skipped_tests.tbl +++ b/tests/skipped_tests.tbl @@ -77,7 +77,6 @@ tests/test_strides.py::test_strides_1arg[(10,)-None-degrees] tests/test_strides.py::test_strides_1arg[(10,)-None-fabs] tests/test_strides.py::test_strides_1arg[(10,)-None-radians] tests/test_strides.py::test_strides_erf[(10,)-None] -tests/test_strides.py::test_strides_reciprocal[(10,)-None] tests/test_umath.py::test_umaths[('divmod', 'ii')] tests/test_umath.py::test_umaths[('divmod', 'll')] @@ -103,7 +102,6 @@ tests/test_umath.py::test_umaths[('nextafter', 'dd')] tests/test_umath.py::test_umaths[('spacing', 'f')] tests/test_umath.py::test_umaths[('spacing', 'd')] -tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestAngle::test_angle tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag_inplace tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_real_inplace tests/third_party/cupy/core_tests/test_ndarray_conversion.py::TestNdarrayToBytes_param_0_{shape=()}::test_item @@ -177,10 +175,6 @@ tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid1 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid2 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid3 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMgrid::test_mgrid3 -tests/third_party/cupy/creation_tests/test_ranges.py::TestOgrid::test_ogrid3 -tests/third_party/cupy/creation_tests/test_ranges.py::TestOgrid::test_ogrid4 
-tests/third_party/cupy/creation_tests/test_ranges.py::TestOgrid::test_ogrid5 tests/third_party/cupy/indexing_tests/test_generate.py::TestAxisConcatenator::test_AxisConcatenator_init1 tests/third_party/cupy/indexing_tests/test_generate.py::TestAxisConcatenator::test_len tests/third_party/cupy/indexing_tests/test_generate.py::TestC_::test_c_1 @@ -403,8 +397,6 @@ tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_para tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticBinary2_param_545_{arg1=array([[1, 2, 3], [4, 5, 6]]), arg2=array([[0, 1, 2], [3, 4, 5]]), dtype=float64, name='fmod', use_dtype=False}::test_binary tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticModf::test_modf -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_3_{name='angle', nargs=1}::test_raises_with_numpy_input - tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2 tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2_infinities tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_frexp @@ -561,13 +553,6 @@ tests/third_party/cupy/math_tests/test_sumprod.py::TestCumsum_param_1_{axis=1}:: tests/third_party/cupy/math_tests/test_sumprod.py::TestCumsum_param_2_{axis=2}::test_cumsum_arraylike tests/third_party/cupy/math_tests/test_sumprod.py::TestCumsum_param_2_{axis=2}::test_cumsum_numpy_array -tests/third_party/cupy/math_tests/test_sumprod.py::TestNansumNanprodAxes_param_0_{axis=(1, 3), shape=(2, 3, 4, 5)}::test_nansum_axes -tests/third_party/cupy/math_tests/test_sumprod.py::TestNansumNanprodAxes_param_1_{axis=(1, 3), shape=(20, 30, 40, 50)}::test_nansum_axes -tests/third_party/cupy/math_tests/test_sumprod.py::TestNansumNanprodAxes_param_2_{axis=(0, 2, 3), shape=(2, 3, 4, 5)}::test_nansum_axes -tests/third_party/cupy/math_tests/test_sumprod.py::TestNansumNanprodAxes_param_3_{axis=(0, 2, 3), shape=(20, 30, 40, 
50)}::test_nansum_axes -tests/third_party/cupy/math_tests/test_sumprod.py::TestNansumNanprodExtra_param_0_{shape=(2, 3, 4)}::test_nansum_out -tests/third_party/cupy/math_tests/test_sumprod.py::TestNansumNanprodExtra_param_1_{shape=(20, 30, 40)}::test_nansum_out - tests/third_party/cupy/math_tests/test_trigonometric.py::TestUnwrap::test_unwrap_1dim tests/third_party/cupy/math_tests/test_trigonometric.py::TestUnwrap::test_unwrap_1dim_with_discont tests/third_party/cupy/math_tests/test_trigonometric.py::TestUnwrap::test_unwrap_1dim_with_period @@ -770,6 +755,7 @@ tests/third_party/cupy/sorting_tests/test_search.py::TestFlatNonzero_param_3_{ar tests/third_party/cupy/sorting_tests/test_search.py::TestFlatNonzero_param_4_{array=array([], shape=(0, 2, 0), dtype=float64)}::test_flatnonzero tests/third_party/cupy/sorting_tests/test_search.py::TestNonzeroZeroDimension_param_0_{array=array(0)}::test_nonzero tests/third_party/cupy/sorting_tests/test_search.py::TestNonzeroZeroDimension_param_1_{array=array(1)}::test_nonzero + tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_axis tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_invalid_axis1 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_invalid_axis2 @@ -800,36 +786,6 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_1_{ext tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_1_{external=True}::test_argpartition_one_dim tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_1_{external=True}::test_argpartition_sequence_kth tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_1_{external=True}::test_argpartition_zero_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_axis 
-tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_invalid_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_invalid_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_invalid_negative_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_invalid_negative_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_multi_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_negative_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_none_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_zero_dim_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_zero_dim_invalid_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_nan1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_nan2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_invalid_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_invalid_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_invalid_negative_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_invalid_negative_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_multi_dim 
-tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_negative_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_none_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_zero_dim_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_zero_dim_invalid_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_nan1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_nan2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_non_contiguous -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_original_array_not_modified_multi_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_original_array_not_modified_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_zero_dim - tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_F_order tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_lexsort_dtype tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_lexsort_three_or_more_dim @@ -839,29 +795,11 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_nan3 tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_view tests/third_party/cupy/sorting_tests/test_sort.py::TestMsort::test_msort_multi_dim tests/third_party/cupy/sorting_tests/test_sort.py::TestMsort::test_msort_one_dim - tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_1dim 
tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_nan tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_ndim tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_zero_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_external_sort_zero_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_nan1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_nan2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_nan3 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_nan4 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_axis3 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_contiguous -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_dtype -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_invalid_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_invalid_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_invalid_negative_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_invalid_negative_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_negative_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_non_contiguous -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_two_or_more_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_zero_dim + tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_diag_exception 
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_rowvar @@ -953,63 +891,7 @@ tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_h tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_with_density tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_with_weights_and_density tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_same_value - tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_weights_mismatch -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMeanAdditional::test_nanmean_all_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMeanAdditional::test_nanmean_float16 -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMeanAdditional::test_nanmean_huge -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMeanAdditional::test_nanmean_out -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_0_{axis=None, keepdims=True, shape=(3, 4)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_0_{axis=None, keepdims=True, shape=(3, 4)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_10_{axis=1, keepdims=False, shape=(3, 4)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_10_{axis=1, keepdims=False, shape=(3, 4)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_11_{axis=1, keepdims=False, shape=(30, 40, 50)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_11_{axis=1, keepdims=False, shape=(30, 40, 50)}::test_nanmean_without_nan 
-tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_1_{axis=None, keepdims=True, shape=(30, 40, 50)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_1_{axis=None, keepdims=True, shape=(30, 40, 50)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_2_{axis=None, keepdims=False, shape=(3, 4)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_2_{axis=None, keepdims=False, shape=(3, 4)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_3_{axis=None, keepdims=False, shape=(30, 40, 50)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_3_{axis=None, keepdims=False, shape=(30, 40, 50)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_4_{axis=0, keepdims=True, shape=(3, 4)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_4_{axis=0, keepdims=True, shape=(3, 4)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_5_{axis=0, keepdims=True, shape=(30, 40, 50)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_5_{axis=0, keepdims=True, shape=(30, 40, 50)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_6_{axis=0, keepdims=False, shape=(3, 4)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_6_{axis=0, keepdims=False, shape=(3, 4)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_7_{axis=0, keepdims=False, shape=(30, 40, 50)}::test_nanmean_with_nan_float 
-tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_7_{axis=0, keepdims=False, shape=(30, 40, 50)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_8_{axis=1, keepdims=True, shape=(3, 4)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_8_{axis=1, keepdims=True, shape=(3, 4)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_9_{axis=1, keepdims=True, shape=(30, 40, 50)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_9_{axis=1, keepdims=True, shape=(30, 40, 50)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStdAdditional::test_nanstd_float16 -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStdAdditional::test_nanstd_huge -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStdAdditional::test_nanstd_out -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_0_{axis=None, ddof=0, keepdims=True, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_10_{axis=0, ddof=0, keepdims=False, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_11_{axis=0, ddof=0, keepdims=False, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_12_{axis=0, ddof=1, keepdims=True, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_13_{axis=0, ddof=1, keepdims=True, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_14_{axis=0, ddof=1, keepdims=False, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_15_{axis=0, ddof=1, 
keepdims=False, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_16_{axis=1, ddof=0, keepdims=True, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_17_{axis=1, ddof=0, keepdims=True, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_18_{axis=1, ddof=0, keepdims=False, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_19_{axis=1, ddof=0, keepdims=False, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_1_{axis=None, ddof=0, keepdims=True, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_20_{axis=1, ddof=1, keepdims=True, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_21_{axis=1, ddof=1, keepdims=True, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_22_{axis=1, ddof=1, keepdims=False, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_23_{axis=1, ddof=1, keepdims=False, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_2_{axis=None, ddof=0, keepdims=False, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_3_{axis=None, ddof=0, keepdims=False, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_4_{axis=None, ddof=1, keepdims=True, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_5_{axis=None, ddof=1, keepdims=True, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_6_{axis=None, 
ddof=1, keepdims=False, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_7_{axis=None, ddof=1, keepdims=False, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_8_{axis=0, ddof=0, keepdims=True, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_9_{axis=0, ddof=0, keepdims=True, shape=(4, 3, 5)}::test_nanstd tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_bad_q[linear] tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_bad_q[lower] diff --git a/tests/skipped_tests_gpu.tbl b/tests/skipped_tests_gpu.tbl index dae91d41ac3..b8c195b9861 100644 --- a/tests/skipped_tests_gpu.tbl +++ b/tests/skipped_tests_gpu.tbl @@ -186,7 +186,6 @@ tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-float32] tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-int64] tests/test_linalg.py::test_matrix_rank[None-[[1, 2], [3, 4]]-int32] -tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestAngle::test_angle tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_imag_inplace tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py::TestRealImag::test_real_inplace @@ -255,11 +254,6 @@ tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid1 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid2 tests/third_party/cupy/creation_tests/test_ranges.py::TestMeshgrid_param_7_{copy=True, indexing='ij', sparse=True}::test_meshgrid3 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMgrid::test_mgrid3 -tests/third_party/cupy/creation_tests/test_ranges.py::TestMgrid::test_mgrid5 
-tests/third_party/cupy/creation_tests/test_ranges.py::TestOgrid::test_ogrid3 -tests/third_party/cupy/creation_tests/test_ranges.py::TestOgrid::test_ogrid4 -tests/third_party/cupy/creation_tests/test_ranges.py::TestOgrid::test_ogrid5 tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_arange_negative_size tests/third_party/cupy/creation_tests/test_ranges.py::TestRanges::test_arange_no_dtype_int @@ -500,8 +494,6 @@ tests/third_party/cupy/manipulation_tests/test_dims.py::TestInvalidBroadcast_par tests/third_party/cupy/manipulation_tests/test_dims.py::TestInvalidBroadcast_param_2_{shapes=[(3, 2), (3, 4)]}::test_invalid_broadcast tests/third_party/cupy/manipulation_tests/test_dims.py::TestInvalidBroadcast_param_3_{shapes=[(0,), (2,)]}::test_invalid_broadcast -tests/third_party/cupy/math_tests/test_arithmetic.py::TestArithmeticRaisesWithNumpyInput_param_3_{name='angle', nargs=1}::test_raises_with_numpy_input - tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2 tests/third_party/cupy/math_tests/test_explog.py::TestExplog::test_logaddexp2_infinities tests/third_party/cupy/math_tests/test_floating.py::TestFloating::test_frexp @@ -665,13 +657,6 @@ tests/third_party/cupy/math_tests/test_sumprod.py::TestCumsum_param_1_{axis=1}:: tests/third_party/cupy/math_tests/test_sumprod.py::TestCumsum_param_2_{axis=2}::test_cumsum_arraylike tests/third_party/cupy/math_tests/test_sumprod.py::TestCumsum_param_2_{axis=2}::test_cumsum_numpy_array -tests/third_party/cupy/math_tests/test_sumprod.py::TestNansumNanprodAxes_param_0_{axis=(1, 3), shape=(2, 3, 4, 5)}::test_nansum_axes -tests/third_party/cupy/math_tests/test_sumprod.py::TestNansumNanprodAxes_param_1_{axis=(1, 3), shape=(20, 30, 40, 50)}::test_nansum_axes -tests/third_party/cupy/math_tests/test_sumprod.py::TestNansumNanprodAxes_param_2_{axis=(0, 2, 3), shape=(2, 3, 4, 5)}::test_nansum_axes -tests/third_party/cupy/math_tests/test_sumprod.py::TestNansumNanprodAxes_param_3_{axis=(0, 2, 3), 
shape=(20, 30, 40, 50)}::test_nansum_axes -tests/third_party/cupy/math_tests/test_sumprod.py::TestNansumNanprodExtra_param_0_{shape=(2, 3, 4)}::test_nansum_out -tests/third_party/cupy/math_tests/test_sumprod.py::TestNansumNanprodExtra_param_1_{shape=(20, 30, 40)}::test_nansum_out - tests/third_party/cupy/math_tests/test_trigonometric.py::TestUnwrap::test_unwrap_1dim tests/third_party/cupy/math_tests/test_trigonometric.py::TestUnwrap::test_unwrap_1dim_with_discont tests/third_party/cupy/math_tests/test_trigonometric.py::TestUnwrap::test_unwrap_1dim_with_period @@ -832,6 +817,7 @@ tests/third_party/cupy/sorting_tests/test_search.py::TestFlatNonzero_param_3_{ar tests/third_party/cupy/sorting_tests/test_search.py::TestFlatNonzero_param_4_{array=array([], shape=(0, 2, 0), dtype=float64)}::test_flatnonzero tests/third_party/cupy/sorting_tests/test_search.py::TestNonzeroZeroDimension_param_0_{array=array(0)}::test_nonzero tests/third_party/cupy/sorting_tests/test_search.py::TestNonzeroZeroDimension_param_1_{array=array(1)}::test_nonzero + tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_axis tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_invalid_axis1 tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_0_{external=False}::test_argpartition_invalid_axis2 @@ -862,36 +848,6 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_1_{ext tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_1_{external=True}::test_argpartition_one_dim tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_1_{external=True}::test_argpartition_sequence_kth tests/third_party/cupy/sorting_tests/test_sort.py::TestArgpartition_param_1_{external=True}::test_argpartition_zero_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_axis 
-tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_invalid_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_invalid_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_invalid_negative_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_invalid_negative_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_multi_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_negative_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_none_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_zero_dim_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_zero_dim_invalid_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_nan1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_nan2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_invalid_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_invalid_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_invalid_negative_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_invalid_negative_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_multi_dim 
-tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_negative_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_none_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_zero_dim_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_argsort_zero_dim_invalid_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_nan1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_1_{external=True}::test_nan2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_non_contiguous -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_original_array_not_modified_multi_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_original_array_not_modified_one_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestArgsort_param_0_{external=False}::test_argsort_zero_dim - tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_F_order tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_lexsort_dtype tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_lexsort_three_or_more_dim @@ -901,29 +857,11 @@ tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_nan3 tests/third_party/cupy/sorting_tests/test_sort.py::TestLexsort::test_view tests/third_party/cupy/sorting_tests/test_sort.py::TestMsort::test_msort_multi_dim tests/third_party/cupy/sorting_tests/test_sort.py::TestMsort::test_msort_one_dim - tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_1dim 
tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_nan tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_ndim tests/third_party/cupy/sorting_tests/test_sort.py::TestSort_complex::test_sort_complex_zero_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_external_sort_zero_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_nan1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_nan2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_nan3 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_nan4 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_axis3 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_contiguous -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_dtype -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_invalid_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_invalid_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_invalid_negative_axis1 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_invalid_negative_axis2 -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_negative_axis -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_non_contiguous -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_two_or_more_dim -tests/third_party/cupy/sorting_tests/test_sort.py::TestSort::test_sort_zero_dim + tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_diag_exception 
tests/third_party/cupy/statistics_tests/test_correlation.py::TestCorrcoef::test_corrcoef_rowvar @@ -1015,63 +953,7 @@ tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_h tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_with_density tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_range_with_weights_and_density tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_same_value - tests/third_party/cupy/statistics_tests/test_histogram.py::TestHistogram::test_histogram_weights_mismatch -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMeanAdditional::test_nanmean_all_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMeanAdditional::test_nanmean_float16 -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMeanAdditional::test_nanmean_huge -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMeanAdditional::test_nanmean_out -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_0_{axis=None, keepdims=True, shape=(3, 4)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_0_{axis=None, keepdims=True, shape=(3, 4)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_10_{axis=1, keepdims=False, shape=(3, 4)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_10_{axis=1, keepdims=False, shape=(3, 4)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_11_{axis=1, keepdims=False, shape=(30, 40, 50)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_11_{axis=1, keepdims=False, shape=(30, 40, 50)}::test_nanmean_without_nan 
-tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_1_{axis=None, keepdims=True, shape=(30, 40, 50)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_1_{axis=None, keepdims=True, shape=(30, 40, 50)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_2_{axis=None, keepdims=False, shape=(3, 4)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_2_{axis=None, keepdims=False, shape=(3, 4)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_3_{axis=None, keepdims=False, shape=(30, 40, 50)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_3_{axis=None, keepdims=False, shape=(30, 40, 50)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_4_{axis=0, keepdims=True, shape=(3, 4)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_4_{axis=0, keepdims=True, shape=(3, 4)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_5_{axis=0, keepdims=True, shape=(30, 40, 50)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_5_{axis=0, keepdims=True, shape=(30, 40, 50)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_6_{axis=0, keepdims=False, shape=(3, 4)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_6_{axis=0, keepdims=False, shape=(3, 4)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_7_{axis=0, keepdims=False, shape=(30, 40, 50)}::test_nanmean_with_nan_float 
-tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_7_{axis=0, keepdims=False, shape=(30, 40, 50)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_8_{axis=1, keepdims=True, shape=(3, 4)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_8_{axis=1, keepdims=True, shape=(3, 4)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_9_{axis=1, keepdims=True, shape=(30, 40, 50)}::test_nanmean_with_nan_float -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanMean_param_9_{axis=1, keepdims=True, shape=(30, 40, 50)}::test_nanmean_without_nan -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStdAdditional::test_nanstd_float16 -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStdAdditional::test_nanstd_huge -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStdAdditional::test_nanstd_out -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_0_{axis=None, ddof=0, keepdims=True, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_10_{axis=0, ddof=0, keepdims=False, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_11_{axis=0, ddof=0, keepdims=False, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_12_{axis=0, ddof=1, keepdims=True, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_13_{axis=0, ddof=1, keepdims=True, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_14_{axis=0, ddof=1, keepdims=False, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_15_{axis=0, ddof=1, 
keepdims=False, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_16_{axis=1, ddof=0, keepdims=True, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_17_{axis=1, ddof=0, keepdims=True, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_18_{axis=1, ddof=0, keepdims=False, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_19_{axis=1, ddof=0, keepdims=False, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_1_{axis=None, ddof=0, keepdims=True, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_20_{axis=1, ddof=1, keepdims=True, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_21_{axis=1, ddof=1, keepdims=True, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_22_{axis=1, ddof=1, keepdims=False, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_23_{axis=1, ddof=1, keepdims=False, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_2_{axis=None, ddof=0, keepdims=False, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_3_{axis=None, ddof=0, keepdims=False, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_4_{axis=None, ddof=1, keepdims=True, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_5_{axis=None, ddof=1, keepdims=True, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_6_{axis=None, 
ddof=1, keepdims=False, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_7_{axis=None, ddof=1, keepdims=False, shape=(4, 3, 5)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_8_{axis=0, ddof=0, keepdims=True, shape=(3, 4)}::test_nanstd -tests/third_party/cupy/statistics_tests/test_meanvar.py::TestNanVarStd_param_9_{axis=0, ddof=0, keepdims=True, shape=(4, 3, 5)}::test_nanstd tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_bad_q[linear] tests/third_party/cupy/statistics_tests/test_order.py::TestOrder::test_percentile_bad_q[lower] diff --git a/tests/test_absolute.py b/tests/test_absolute.py index aac8a71ca58..1c6a42e98c6 100644 --- a/tests/test_absolute.py +++ b/tests/test_absolute.py @@ -1,10 +1,15 @@ import numpy import pytest -from numpy.testing import assert_array_equal, assert_equal +from numpy.testing import assert_equal import dpnp -from .helper import get_all_dtypes, get_complex_dtypes, get_float_complex_dtypes +from .helper import ( + assert_dtype_allclose, + get_all_dtypes, + get_complex_dtypes, + get_float_complex_dtypes, +) @pytest.mark.parametrize("func", ["abs", "absolute"]) @@ -15,8 +20,13 @@ def test_abs(func, dtype): result = getattr(dpnp, func)(ia) expected = getattr(numpy, func)(a) - assert_array_equal(expected, result) - assert_equal(result.dtype, expected.dtype) + assert_dtype_allclose(result, expected) + + # out keyword + dp_out = dpnp.empty(expected.shape, dtype=expected.dtype) + result = getattr(dpnp, func)(ia, out=dp_out) + assert result is dp_out + assert_dtype_allclose(result, expected) @pytest.mark.parametrize("stride", [-4, -2, -1, 1, 2, 4]) diff --git a/tests/test_arithmetic.py b/tests/test_arithmetic.py index 60dc7a1c9af..6ec18a545f7 100644 --- a/tests/test_arithmetic.py +++ b/tests/test_arithmetic.py @@ -1,10 +1,20 @@ import unittest -import pytest +import numpy +from tests.helper import 
has_support_aspect64 from tests.third_party.cupy import testing +# Note: numpy.sum() always upcast integers to (u)int64 and float32 to +# float64 for dtype=None. `np.sum` does that too for integers, but not for +# float32, so we need to special-case it for these tests +def _get_dtype_kwargs(xp, dtype): + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + return {"dtype": numpy.float64} + return {} + + class TestArithmetic(unittest.TestCase): @testing.for_float_dtypes() @testing.numpy_cupy_allclose() @@ -32,7 +42,7 @@ def test_nanprod(self, xp, dtype): @testing.numpy_cupy_allclose() def test_nansum(self, xp, dtype): a = xp.array([-2.5, -1.5, xp.nan, 10.5, 1.5, xp.nan], dtype=dtype) - return xp.nansum(a) + return xp.nansum(a, **_get_dtype_kwargs(xp, a.dtype)) @testing.for_float_dtypes() @testing.numpy_cupy_allclose() diff --git a/tests/test_bitwise.py b/tests/test_bitwise.py index f8484eaecb5..e3393e43bee 100644 --- a/tests/test_bitwise.py +++ b/tests/test_bitwise.py @@ -4,6 +4,8 @@ import dpnp as inp +from .helper import assert_dtype_allclose, get_integer_dtypes + @pytest.mark.parametrize( "lhs", @@ -134,3 +136,15 @@ def test_right_shift(self, lhs, rhs, dtype): dp_a >>= dp_b np_a >>= np_b assert_array_equal(dp_a, np_a) + + +@pytest.mark.parametrize("dtype", get_integer_dtypes()) +def test_invert_out(dtype): + np_a = numpy.arange(-5, 5, dtype=dtype) + dp_a = inp.array(np_a) + + expected = numpy.invert(np_a) + dp_out = inp.empty(expected.shape, dtype=expected.dtype) + result = inp.invert(dp_a, out=dp_out) + assert result is dp_out + assert_dtype_allclose(result, expected) diff --git a/tests/test_indexing.py b/tests/test_indexing.py index 4d8229e53ce..2640eb64c49 100644 --- a/tests/test_indexing.py +++ b/tests/test_indexing.py @@ -154,8 +154,6 @@ def test_broadcast(self, arr_dt, idx_dt): class TestTakeAlongAxis: - # TODO: remove fixture once `dpnp.sort` is fully implemented - @pytest.mark.usefixtures("allow_fall_back_on_numpy") 
@pytest.mark.parametrize( "func, argfunc, kwargs", [ @@ -261,6 +259,7 @@ def test_choose(): assert_array_equal(expected, result) +@pytest.mark.parametrize("arr_dtype", get_all_dtypes(no_bool=True)) @pytest.mark.parametrize("offset", [0, 1], ids=["0", "1"]) @pytest.mark.parametrize( "array", @@ -296,8 +295,8 @@ def test_choose(): "[[[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]], [[[13, 14, 15], [16, 17, 18]], [[19, 20, 21], [22, 23, 24]]]]", ], ) -def test_diagonal(array, offset): - a = numpy.array(array) +def test_diagonal(array, arr_dtype, offset): + a = numpy.array(array, dtype=arr_dtype) ia = dpnp.array(a) expected = numpy.diagonal(a, offset) result = dpnp.diagonal(ia, offset) @@ -363,10 +362,13 @@ def test_fill_diagonal(array, val): "[3, 2]", ], ) -def test_indices(dimension): - expected = numpy.indices(dimension) - result = dpnp.indices(dimension) - assert_array_equal(expected, result) +@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) +@pytest.mark.parametrize("sparse", [True, False], ids=["True", "False"]) +def test_indices(dimension, dtype, sparse): + expected = numpy.indices(dimension, dtype=dtype, sparse=sparse) + result = dpnp.indices(dimension, dtype=dtype, sparse=sparse) + for Xnp, X in zip(expected, result): + assert_array_equal(Xnp, X) @pytest.mark.parametrize( diff --git a/tests/test_linalg.py b/tests/test_linalg.py index 6c8c2a5f04e..5ea536c2887 100644 --- a/tests/test_linalg.py +++ b/tests/test_linalg.py @@ -4,6 +4,7 @@ from numpy.testing import assert_allclose, assert_array_equal, assert_raises import dpnp as inp +from tests.third_party.cupy import testing from .helper import ( assert_dtype_allclose, @@ -44,44 +45,159 @@ def vvsort(val, vec, size, xp): vec[:, imax] = temp -@pytest.mark.parametrize( - "array", - [ - [[[1, -2], [2, 5]]], - [[[1.0, -2.0], [2.0, 5.0]]], - [[[1.0, -2.0], [2.0, 5.0]], [[1.0, -2.0], [2.0, 5.0]]], - ], - ids=[ - "[[[1, -2], [2, 5]]]", - "[[[1., -2.], [2., 5.]]]", - "[[[1., -2.], [2., 5.]], [[1., -2.], 
[2., 5.]]]", - ], -) -def test_cholesky(array): - a = numpy.array(array) - ia = inp.array(a) - result = inp.linalg.cholesky(ia) - expected = numpy.linalg.cholesky(a) - assert_array_equal(expected, result) +class TestCholesky: + @pytest.mark.parametrize( + "array", + [ + [[1, 2], [2, 5]], + [[[5, 2], [2, 6]], [[7, 3], [3, 8]], [[3, 1], [1, 4]]], + [ + [[[5, 2], [2, 5]], [[6, 3], [3, 6]]], + [[[7, 2], [2, 7]], [[8, 3], [3, 8]]], + ], + ], + ids=[ + "2D_array", + "3D_array", + "4D_array", + ], + ) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_cholesky(self, array, dtype): + a = numpy.array(array, dtype=dtype) + ia = inp.array(a) + result = inp.linalg.cholesky(ia) + expected = numpy.linalg.cholesky(a) + assert_dtype_allclose(result, expected) + @pytest.mark.parametrize( + "array", + [ + [[1, 2], [2, 5]], + [[[5, 2], [2, 6]], [[7, 3], [3, 8]], [[3, 1], [1, 4]]], + [ + [[[5, 2], [2, 5]], [[6, 3], [3, 6]]], + [[[7, 2], [2, 7]], [[8, 3], [3, 8]]], + ], + ], + ids=[ + "2D_array", + "3D_array", + "4D_array", + ], + ) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_cholesky_upper(self, array, dtype): + ia = inp.array(array, dtype=dtype) + result = inp.linalg.cholesky(ia, upper=True) + + if ia.ndim > 2: + n = ia.shape[-1] + ia_reshaped = ia.reshape(-1, n, n) + res_reshaped = result.reshape(-1, n, n) + batch_size = ia_reshaped.shape[0] + for idx in range(batch_size): + # Reconstruct the matrix using the Cholesky decomposition result + if inp.issubdtype(dtype, inp.complexfloating): + reconstructed = ( + res_reshaped[idx].T.conj() @ res_reshaped[idx] + ) + else: + reconstructed = res_reshaped[idx].T @ res_reshaped[idx] + assert_dtype_allclose( + reconstructed, ia_reshaped[idx], check_type=False + ) + else: + # Reconstruct the matrix using the Cholesky decomposition result + if inp.issubdtype(dtype, inp.complexfloating): + reconstructed = result.T.conj() @ result + else: + reconstructed = result.T @ result + 
assert_dtype_allclose(reconstructed, ia, check_type=False) + + # upper parameter support will be added in numpy 2.0 version + @testing.with_requires("numpy>=2.0") + @pytest.mark.parametrize( + "array", + [ + [[1, 2], [2, 5]], + [[[5, 2], [2, 6]], [[7, 3], [3, 8]], [[3, 1], [1, 4]]], + [ + [[[5, 2], [2, 5]], [[6, 3], [3, 6]]], + [[[7, 2], [2, 7]], [[8, 3], [3, 8]]], + ], + ], + ids=[ + "2D_array", + "3D_array", + "4D_array", + ], + ) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_cholesky_upper_numpy(self, array, dtype): + a = numpy.array(array, dtype=dtype) + ia = inp.array(a) + result = inp.linalg.cholesky(ia, upper=True) + expected = numpy.linalg.cholesky(a, upper=True) + assert_dtype_allclose(result, expected) -@pytest.mark.parametrize( - "shape", - [ - (0, 0), - (3, 0, 0), - ], - ids=[ - "(0, 0)", - "(3, 0, 0)", - ], -) -def test_cholesky_0D(shape): - a = numpy.empty(shape) - ia = inp.array(a) - result = inp.linalg.cholesky(ia) - expected = numpy.linalg.cholesky(a) - assert_array_equal(expected, result) + def test_cholesky_strides(self): + a_np = numpy.array( + [ + [5, 2, 0, 0, 1], + [2, 6, 0, 0, 2], + [0, 0, 7, 0, 0], + [0, 0, 0, 4, 0], + [1, 2, 0, 0, 5], + ] + ) + + a_dp = inp.array(a_np) + + # positive strides + expected = numpy.linalg.cholesky(a_np[::2, ::2]) + result = inp.linalg.cholesky(a_dp[::2, ::2]) + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) + + # negative strides + expected = numpy.linalg.cholesky(a_np[::-2, ::-2]) + result = inp.linalg.cholesky(a_dp[::-2, ::-2]) + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) + + @pytest.mark.parametrize( + "shape", + [ + (0, 0), + (3, 0, 0), + (0, 2, 2), + ], + ids=[ + "(0, 0)", + "(3, 0, 0)", + "(0, 2, 2)", + ], + ) + def test_cholesky_empty(self, shape): + a = numpy.empty(shape) + ia = inp.array(a) + result = inp.linalg.cholesky(ia) + expected = numpy.linalg.cholesky(a) + assert_array_equal(expected, result) + + def test_cholesky_errors(self): + a_dp = 
inp.array([[1, 2], [2, 5]], dtype="float32") + + # unsupported type + a_np = inp.asnumpy(a_dp) + assert_raises(TypeError, inp.linalg.cholesky, a_np) + + # a.ndim < 2 + a_dp_ndim_1 = a_dp.flatten() + assert_raises(inp.linalg.LinAlgError, inp.linalg.cholesky, a_dp_ndim_1) + + # a is not square + a_dp = inp.ones((2, 3)) + assert_raises(inp.linalg.LinAlgError, inp.linalg.cholesky, a_dp) @pytest.mark.parametrize( @@ -105,46 +221,118 @@ def test_cond(arr, p): assert_array_equal(expected, result) -@pytest.mark.parametrize( - "array", - [ - [[0, 0], [0, 0]], - [[1, 2], [1, 2]], - [[1, 2], [3, 4]], - [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]], +class TestDet: + # TODO: Remove the use of fixture for test_det + # when dpnp.prod() will support complex dtypes on Gen9 + @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize( + "array", [ - [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], - [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], + [[1, 2], [3, 4]], + [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]], + [ + [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], + [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], + ], ], - ], - ids=[ - "[[0, 0], [0, 0]]", - "[[1, 2], [1, 2]]", - "[[1, 2], [3, 4]]", - "[[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]]", - "[[[[1, 2], [3, 4]], [[1, 2], [2, 1]]], [[[1, 3], [3, 1]], [[0, 1], [1, 3]]]]", - ], -) -def test_det(array): - a = numpy.array(array) - ia = inp.array(a) - result = inp.linalg.det(ia) - expected = numpy.linalg.det(a) - assert_allclose(expected, result) + ids=[ + "2D_array", + "3D_array", + "4D_array", + ], + ) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_det(self, array, dtype): + a = numpy.array(array, dtype=dtype) + ia = inp.array(a) + result = inp.linalg.det(ia) + expected = numpy.linalg.det(a) + assert_allclose(expected, result) + def test_det_strides(self): + a_np = numpy.array( + [ + [2, 3, 1, 4, 5], + [5, 6, 7, 8, 9], + [9, 7, 7, 2, 3], + [1, 4, 5, 1, 8], + [8, 9, 8, 5, 3], + ] + ) 
-@pytest.mark.usefixtures("allow_fall_back_on_numpy") -def test_det_empty(): - a = numpy.empty((0, 0, 2, 2), dtype=numpy.float32) - ia = inp.array(a) + a_dp = inp.array(a_np) + + # positive strides + expected = numpy.linalg.det(a_np[::2, ::2]) + result = inp.linalg.det(a_dp[::2, ::2]) + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) + + # negative strides + expected = numpy.linalg.det(a_np[::-2, ::-2]) + result = inp.linalg.det(a_dp[::-2, ::-2]) + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) + + def test_det_empty(self): + a = numpy.empty((0, 0, 2, 2), dtype=numpy.float32) + ia = inp.array(a) + + np_det = numpy.linalg.det(a) + dpnp_det = inp.linalg.det(ia) + + assert dpnp_det.dtype == np_det.dtype + assert dpnp_det.shape == np_det.shape + + assert_allclose(np_det, dpnp_det) + + @pytest.mark.parametrize( + "matrix", + [ + [[1, 2], [2, 4]], + [[0, 0], [0, 0]], + [[1, 1], [1, 1]], + [[2, 4], [1, 2]], + [[1, 2], [0, 0]], + [[1, 0], [2, 0]], + ], + ids=[ + "Linearly dependent rows", + "Zero matrix", + "Identical rows", + "Linearly dependent columns", + "Zero row", + "Zero column", + ], + ) + def test_det_singular_matrix(self, matrix): + a_np = numpy.array(matrix, dtype="float32") + a_dp = inp.array(a_np) - np_det = numpy.linalg.det(a) - dpnp_det = inp.linalg.det(ia) + expected = numpy.linalg.slogdet(a_np) + result = inp.linalg.slogdet(a_dp) - assert dpnp_det.dtype == np_det.dtype - assert dpnp_det.shape == np_det.shape + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) + + # TODO: remove skipif when MKLD-16626 is resolved + # _getrf_batch does not raise an error with singular matrices. + # Skip running on cpu because dpnp uses _getrf_batch only on cpu. 
+ @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") + def test_det_singular_matrix_3D(self): + a_np = numpy.array( + [[[1, 2], [3, 4]], [[1, 2], [1, 2]], [[1, 3], [3, 1]]] + ) + a_dp = inp.array(a_np) - assert_allclose(np_det, dpnp_det) + expected = numpy.linalg.det(a_np) + result = inp.linalg.det(a_dp) + + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) + + def test_det_errors(self): + a_dp = inp.array([[1, 2], [3, 5]], dtype="float32") + + # unsupported type + a_np = inp.asnumpy(a_dp) + assert_raises(TypeError, inp.linalg.det, a_np) @pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) @@ -247,18 +435,128 @@ def test_eigvals(type): assert_allclose(expected, result, atol=0.5) -@pytest.mark.parametrize("type", get_all_dtypes(no_bool=True, no_complex=True)) -@pytest.mark.parametrize( - "array", - [[[1.0, 2.0], [3.0, 4.0]], [[0, 1, 2], [3, 2, -1], [4, -2, 3]]], - ids=["[[1., 2.], [3., 4.]]", "[[0, 1, 2], [3, 2, -1], [4, -2, 3]]"], -) -def test_inv(type, array): - a = numpy.array(array, dtype=type) - ia = inp.array(a) - result = inp.linalg.inv(ia) - expected = numpy.linalg.inv(a) - assert_allclose(expected, result, rtol=1e-06) +class TestInv: + @pytest.mark.parametrize( + "array", + [ + [[1, 2], [3, 4]], + [[[1, 2], [3, 4]], [[1, 2], [2, 1]], [[1, 3], [3, 1]]], + [ + [[[1, 2], [3, 4]], [[1, 2], [2, 1]]], + [[[1, 3], [3, 1]], [[0, 1], [1, 3]]], + ], + ], + ids=[ + "2D_array", + "3D_array", + "4D_array", + ], + ) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_inv(self, array, dtype): + a = numpy.array(array, dtype=dtype) + ia = inp.array(a) + result = inp.linalg.inv(ia) + expected = numpy.linalg.inv(a) + assert_dtype_allclose(result, expected) + + def test_inv_strides(self): + a_np = numpy.array( + [ + [2, 3, 1, 4, 5], + [5, 6, 7, 8, 9], + [9, 7, 7, 2, 3], + [1, 4, 5, 1, 8], + [8, 9, 8, 5, 3], + ] + ) + + a_dp = inp.array(a_np) + + # positive strides + expected = numpy.linalg.inv(a_np[::2, ::2]) + 
result = inp.linalg.inv(a_dp[::2, ::2]) + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) + + # negative strides + expected = numpy.linalg.inv(a_np[::-2, ::-2]) + result = inp.linalg.inv(a_dp[::-2, ::-2]) + assert_allclose(expected, result, rtol=1e-3, atol=1e-4) + + @pytest.mark.parametrize( + "shape", + [ + (0, 0), + (3, 0, 0), + (0, 2, 2), + ], + ids=[ + "(0, 0)", + "(3, 0, 0)", + "(0, 2, 2)", + ], + ) + def test_inv_empty(self, shape): + a = numpy.empty(shape) + ia = inp.array(a) + result = inp.linalg.inv(ia) + expected = numpy.linalg.inv(a) + assert_dtype_allclose(result, expected) + + # TODO: remove skipif when MKLD-16626 is resolved + @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") + @pytest.mark.parametrize( + "matrix", + [ + [[1, 2], [2, 4]], + [[0, 0], [0, 0]], + [[1, 1], [1, 1]], + [[2, 4], [1, 2]], + [[1, 2], [0, 0]], + [[1, 0], [2, 0]], + ], + ids=[ + "Linearly dependent rows", + "Zero matrix", + "Identical rows", + "Linearly dependent columns", + "Zero row", + "Zero column", + ], + ) + def test_inv_singular_matrix(self, matrix): + a_np = numpy.array(matrix, dtype="float32") + a_dp = inp.array(a_np) + + assert_raises(numpy.linalg.LinAlgError, numpy.linalg.inv, a_np) + assert_raises(inp.linalg.LinAlgError, inp.linalg.inv, a_dp) + + # TODO: remove skipif when MKLD-16626 is resolved + # _getrf_batch does not raise an error with singular matrices. 
+ @pytest.mark.skip("MKLD-16626") + def test_inv_singular_matrix_3D(self): + a_np = numpy.array( + [[[1, 2], [3, 4]], [[1, 2], [1, 2]], [[1, 3], [3, 1]]] + ) + a_dp = inp.array(a_np) + + assert_raises(numpy.linalg.LinAlgError, numpy.linalg.inv, a_np) + assert_raises(inp.linalg.LinAlgError, inp.linalg.inv, a_dp) + + def test_inv_errors(self): + a_dp = inp.array([[1, 2], [2, 5]], dtype="float32") + + # unsupported type + a_np = inp.asnumpy(a_dp) + assert_raises(TypeError, inp.linalg.inv, a_np) + + # a.ndim < 2 + a_dp_ndim_1 = a_dp.flatten() + assert_raises(inp.linalg.LinAlgError, inp.linalg.inv, a_dp_ndim_1) + + # a is not square + a_dp = inp.ones((2, 3)) + assert_raises(inp.linalg.LinAlgError, inp.linalg.inv, a_dp) @pytest.mark.parametrize( @@ -618,3 +916,115 @@ def test_solve_errors(self): assert_raises( inp.linalg.LinAlgError, inp.linalg.solve, a_dp_ndim_1, b_dp ) + + +class TestSlogdet: + # TODO: Remove the use of fixture for test_slogdet_2d and test_slogdet_3d + # when dpnp.prod() will support complex dtypes on Gen9 + @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_slogdet_2d(self, dtype): + a_np = numpy.array([[1, 2], [3, 4]], dtype=dtype) + a_dp = inp.array(a_np) + + sign_expected, logdet_expected = numpy.linalg.slogdet(a_np) + sign_result, logdet_result = inp.linalg.slogdet(a_dp) + + assert_allclose(sign_expected, sign_result) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + + @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_slogdet_3d(self, dtype): + a_np = numpy.array( + [ + [[1, 2], [3, 4]], + [[1, 2], [2, 1]], + [[1, 3], [3, 1]], + ], + dtype=dtype, + ) + a_dp = inp.array(a_np) + + sign_expected, logdet_expected = numpy.linalg.slogdet(a_np) + sign_result, logdet_result = inp.linalg.slogdet(a_dp) + + assert_allclose(sign_expected, sign_result) + 
assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + + def test_slogdet_strides(self): + a_np = numpy.array( + [ + [2, 3, 1, 4, 5], + [5, 6, 7, 8, 9], + [9, 7, 7, 2, 3], + [1, 4, 5, 1, 8], + [8, 9, 8, 5, 3], + ] + ) + + a_dp = inp.array(a_np) + + # positive strides + sign_expected, logdet_expected = numpy.linalg.slogdet(a_np[::2, ::2]) + sign_result, logdet_result = inp.linalg.slogdet(a_dp[::2, ::2]) + assert_allclose(sign_expected, sign_result) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + + # negative strides + sign_expected, logdet_expected = numpy.linalg.slogdet(a_np[::-2, ::-2]) + sign_result, logdet_result = inp.linalg.slogdet(a_dp[::-2, ::-2]) + assert_allclose(sign_expected, sign_result) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + + @pytest.mark.parametrize( + "matrix", + [ + [[1, 2], [2, 4]], + [[0, 0], [0, 0]], + [[1, 1], [1, 1]], + [[2, 4], [1, 2]], + [[1, 2], [0, 0]], + [[1, 0], [2, 0]], + ], + ids=[ + "Linearly dependent rows", + "Zero matrix", + "Identical rows", + "Linearly dependent columns", + "Zero row", + "Zero column", + ], + ) + def test_slogdet_singular_matrix(self, matrix): + a_np = numpy.array(matrix, dtype="float32") + a_dp = inp.array(a_np) + + sign_expected, logdet_expected = numpy.linalg.slogdet(a_np) + sign_result, logdet_result = inp.linalg.slogdet(a_dp) + + assert_allclose(sign_expected, sign_result) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + + # TODO: remove skipif when MKLD-16626 is resolved + # _getrf_batch does not raise an error with singular matrices. + # Skip running on cpu because dpnp uses _getrf_batch only on cpu. 
+ @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") + def test_slogdet_singular_matrix_3D(self): + a_np = numpy.array( + [[[1, 2], [3, 4]], [[1, 2], [1, 2]], [[1, 3], [3, 1]]] + ) + a_dp = inp.array(a_np) + + sign_expected, logdet_expected = numpy.linalg.slogdet(a_np) + sign_result, logdet_result = inp.linalg.slogdet(a_dp) + + assert_allclose(sign_expected, sign_result) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + + def test_slogdet_errors(self): + a_dp = inp.array([[1, 2], [3, 5]], dtype="float32") + + # unsupported type + a_np = inp.asnumpy(a_dp) + assert_raises(TypeError, inp.linalg.slogdet, a_np) diff --git a/tests/test_logic.py b/tests/test_logic.py index b9d2a9b4303..1e110b4c717 100644 --- a/tests/test_logic.py +++ b/tests/test_logic.py @@ -250,6 +250,11 @@ def test_logical_not(dtype): dpnp_res = dpnp.logical_not(a) assert_equal(dpnp_res, np_res) + dp_out = dpnp.empty(np_res.shape, dtype=dpnp.bool) + dpnp_res = dpnp.logical_not(a, out=dp_out) + assert dpnp_res is dp_out + assert_equal(dpnp_res, np_res) + @pytest.mark.parametrize( "op", @@ -316,6 +321,13 @@ def test_elemwise_comparison(op, x1, x2, dtype): dpnp_res = getattr(dpnp, op)(dp_x1[::-1], dp_x2) assert_equal(dpnp_res, np_res) + # out keyword + np_res = getattr(numpy, op)(np_x1, np_x2) + dp_out = dpnp.empty(np_res.shape, dtype=dpnp.bool) + dpnp_res = getattr(dpnp, op)(dp_x1, dp_x2, out=dp_out) + assert dp_out is dpnp_res + assert_equal(dpnp_res, np_res) + @pytest.mark.parametrize( "op", @@ -374,6 +386,11 @@ def test_comparison_no_broadcast_with_shapes(op, sh1, sh2): @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) def test_finite(op, data, dtype): x = dpnp.asarray(data, dtype=dtype) - np_res = getattr(dpnp, op)(x) - dpnp_res = getattr(numpy, op)(x.asnumpy()) + np_res = getattr(numpy, op)(x.asnumpy()) + dpnp_res = getattr(dpnp, op)(x) + assert_equal(dpnp_res, np_res) + + dp_out = dpnp.empty(np_res.shape, dtype=dpnp.bool) + dpnp_res = getattr(dpnp, op)(x, 
out=dp_out) + assert dp_out is dpnp_res assert_equal(dpnp_res, np_res) diff --git a/tests/test_manipulation.py b/tests/test_manipulation.py index 576229b1d56..bb5533b0e62 100644 --- a/tests/test_manipulation.py +++ b/tests/test_manipulation.py @@ -8,6 +8,7 @@ get_all_dtypes, get_complex_dtypes, get_float_dtypes, + has_support_aspect64, ) testdata = [] @@ -71,11 +72,17 @@ def test_repeat(arr): assert_array_equal(expected, result) +# TODO: Temporary skipping the test, until Internal CI is updated with +# recent changed in dpctl regarding dpt.result_type function +@pytest.mark.skip("Temporary skipping the test") def test_result_type(): - X = [dpnp.ones((2), dtype=dpnp.int64), dpnp.int32, "float16"] - X_np = [numpy.ones((2), dtype=numpy.int64), numpy.int32, "float16"] + X = [dpnp.ones((2), dtype=dpnp.int64), dpnp.int32, "float32"] + X_np = [numpy.ones((2), dtype=numpy.int64), numpy.int32, "float32"] - assert dpnp.result_type(*X) == numpy.result_type(*X_np) + if has_support_aspect64(): + assert dpnp.result_type(*X) == numpy.result_type(*X_np) + else: + assert dpnp.result_type(*X) == dpnp.default_float_type(X[0].device) def test_result_type_only_dtypes(): diff --git a/tests/test_mathematical.py b/tests/test_mathematical.py index 15ca6090868..75735e89bc9 100644 --- a/tests/test_mathematical.py +++ b/tests/test_mathematical.py @@ -1,5 +1,6 @@ from itertools import permutations +import dpctl import dpctl.tensor as dpt import numpy import pytest @@ -28,6 +29,47 @@ ) +class TestAngle: + @pytest.mark.parametrize("deg", [True, False]) + def test_angle_bool(self, deg): + dp_a = dpnp.array([True, False]) + np_a = dp_a.asnumpy() + + expected = numpy.angle(np_a, deg=deg) + result = dpnp.angle(dp_a, deg=deg) + + # In Numpy, for boolean arguments the output data type is always default floating data type. + # while data type of output in DPNP is determined by Type Promotion Rules. 
+ # data type should not be compared + assert_allclose(result.asnumpy(), expected) + + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_bool=True, no_complex=True) + ) + @pytest.mark.parametrize("deg", [True, False]) + def test_angle(self, dtype, deg): + dp_a = dpnp.arange(10, dtype=dtype) + np_a = dp_a.asnumpy() + + expected = numpy.angle(np_a, deg=deg) + result = dpnp.angle(dp_a, deg=deg) + + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_complex_dtypes()) + @pytest.mark.parametrize("deg", [True, False]) + def test_angle_complex(self, dtype, deg): + a = numpy.random.rand(10) + b = numpy.random.rand(10) + np_a = numpy.array(a + 1j * b, dtype=dtype) + dp_a = dpnp.array(np_a) + + expected = numpy.angle(np_a, deg=deg) + result = dpnp.angle(dp_a, deg=deg) + + assert_dtype_allclose(result, expected) + + class TestClip: @pytest.mark.parametrize( "dtype", get_all_dtypes(no_bool=True, no_none=True, no_complex=True) @@ -659,6 +701,17 @@ def test_negative(data, dtype): expected = numpy.negative(np_a) assert_allclose(result, expected) + result = -dpnp_a + expected = -np_a + assert_allclose(result, expected) + + # out keyword + if dtype is not None: + dp_out = dpnp.empty(expected.shape, dtype=dtype) + result = dpnp.negative(dpnp_a, out=dp_out) + assert result is dp_out + assert_allclose(result, expected) + def test_negative_boolean(): dpnp_a = dpnp.array([True, False]) @@ -685,6 +738,13 @@ def test_positive(data, dtype): expected = +np_a assert_allclose(result, expected) + # out keyword + if dtype is not None: + dp_out = dpnp.empty(expected.shape, dtype=dtype) + result = dpnp.positive(dpnp_a, out=dp_out) + assert result is dp_out + assert_allclose(result, expected) + def test_positive_boolean(): dpnp_a = dpnp.array([True, False]) @@ -693,110 +753,108 @@ def test_positive_boolean(): dpnp.positive(dpnp_a) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.parametrize("func", ["prod", "nanprod"]) 
-@pytest.mark.parametrize("axis", [None, 0, 1, -1, 2, -2, (1, 2), (0, -2)]) -@pytest.mark.parametrize("keepdims", [False, True]) -@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) -def test_prod_nanprod(func, axis, keepdims, dtype): - a = numpy.arange(1, 13, dtype=dtype).reshape((2, 2, 3)) - if func == "nanprod" and dpnp.issubdtype(a.dtype, dpnp.inexact): - a[:, :, 2] = numpy.nan - ia = dpnp.array(a) - - np_res = getattr(numpy, func)(a, axis=axis, keepdims=keepdims) - dpnp_res = getattr(dpnp, func)(ia, axis=axis, keepdims=keepdims) - - assert dpnp_res.shape == np_res.shape - assert_allclose(dpnp_res, np_res) - - -@pytest.mark.parametrize("axis", [None, 0, 1, -1, 2, -2, (1, 2), (0, -2)]) -def test_prod_zero_size(axis): - a = numpy.empty((2, 3, 0)) - ia = dpnp.array(a) - - np_res = numpy.prod(a, axis=axis) - dpnp_res = dpnp.prod(ia, axis=axis) - assert_dtype_allclose(dpnp_res, np_res) - - -@pytest.mark.parametrize("func", ["prod", "nanprod"]) -@pytest.mark.parametrize("axis", [None, 0, 1, -1]) -@pytest.mark.parametrize("keepdims", [False, True]) -def test_prod_nanprod_bool(func, axis, keepdims): - a = numpy.arange(2, dtype=numpy.bool_) - a = numpy.tile(a, (2, 2)) - ia = dpnp.array(a) - - np_res = getattr(numpy, func)(a, axis=axis, keepdims=keepdims) - dpnp_res = getattr(dpnp, func)(ia, axis=axis, keepdims=keepdims) - assert_dtype_allclose(dpnp_res, np_res) - - -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -@pytest.mark.usefixtures("suppress_complex_warning") -@pytest.mark.usefixtures("suppress_invalid_numpy_warnings") -@pytest.mark.parametrize("func", ["prod", "nanprod"]) -@pytest.mark.parametrize("in_dtype", get_all_dtypes(no_bool=True)) -@pytest.mark.parametrize( - "out_dtype", get_all_dtypes(no_bool=True, no_none=True) -) -def test_prod_nanprod_dtype(func, in_dtype, out_dtype): - a = numpy.arange(1, 13, dtype=in_dtype).reshape((2, 2, 3)) - if func == "nanprod" and dpnp.issubdtype(a.dtype, dpnp.inexact): - a[:, :, 2] = numpy.nan - ia = 
dpnp.array(a) - - np_res = getattr(numpy, func)(a, dtype=out_dtype) - dpnp_res = getattr(dpnp, func)(ia, dtype=out_dtype) - assert_dtype_allclose(dpnp_res, np_res) - - -@pytest.mark.usefixtures("suppress_overflow_encountered_in_cast_numpy_warnings") -@pytest.mark.parametrize("func", ["prod", "nanprod"]) -def test_prod_nanprod_out(func): - ia = dpnp.arange(1, 7).reshape((2, 3)) - ia = ia.astype(dpnp.default_float_type(ia.device)) - if func == "nanprod": - ia[:, 1] = dpnp.nan - a = dpnp.asnumpy(ia) - - # output is dpnp_array - np_res = getattr(numpy, func)(a, axis=0) - dpnp_out = dpnp.empty(np_res.shape, dtype=np_res.dtype) - dpnp_res = getattr(dpnp, func)(ia, axis=0, out=dpnp_out) - assert dpnp_out is dpnp_res - assert_allclose(dpnp_res, np_res) - - # output is usm_ndarray - dpt_out = dpt.empty(np_res.shape, dtype=np_res.dtype) - dpnp_res = getattr(dpnp, func)(ia, axis=0, out=dpt_out) - assert dpt_out is dpnp_res.get_array() - assert_allclose(dpnp_res, np_res) - - # out is a numpy array -> TypeError - dpnp_res = numpy.empty_like(np_res) - with pytest.raises(TypeError): - getattr(dpnp, func)(ia, axis=0, out=dpnp_res) +class TestProd: + @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.parametrize("func", ["prod", "nanprod"]) + @pytest.mark.parametrize("axis", [None, 0, 1, -1, 2, -2, (1, 2), (0, -2)]) + @pytest.mark.parametrize("keepdims", [False, True]) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_prod_nanprod(self, func, axis, keepdims, dtype): + a = numpy.arange(1, 13, dtype=dtype).reshape((2, 2, 3)) + if func == "nanprod" and dpnp.issubdtype(a.dtype, dpnp.inexact): + a[:, :, 2] = numpy.nan + ia = dpnp.array(a) + + np_res = getattr(numpy, func)(a, axis=axis, keepdims=keepdims) + dpnp_res = getattr(dpnp, func)(ia, axis=axis, keepdims=keepdims) + + assert dpnp_res.shape == np_res.shape + assert_allclose(dpnp_res, np_res) + + @pytest.mark.parametrize("axis", [None, 0, 1, -1, 2, -2, (1, 2), (0, -2)]) + def 
test_prod_zero_size(self, axis): + a = numpy.empty((2, 3, 0)) + ia = dpnp.array(a) + + np_res = numpy.prod(a, axis=axis) + dpnp_res = dpnp.prod(ia, axis=axis) + assert_dtype_allclose(dpnp_res, np_res) + + @pytest.mark.parametrize("func", ["prod", "nanprod"]) + @pytest.mark.parametrize("axis", [None, 0, 1, -1]) + @pytest.mark.parametrize("keepdims", [False, True]) + def test_prod_nanprod_bool(self, func, axis, keepdims): + a = numpy.arange(2, dtype=numpy.bool_) + a = numpy.tile(a, (2, 2)) + ia = dpnp.array(a) + + np_res = getattr(numpy, func)(a, axis=axis, keepdims=keepdims) + dpnp_res = getattr(dpnp, func)(ia, axis=axis, keepdims=keepdims) + assert_dtype_allclose(dpnp_res, np_res) - # incorrect shape for out - dpnp_res = dpnp.array(numpy.empty((2, 3))) - with pytest.raises(ValueError): - getattr(dpnp, func)(ia, axis=0, out=dpnp_res) + @pytest.mark.usefixtures("allow_fall_back_on_numpy") + @pytest.mark.usefixtures("suppress_complex_warning") + @pytest.mark.usefixtures("suppress_invalid_numpy_warnings") + @pytest.mark.parametrize("func", ["prod", "nanprod"]) + @pytest.mark.parametrize("in_dtype", get_all_dtypes(no_bool=True)) + @pytest.mark.parametrize( + "out_dtype", get_all_dtypes(no_bool=True, no_none=True) + ) + def test_prod_nanprod_dtype(self, func, in_dtype, out_dtype): + a = numpy.arange(1, 13, dtype=in_dtype).reshape((2, 2, 3)) + if func == "nanprod" and dpnp.issubdtype(a.dtype, dpnp.inexact): + a[:, :, 2] = numpy.nan + ia = dpnp.array(a) + + np_res = getattr(numpy, func)(a, dtype=out_dtype) + dpnp_res = getattr(dpnp, func)(ia, dtype=out_dtype) + assert_dtype_allclose(dpnp_res, np_res) + + @pytest.mark.usefixtures( + "suppress_overflow_encountered_in_cast_numpy_warnings" + ) + @pytest.mark.parametrize("func", ["prod", "nanprod"]) + def test_prod_nanprod_out(self, func): + ia = dpnp.arange(1, 7).reshape((2, 3)) + ia = ia.astype(dpnp.default_float_type(ia.device)) + if func == "nanprod": + ia[:, 1] = dpnp.nan + a = dpnp.asnumpy(ia) + + # output is dpnp_array + 
np_res = getattr(numpy, func)(a, axis=0) + dpnp_out = dpnp.empty(np_res.shape, dtype=np_res.dtype) + dpnp_res = getattr(dpnp, func)(ia, axis=0, out=dpnp_out) + assert dpnp_out is dpnp_res + assert_allclose(dpnp_res, np_res) + + # output is usm_ndarray + dpt_out = dpt.empty(np_res.shape, dtype=np_res.dtype) + dpnp_res = getattr(dpnp, func)(ia, axis=0, out=dpt_out) + assert dpt_out is dpnp_res.get_array() + assert_allclose(dpnp_res, np_res) + + # out is a numpy array -> TypeError + dpnp_res = numpy.empty_like(np_res) + with pytest.raises(TypeError): + getattr(dpnp, func)(ia, axis=0, out=dpnp_res) + # incorrect shape for out + dpnp_res = dpnp.array(numpy.empty((2, 3))) + with pytest.raises(ValueError): + getattr(dpnp, func)(ia, axis=0, out=dpnp_res) -def test_prod_nanprod_Error(): - ia = dpnp.arange(5) + def test_prod_nanprod_Error(self): + ia = dpnp.arange(5) - with pytest.raises(TypeError): - dpnp.prod(dpnp.asnumpy(ia)) - with pytest.raises(TypeError): - dpnp.nanprod(dpnp.asnumpy(ia)) - with pytest.raises(NotImplementedError): - dpnp.prod(ia, where=False) - with pytest.raises(NotImplementedError): - dpnp.prod(ia, initial=6) + with pytest.raises(TypeError): + dpnp.prod(dpnp.asnumpy(ia)) + with pytest.raises(TypeError): + dpnp.nanprod(dpnp.asnumpy(ia)) + with pytest.raises(NotImplementedError): + dpnp.prod(ia, where=False) + with pytest.raises(NotImplementedError): + dpnp.prod(ia, initial=6) @pytest.mark.parametrize( @@ -812,7 +870,14 @@ def test_sign(data, dtype): result = dpnp.sign(dpnp_a) expected = numpy.sign(np_a) - assert_allclose(result, expected) + assert_dtype_allclose(result, expected) + + # out keyword + if dtype is not None: + dp_out = dpnp.empty(expected.shape, dtype=expected.dtype) + result = dpnp.sign(dpnp_a, out=dp_out) + assert dp_out is result + assert_dtype_allclose(result, expected) def test_sign_boolean(): @@ -834,9 +899,20 @@ def test_signbit(data, dtype): result = dpnp.signbit(dpnp_a) expected = numpy.signbit(np_a) - assert_allclose(result, 
expected) + assert_dtype_allclose(result, expected) + + # out keyword + dp_out = dpnp.empty(expected.shape, dtype=expected.dtype) + result = dpnp.signbit(dpnp_a, out=dp_out) + assert dp_out is result + assert_dtype_allclose(result, expected) +@pytest.mark.parametrize( + "func", + ["real", "imag", "conj"], + ids=["real", "imag", "conj"], +) @pytest.mark.parametrize( "data", [complex(-1, -4), complex(-1, 2), complex(3, -7), complex(4, 12)], @@ -848,17 +924,20 @@ def test_signbit(data, dtype): ], ) @pytest.mark.parametrize("dtype", get_complex_dtypes()) -def test_real_imag(data, dtype): +def test_complex_funcs(func, data, dtype): np_a = numpy.array(data, dtype=dtype) dpnp_a = dpnp.array(data, dtype=dtype) - result = dpnp.real(dpnp_a) - expected = numpy.real(np_a) - assert_allclose(result, expected) + result = getattr(dpnp, func)(dpnp_a) + expected = getattr(numpy, func)(np_a) + assert_dtype_allclose(result, expected) - result = dpnp.imag(dpnp_a) - expected = numpy.imag(np_a) - assert_allclose(result, expected) + # out keyword + if func == "conj": + dp_out = dpnp.empty(expected.shape, dtype=expected.dtype) + result = getattr(dpnp, func)(dpnp_a, out=dp_out) + assert dp_out is result + assert_dtype_allclose(result, expected) @pytest.mark.parametrize("dtype", get_complex_dtypes()) @@ -876,9 +955,16 @@ def test_projection_infinity(dtype): complex(dpnp.inf, 0.0), ] - result = dpnp.proj(dpnp.array(X, dtype=dtype)) + a = dpnp.array(X, dtype=dtype) + result = dpnp.proj(a) expected = dpnp.array(Y, dtype=dtype) - assert_allclose(result, expected) + assert_dtype_allclose(result, expected) + + # out keyword + dp_out = dpnp.empty(expected.shape, dtype=expected.dtype) + result = dpnp.proj(a, out=dp_out) + assert dp_out is result + assert_dtype_allclose(result, expected) @pytest.mark.parametrize("dtype", get_all_dtypes()) @@ -1115,7 +1201,9 @@ def test_invalid_dtype(self, dtype): dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - with 
pytest.raises(TypeError): + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): dpnp.ceil(dp_array, out=dp_out) @pytest.mark.parametrize("dtype", get_float_dtypes()) @@ -1155,7 +1243,9 @@ def test_invalid_dtype(self, dtype): dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - with pytest.raises(TypeError): + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): dpnp.floor(dp_array, out=dp_out) @pytest.mark.parametrize("dtype", get_float_dtypes()) @@ -1195,7 +1285,9 @@ def test_invalid_dtype(self, dtype): dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - with pytest.raises(TypeError): + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): dpnp.trunc(dp_array, out=dp_out) @pytest.mark.parametrize("dtype", get_float_dtypes()) @@ -1246,7 +1338,9 @@ def test_out_dtypes(self, dtype): dp_out = dpnp.empty(size, dtype=dpnp.complex64) if dtype != dpnp.complex64: # dtype of out mismatches types of input arrays - with pytest.raises(TypeError): + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): dpnp.add(dp_array1, dp_array2, out=dp_out) # allocate new out with expected type @@ -1343,7 +1437,9 @@ def test_out_dtypes(self, dtype): check_dtype = True if dtype != dpnp.complex64: # dtype of out mismatches types of input arrays - with pytest.raises(TypeError): + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): dpnp.divide(dp_array1, dp_array2, out=dp_out) # allocate new out with expected type @@ -1444,7 +1540,9 @@ def test_out_dtypes(self, dtype): dp_out = dpnp.empty(size, dtype=dpnp.complex64) if dtype != dpnp.complex64: # dtype of out mismatches types of input 
arrays - with pytest.raises(TypeError): + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): dpnp.floor_divide(dp_array1, dp_array2, out=dp_out) # allocate new out with expected type @@ -1704,7 +1802,9 @@ def test_out_dtypes(self, dtype): dp_out = dpnp.empty(size, dtype=dpnp.float32) if dtype != dpnp.float32: # dtype of out mismatches types of input arrays - with pytest.raises(TypeError): + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): dpnp.hypot(dp_array1, dp_array2, out=dp_out) # allocate new out with expected type @@ -1872,7 +1972,9 @@ def test_out_dtypes(self, dtype): dp_out = dpnp.empty(size, dtype=dpnp.complex64) if dtype != dpnp.complex64: # dtype of out mismatches types of input arrays - with pytest.raises(TypeError): + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): dpnp.maximum(dp_array1, dp_array2, out=dp_out) # allocate new out with expected type @@ -1953,7 +2055,9 @@ def test_out_dtypes(self, dtype): dp_out = dpnp.empty(size, dtype=dpnp.complex64) if dtype != dpnp.complex64: # dtype of out mismatches types of input arrays - with pytest.raises(TypeError): + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): dpnp.minimum(dp_array1, dp_array2, out=dp_out) # allocate new out with expected type @@ -2034,7 +2138,9 @@ def test_out_dtypes(self, dtype): dp_out = dpnp.empty(size, dtype=dpnp.complex64) if dtype != dpnp.complex64: # dtype of out mismatches types of input arrays - with pytest.raises(TypeError): + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): dpnp.multiply(dp_array1, dp_array2, out=dp_out) # allocate new out with expected type @@ -2129,7 +2235,9 @@ def test_out_dtypes(self, 
dtype): dp_out = dpnp.empty(size, dtype=dpnp.complex64) if dtype != dpnp.complex64: # dtype of out mismatches types of input arrays - with pytest.raises(TypeError): + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): dpnp.power(dp_array1, dp_array2, out=dp_out) # allocate new out with expected type @@ -2314,6 +2422,71 @@ def test_sum(shape, dtype_in, dtype_out, transpose, keepdims, order): assert_array_equal(numpy_res, dpnp_res.asnumpy()) +class TestNanSum: + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + @pytest.mark.parametrize("axis", [None, 0, 1, (0, 1)]) + @pytest.mark.parametrize("keepdims", [True, False]) + def test_nansum(self, dtype, axis, keepdims): + dp_array = dpnp.array([[dpnp.nan, 1, 2], [3, dpnp.nan, 0]], dtype=dtype) + np_array = dpnp.asnumpy(dp_array) + + expected = numpy.nansum(np_array, axis=axis, keepdims=keepdims) + result = dpnp.nansum(dp_array, axis=axis, keepdims=keepdims) + assert_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_complex_dtypes()) + def test_nansum_complex(self, dtype): + x1 = numpy.random.rand(10) + x2 = numpy.random.rand(10) + a = numpy.array(x1 + 1j * x2, dtype=dtype) + a[::3] = numpy.nan + ia = dpnp.array(a) + + expected = numpy.nansum(a) + result = dpnp.nansum(ia) + + # use only type kinds check when dpnp handles complex64 arrays + # since `dpnp.sum()` and `numpy.sum()` return different dtypes + assert_dtype_allclose( + result, expected, check_only_type_kind=(dtype == dpnp.complex64) + ) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + @pytest.mark.parametrize("axis", [0, 1]) + def test_nansum_out(self, dtype, axis): + dp_array = dpnp.array([[dpnp.nan, 1, 2], [3, dpnp.nan, 0]], dtype=dtype) + np_array = dpnp.asnumpy(dp_array) + + expected = numpy.nansum(np_array, axis=axis) + out = dpnp.empty_like(dpnp.asarray(expected)) + result = dpnp.nansum(dp_array, axis=axis, out=out) + assert out is 
result + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_nansum_dtype(self, dtype): + dp_array = dpnp.array([[dpnp.nan, 1, 2], [3, dpnp.nan, 0]]) + np_array = dpnp.asnumpy(dp_array) + + expected = numpy.nansum(np_array, dtype=dtype) + result = dpnp.nansum(dp_array, dtype=dtype) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_nansum_strided(self, dtype): + dp_array = dpnp.arange(20, dtype=dtype) + dp_array[::3] = dpnp.nan + np_array = dpnp.asnumpy(dp_array) + + result = dpnp.nansum(dp_array[::-1]) + expected = numpy.nansum(np_array[::-1]) + assert_allclose(result, expected) + + result = dpnp.nansum(dp_array[::2]) + expected = numpy.nansum(np_array[::2]) + assert_allclose(result, expected) + + @pytest.mark.parametrize( "dtype", get_all_dtypes(no_bool=True, no_none=True, no_complex=True) ) @@ -2326,3 +2499,367 @@ def test_inplace_remainder(dtype): dp_a %= 4 assert_allclose(dp_a, np_a) + + +@pytest.mark.parametrize( + "dtype", get_all_dtypes(no_bool=True, no_none=True, no_complex=True) +) +def test_inplace_floor_divide(dtype): + size = 21 + np_a = numpy.arange(size, dtype=dtype) + dp_a = dpnp.arange(size, dtype=dtype) + + np_a //= 4 + dp_a //= 4 + + assert_allclose(dp_a, np_a) + + +class TestMatmul: + @pytest.mark.parametrize( + "order_pair", [("C", "C"), ("C", "F"), ("F", "C"), ("F", "F")] + ) + @pytest.mark.parametrize( + "shape_pair", + [ + ((4,), (4,)), + ((4,), (4, 2)), + ((2, 4), (4,)), + ((2, 4), (4, 3)), + ((1, 2, 3), (1, 3, 5)), + ((4, 2, 3), (4, 3, 5)), + ((1, 2, 3), (4, 3, 5)), + ((2, 3), (4, 3, 5)), + ((4, 2, 3), (1, 3, 5)), + ((4, 2, 3), (3, 5)), + ((1, 1, 4, 3), (1, 1, 3, 5)), + ((6, 7, 4, 3), (6, 7, 3, 5)), + ((6, 7, 4, 3), (1, 1, 3, 5)), + ((6, 7, 4, 3), (1, 3, 5)), + ((6, 7, 4, 3), (3, 5)), + ((6, 7, 4, 3), (1, 7, 3, 5)), + ((6, 7, 4, 3), (7, 3, 5)), + ((6, 7, 4, 3), (6, 1, 3, 5)), + ((1, 1, 4, 3), (6, 7, 3, 5)), + 
((1, 4, 3), (6, 7, 3, 5)), + ((4, 3), (6, 7, 3, 5)), + ((6, 1, 4, 3), (6, 7, 3, 5)), + ((1, 7, 4, 3), (6, 7, 3, 5)), + ((7, 4, 3), (6, 7, 3, 5)), + ((1, 5, 3, 2), (6, 5, 2, 4)), + ((5, 3, 2), (6, 5, 2, 4)), + ((1, 3, 3), (10, 1, 3, 1)), + ], + ) + def test_matmul(self, order_pair, shape_pair): + order1, order2 = order_pair + shape1, shape2 = shape_pair + a1 = numpy.arange(numpy.prod(shape1)).reshape(shape1) + a2 = numpy.arange(numpy.prod(shape2)).reshape(shape2) + a1 = numpy.array(a1, order=order1) + a2 = numpy.array(a2, order=order2) + + b1 = dpnp.asarray(a1) + b2 = dpnp.asarray(a2) + + result = dpnp.matmul(b1, b2) + expected = numpy.matmul(a1, a2) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize( + "order_pair", [("C", "C"), ("C", "F"), ("F", "C"), ("F", "F")] + ) + @pytest.mark.parametrize( + "shape_pair", + [ + ((2, 0), (0, 3)), + ((0, 4), (4, 3)), + ((2, 4), (4, 0)), + ((1, 2, 3), (0, 3, 5)), + ((0, 2, 3), (1, 3, 5)), + ((2, 3), (0, 3, 5)), + ((0, 2, 3), (3, 5)), + ((0, 0, 4, 3), (1, 1, 3, 5)), + ((6, 0, 4, 3), (1, 3, 5)), + ((0, 7, 4, 3), (3, 5)), + ((0, 7, 4, 3), (1, 7, 3, 5)), + ((0, 7, 4, 3), (7, 3, 5)), + ((6, 0, 4, 3), (6, 1, 3, 5)), + ((1, 1, 4, 3), (0, 0, 3, 5)), + ((1, 4, 3), (6, 0, 3, 5)), + ((4, 3), (0, 0, 3, 5)), + ((6, 1, 4, 3), (6, 0, 3, 5)), + ((1, 7, 4, 3), (0, 7, 3, 5)), + ((7, 4, 3), (0, 7, 3, 5)), + ], + ) + def test_matmul_empty(self, order_pair, shape_pair): + order1, order2 = order_pair + shape1, shape2 = shape_pair + a1 = numpy.arange(numpy.prod(shape1)).reshape(shape1) + a2 = numpy.arange(numpy.prod(shape2)).reshape(shape2) + a1 = numpy.array(a1, order=order1) + a2 = numpy.array(a2, order=order2) + + b1 = dpnp.asarray(a1) + b2 = dpnp.asarray(a2) + + result = dpnp.matmul(b1, b2) + expected = numpy.matmul(a1, a2) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize( + "shape_pair", + [ + ((2, 4), (4, 3)), + ((4, 2, 3), (4, 3, 5)), + ((6, 7, 4, 3), (6, 7, 3, 5)), + ], + ids=[ + "((2, 4), (4, 3))", + 
"((4, 2, 3), (4, 3, 5))", + "((6, 7, 4, 3), (6, 7, 3, 5))", + ], + ) + def test_matmul_bool(self, shape_pair): + shape1, shape2 = shape_pair + a1 = numpy.resize( + numpy.arange(2, dtype=numpy.bool_), numpy.prod(shape1) + ).reshape(shape1) + a2 = numpy.resize( + numpy.arange(2, dtype=numpy.bool_), numpy.prod(shape2) + ).reshape(shape2) + + b1 = dpnp.asarray(a1) + b2 = dpnp.asarray(a2) + + result = dpnp.matmul(b1, b2) + expected = numpy.matmul(a1, a2) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + @pytest.mark.parametrize( + "shape_pair", + [ + ((2, 4), (4, 3)), + ((4, 2, 3), (4, 3, 5)), + ((6, 7, 4, 3), (6, 7, 3, 5)), + ], + ids=[ + "((2, 4), (4, 3))", + "((4, 2, 3), (4, 3, 5))", + "((6, 7, 4, 3), (6, 7, 3, 5))", + ], + ) + def test_matmul_dtype(self, dtype, shape_pair): + shape1, shape2 = shape_pair + a1 = numpy.arange(numpy.prod(shape1)).reshape(shape1) + a2 = numpy.arange(numpy.prod(shape2)).reshape(shape2) + + b1 = dpnp.asarray(a1) + b2 = dpnp.asarray(a2) + + result = dpnp.matmul(b1, b2, dtype=dtype) + expected = numpy.matmul(a1, a2, dtype=dtype) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype1", get_all_dtypes(no_bool=True)) + @pytest.mark.parametrize( + "dtype2", get_all_dtypes(no_bool=True, no_none=True) + ) + @pytest.mark.parametrize( + "shape_pair", + [ + ((2, 4), (4, 3)), + ((4, 2, 3), (4, 3, 5)), + ((6, 7, 4, 3), (6, 7, 3, 5)), + ], + ids=[ + "((2, 4), (4, 3))", + "((4, 2, 3), (4, 3, 5))", + "((6, 7, 4, 3), (6, 7, 3, 5))", + ], + ) + def test_matmul_dtype_matrix_inputs(self, dtype1, dtype2, shape_pair): + shape1, shape2 = shape_pair + a1 = numpy.arange(numpy.prod(shape1), dtype=dtype1).reshape(shape1) + a2 = numpy.arange(numpy.prod(shape2), dtype=dtype1).reshape(shape2) + + b1 = dpnp.asarray(a1) + b2 = dpnp.asarray(a2) + + if dpnp.can_cast(dpnp.result_type(b1, b2), dtype2, casting="same_kind"): + result = dpnp.matmul(b1, b2, dtype=dtype2) + expected = 
numpy.matmul(a1, a2, dtype=dtype2) + assert_dtype_allclose(result, expected) + else: + with pytest.raises(TypeError): + dpnp.matmul(b1, b2, dtype=dtype2) + + # TODO: Temporary skipping the test, until Internal CI is updated with + # recent changed in dpctl regarding dpt.result_type function + @pytest.mark.skip("Temporary skipping the test") + @pytest.mark.parametrize("dtype1", get_all_dtypes(no_bool=True)) + @pytest.mark.parametrize("dtype2", get_all_dtypes(no_bool=True)) + @pytest.mark.parametrize( + "shape_pair", + [ + ((2, 4), (4, 3)), + ((4, 2, 3), (4, 3, 5)), + ((6, 7, 4, 3), (6, 7, 3, 5)), + ], + ids=[ + "((2, 4), (4, 3))", + "((4, 2, 3), (4, 3, 5))", + "((6, 7, 4, 3), (6, 7, 3, 5))", + ], + ) + def test_matmul_dtype_matrix_inout(self, dtype1, dtype2, shape_pair): + shape1, shape2 = shape_pair + a1 = numpy.arange(numpy.prod(shape1), dtype=dtype1).reshape(shape1) + a2 = numpy.arange(numpy.prod(shape2), dtype=dtype2).reshape(shape2) + + b1 = dpnp.asarray(a1) + b2 = dpnp.asarray(a2) + + result = dpnp.matmul(b1, b2) + expected = numpy.matmul(a1, a2) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("order", ["C", "F", "K", "A"]) + @pytest.mark.parametrize( + "shape_pair", + [ + ((2, 4), (4, 3)), + ((4, 2, 3), (4, 3, 5)), + ((6, 7, 4, 3), (6, 7, 3, 5)), + ], + ids=[ + "((2, 4), (4, 3))", + "((4, 2, 3), (4, 3, 5))", + "((6, 7, 4, 3), (6, 7, 3, 5))", + ], + ) + def test_matmul_order(self, order, shape_pair): + shape1, shape2 = shape_pair + a1 = numpy.arange(numpy.prod(shape1)).reshape(shape1) + a2 = numpy.arange(numpy.prod(shape2)).reshape(shape2) + + b1 = dpnp.asarray(a1) + b2 = dpnp.asarray(a2) + + result = dpnp.matmul(b1, b2, order=order) + expected = numpy.matmul(a1, a2, order=order) + assert result.flags.c_contiguous == expected.flags.c_contiguous + assert result.flags.f_contiguous == expected.flags.f_contiguous + assert_dtype_allclose(result, expected) + + def test_matmul_strided(self): + for dim in [1, 2, 3, 4]: + A = 
numpy.random.rand(*([20] * dim)) + B = dpnp.asarray(A) + # positive strides + slices = tuple(slice(None, None, 2) for _ in range(dim)) + a = A[slices] + b = B[slices] + + result = dpnp.matmul(b, b) + expected = numpy.matmul(a, a) + assert_dtype_allclose(result, expected) + + # negative strides + slices = tuple(slice(None, None, -2) for _ in range(dim)) + a = A[slices] + b = B[slices] + + result = dpnp.matmul(b, b) + expected = numpy.matmul(a, a) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, no_bool=True) + ) + def test_matmul_out(self, dtype): + a1 = numpy.arange(5 * 4, dtype=dtype).reshape(5, 4) + a2 = numpy.arange(7 * 4, dtype=dtype).reshape(4, 7) + + b1 = dpnp.asarray(a1) + b2 = dpnp.asarray(a2) + + dpnp_out = dpnp.empty((5, 7), dtype=dtype) + result = dpnp.matmul(b1, b2, out=dpnp_out) + expected = numpy.matmul(a1, a2) + assert result is dpnp_out + assert_dtype_allclose(result, expected) + + +class TestMatmulInvalidCases: + @pytest.mark.parametrize( + "shape_pair", + [ + ((3, 2), ()), + ((), (3, 2)), + ((), ()), + ], + ) + def test_zero_dim(self, shape_pair): + for xp in (numpy, dpnp): + shape1, shape2 = shape_pair + x1 = xp.arange(numpy.prod(shape1), dtype=xp.float32).reshape(shape1) + x2 = xp.arange(numpy.prod(shape2), dtype=xp.float32).reshape(shape2) + with pytest.raises(ValueError): + xp.matmul(x1, x2) + + @pytest.mark.parametrize( + "shape_pair", + [ + ((5, 3, 1), (3, 1, 4)), + ((3, 2, 3), (3, 2, 4)), + ((3, 2), (1,)), + ((1, 2), (3, 1)), + ((4, 3, 2), (6, 5, 2, 4)), + ((6, 5, 3, 2), (3, 2, 4)), + ], + ) + def test_invalid_shape(self, shape_pair): + for xp in (numpy, dpnp): + shape1, shape2 = shape_pair + x1 = xp.arange(numpy.prod(shape1), dtype=xp.float32).reshape(shape1) + x2 = xp.arange(numpy.prod(shape2), dtype=xp.float32).reshape(shape2) + with pytest.raises(ValueError): + xp.matmul(x1, x2) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)[:-2]) + def 
test_invalid_dtype(self, dtype): + dpnp_dtype = get_all_dtypes(no_none=True)[-1] + a1 = dpnp.arange(5 * 4, dtype=dpnp_dtype).reshape(5, 4) + a2 = dpnp.arange(7 * 4, dtype=dpnp_dtype).reshape(4, 7) + dp_out = dpnp.empty((5, 7), dtype=dtype) + + with pytest.raises(TypeError): + dpnp.matmul(a1, a2, out=dp_out) + + def test_exe_q(self): + x1 = dpnp.ones((5, 4), sycl_queue=dpctl.SyclQueue()) + x2 = dpnp.ones((4, 7), sycl_queue=dpctl.SyclQueue()) + + with pytest.raises(ValueError): + dpnp.matmul(x1, x2) + + # TODO: Temporary skipping the test, until Internal CI is updated with + # recent changed in dpctl regarding dpt.result_type function + @pytest.mark.skip("Temporary skipping the test") + def test_matmul_casting(self): + a1 = dpnp.arange(2 * 4, dtype=dpnp.float32).reshape(2, 4) + a2 = dpnp.arange(4 * 3).reshape(4, 3) + + res = dpnp.empty((2, 3), dtype=dpnp.int64) + with pytest.raises(TypeError): + dpnp.matmul(a1, a2, out=res, casting="safe") + + def test_matmul_subok(self): + a1 = dpnp.arange(2 * 4).reshape(2, 4) + a2 = dpnp.arange(4 * 3).reshape(4, 3) + + with pytest.raises(NotImplementedError): + dpnp.matmul(a1, a2, subok=False) diff --git a/tests/test_random_state.py b/tests/test_random_state.py index 4771eadc42e..70940501d2e 100644 --- a/tests/test_random_state.py +++ b/tests/test_random_state.py @@ -491,7 +491,7 @@ def test_rng_zero_and_extremes(self): sycl_device = dpctl.SyclQueue().sycl_device if sycl_device.has_aspect_gpu and not sycl_device.has_aspect_fp64: - # TODO: discuss with opneMKL + # TODO: discuss with oneMKL pytest.skip( f"Due to some reason, oneMKL wrongly returns high value instead of low" ) diff --git a/tests/test_sort.py b/tests/test_sort.py index 7ef6c23f734..1899604a304 100644 --- a/tests/test_sort.py +++ b/tests/test_sort.py @@ -4,7 +4,161 @@ import dpnp -from .helper import get_all_dtypes +from .helper import assert_dtype_allclose, get_all_dtypes, get_complex_dtypes + + +class TestSort: + @pytest.mark.parametrize("dtype", 
get_all_dtypes(no_complex=True)) + def test_sort_dtype(self, dtype): + a = numpy.random.uniform(-5, 5, 10) + np_array = numpy.array(a, dtype=dtype) + dp_array = dpnp.array(np_array) + + result = dpnp.sort(dp_array) + expected = numpy.sort(np_array) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_complex_dtypes()) + def test_sort_complex(self, dtype): + a = numpy.random.uniform(-5, 5, 10) + b = numpy.random.uniform(-5, 5, 10) + np_array = numpy.array(a + b * 1j, dtype=dtype) + dp_array = dpnp.array(np_array) + + result = dpnp.sort(dp_array) + expected = numpy.sort(np_array) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("axis", [None, -2, -1, 0, 1, 2]) + def test_sort_axis(self, axis): + a = numpy.random.uniform(-10, 10, 36) + np_array = numpy.array(a).reshape(3, 4, 3) + dp_array = dpnp.array(np_array) + + result = dpnp.sort(dp_array, axis=axis) + expected = numpy.sort(np_array, axis=axis) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_all_dtypes()) + @pytest.mark.parametrize("axis", [-2, -1, 0, 1]) + def test_sort_ndarray(self, dtype, axis): + a = numpy.random.uniform(-10, 10, 12) + np_array = numpy.array(a, dtype=dtype).reshape(6, 2) + dp_array = dpnp.array(np_array) + + dp_array.sort(axis=axis) + np_array.sort(axis=axis) + assert_dtype_allclose(dp_array, np_array) + + def test_sort_stable(self): + np_array = numpy.repeat(numpy.arange(10), 10) + dp_array = dpnp.array(np_array) + + result = dpnp.sort(dp_array, kind="stable") + expected = numpy.sort(np_array, kind="stable") + assert_dtype_allclose(result, expected) + + def test_sort_ndarray_axis_none(self): + a = numpy.random.uniform(-10, 10, 12) + dp_array = dpnp.array(a).reshape(6, 2) + with pytest.raises(TypeError): + dp_array.sort(axis=None) + + def test_sort_zero_dim(self): + np_array = numpy.array(2.5) + dp_array = dpnp.array(np_array) + + # with default axis=-1 + with pytest.raises(numpy.AxisError): + 
dpnp.sort(dp_array) + + # with axis = None + result = dpnp.sort(dp_array, axis=None) + expected = numpy.sort(np_array, axis=None) + assert_dtype_allclose(result, expected) + + def test_sort_notimplemented(self): + dp_array = dpnp.arange(10) + + with pytest.raises(NotImplementedError): + dpnp.sort(dp_array, kind="quicksort") + + with pytest.raises(NotImplementedError): + dpnp.sort(dp_array, order=["age"]) + + +class TestArgsort: + @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) + def test_argsort_dtype(self, dtype): + a = numpy.random.uniform(-5, 5, 10) + np_array = numpy.array(a, dtype=dtype) + dp_array = dpnp.array(np_array) + + result = dpnp.argsort(dp_array, kind="stable") + expected = numpy.argsort(np_array, kind="stable") + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_complex_dtypes()) + def test_argsort_complex(self, dtype): + a = numpy.random.uniform(-5, 5, 10) + b = numpy.random.uniform(-5, 5, 10) + np_array = numpy.array(a + b * 1j, dtype=dtype) + dp_array = dpnp.array(np_array) + + result = dpnp.argsort(dp_array) + expected = numpy.argsort(np_array) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("axis", [None, -2, -1, 0, 1, 2]) + def test_argsort_axis(self, axis): + a = numpy.random.uniform(-10, 10, 36) + np_array = numpy.array(a).reshape(3, 4, 3) + dp_array = dpnp.array(np_array) + + result = dpnp.argsort(dp_array, axis=axis) + expected = numpy.argsort(np_array, axis=axis) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_all_dtypes()) + @pytest.mark.parametrize("axis", [None, -2, -1, 0, 1]) + def test_argsort_ndarray(self, dtype, axis): + a = numpy.random.uniform(-10, 10, 12) + np_array = numpy.array(a, dtype=dtype).reshape(6, 2) + dp_array = dpnp.array(np_array) + + result = dp_array.argsort(axis=axis) + expected = np_array.argsort(axis=axis) + assert_dtype_allclose(result, expected) + + def test_argsort_stable(self): + np_array = 
numpy.repeat(numpy.arange(10), 10) + dp_array = dpnp.array(np_array) + + result = dpnp.argsort(dp_array, kind="stable") + expected = numpy.argsort(np_array, kind="stable") + assert_dtype_allclose(result, expected) + + def test_argsort_zero_dim(self): + np_array = numpy.array(2.5) + dp_array = dpnp.array(np_array) + + # with default axis=-1 + with pytest.raises(numpy.AxisError): + dpnp.argsort(dp_array) + + # with axis = None + result = dpnp.argsort(dp_array, axis=None) + expected = numpy.argsort(np_array, axis=None) + assert_dtype_allclose(result, expected) + + def test_sort_notimplemented(self): + dp_array = dpnp.arange(10) + + with pytest.raises(NotImplementedError): + dpnp.argsort(dp_array, kind="quicksort") + + with pytest.raises(NotImplementedError): + dpnp.argsort(dp_array, order=["age"]) @pytest.mark.parametrize("kth", [0, 1], ids=["0", "1"]) diff --git a/tests/test_statistics.py b/tests/test_statistics.py index f3866b3c27e..1f340ec4490 100644 --- a/tests/test_statistics.py +++ b/tests/test_statistics.py @@ -1,5 +1,4 @@ -import warnings - +import dpctl import dpctl.tensor as dpt import numpy import pytest @@ -13,6 +12,7 @@ from .helper import ( assert_dtype_allclose, get_all_dtypes, + get_complex_dtypes, get_float_complex_dtypes, has_support_aspect64, ) @@ -32,127 +32,280 @@ def test_median(dtype, size): assert_allclose(dpnp_res, np_res) -@pytest.mark.parametrize("func", ["max", "min", "nanmax", "nanmin"]) -@pytest.mark.parametrize("axis", [None, 0, 1, -1, 2, -2, (1, 2), (0, -2)]) -@pytest.mark.parametrize("keepdims", [False, True]) -@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) -def test_max_min(func, axis, keepdims, dtype): - a = numpy.arange(768, dtype=dtype).reshape((4, 4, 6, 8)) - if func in ["nanmax", "nanmin"] and dpnp.issubdtype(a.dtype, dpnp.inexact): - a[2:3, 2, 3:4, 4] = numpy.nan - ia = dpnp.array(a) +class TestMaxMin: + @pytest.mark.parametrize("func", ["max", "min", "nanmax", "nanmin"]) + @pytest.mark.parametrize("axis", 
[None, 0, 1, -1, 2, -2, (1, 2), (0, -2)]) + @pytest.mark.parametrize("keepdims", [False, True]) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_max_min(self, func, axis, keepdims, dtype): + a = numpy.arange(768, dtype=dtype).reshape((4, 4, 6, 8)) + if func in ["nanmax", "nanmin"] and dpnp.issubdtype( + a.dtype, dpnp.inexact + ): + a[2:3, 2, 3:4, 4] = numpy.nan + ia = dpnp.array(a) - np_res = getattr(numpy, func)(a, axis=axis, keepdims=keepdims) - dpnp_res = getattr(dpnp, func)(ia, axis=axis, keepdims=keepdims) - assert_dtype_allclose(dpnp_res, np_res) + np_res = getattr(numpy, func)(a, axis=axis, keepdims=keepdims) + dpnp_res = getattr(dpnp, func)(ia, axis=axis, keepdims=keepdims) + assert_dtype_allclose(dpnp_res, np_res) + + @pytest.mark.parametrize("func", ["max", "min", "nanmax", "nanmin"]) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_max_min_strided(self, func, dtype): + a = numpy.arange(20, dtype=dtype) + if func in ["nanmax", "nanmin"] and dpnp.issubdtype( + a.dtype, dpnp.inexact + ): + a[::3] = numpy.nan + ia = dpnp.array(a) + np_res = getattr(numpy, func)(a[::-1]) + dpnp_res = getattr(dpnp, func)(ia[::-1]) + assert_dtype_allclose(dpnp_res, np_res) -@pytest.mark.parametrize("func", ["max", "min"]) -@pytest.mark.parametrize("axis", [None, 0, 1, -1]) -@pytest.mark.parametrize("keepdims", [False, True]) -def test_max_min_bool(func, axis, keepdims): - a = numpy.arange(2, dtype=numpy.bool_) - a = numpy.tile(a, (2, 2)) - ia = dpnp.array(a) + np_res = getattr(numpy, func)(a[::2]) + dpnp_res = getattr(dpnp, func)(ia[::2]) + assert_dtype_allclose(dpnp_res, np_res) - np_res = getattr(numpy, func)(a, axis=axis, keepdims=keepdims) - dpnp_res = getattr(dpnp, func)(ia, axis=axis, keepdims=keepdims) - assert_dtype_allclose(dpnp_res, np_res) + @pytest.mark.parametrize("func", ["max", "min"]) + @pytest.mark.parametrize("axis", [None, 0, 1, -1]) + @pytest.mark.parametrize("keepdims", [False, True]) + def 
test_max_min_bool(self, func, axis, keepdims): + a = numpy.arange(2, dtype=numpy.bool_) + a = numpy.tile(a, (2, 2)) + ia = dpnp.array(a) + np_res = getattr(numpy, func)(a, axis=axis, keepdims=keepdims) + dpnp_res = getattr(dpnp, func)(ia, axis=axis, keepdims=keepdims) + assert_dtype_allclose(dpnp_res, np_res) -@pytest.mark.parametrize("func", ["max", "min", "nanmax", "nanmin"]) -def test_max_min_out(func): - a = numpy.arange(12, dtype=numpy.float32).reshape((2, 2, 3)) - if func in ["nanmax", "nanmin"]: - a[1, 0, 2] = numpy.nan - ia = dpnp.array(a) + @pytest.mark.parametrize("func", ["max", "min", "nanmax", "nanmin"]) + def test_max_min_out(self, func): + a = numpy.arange(12, dtype=numpy.float32).reshape((2, 2, 3)) + if func in ["nanmax", "nanmin"]: + a[1, 0, 2] = numpy.nan + ia = dpnp.array(a) - # out is dpnp_array - np_res = getattr(numpy, func)(a, axis=0) - dpnp_out = dpnp.empty(np_res.shape, dtype=np_res.dtype) - dpnp_res = getattr(dpnp, func)(ia, axis=0, out=dpnp_out) - assert dpnp_out is dpnp_res - assert_allclose(dpnp_res, np_res) + # out is dpnp_array + np_res = getattr(numpy, func)(a, axis=0) + dpnp_out = dpnp.empty(np_res.shape, dtype=np_res.dtype) + dpnp_res = getattr(dpnp, func)(ia, axis=0, out=dpnp_out) + assert dpnp_out is dpnp_res + assert_allclose(dpnp_res, np_res) + + # out is usm_ndarray + dpt_out = dpt.empty(np_res.shape, dtype=np_res.dtype) + dpnp_res = getattr(dpnp, func)(ia, axis=0, out=dpt_out) + assert dpt_out is dpnp_res.get_array() + assert_allclose(dpnp_res, np_res) + + # output is numpy array -> Error + dpnp_res = numpy.empty_like(np_res) + with pytest.raises(TypeError): + getattr(dpnp, func)(ia, axis=0, out=dpnp_res) - # out is usm_ndarray - dpt_out = dpt.empty(np_res.shape, dtype=np_res.dtype) - dpnp_res = getattr(dpnp, func)(ia, axis=0, out=dpt_out) - assert dpt_out is dpnp_res.get_array() - assert_allclose(dpnp_res, np_res) + # output has incorrect shape -> Error + dpnp_res = dpnp.array(numpy.empty((4, 2))) + with 
pytest.raises(ValueError): + getattr(dpnp, func)(ia, axis=0, out=dpnp_res) - # output is numpy array -> Error - dpnp_res = numpy.empty_like(np_res) - with pytest.raises(TypeError): - getattr(dpnp, func)(ia, axis=0, out=dpnp_res) + @pytest.mark.parametrize("func", ["max", "min", "nanmax", "nanmin"]) + def test_max_min_error(self, func): + ia = dpnp.arange(5) + # where is not supported + with pytest.raises(NotImplementedError): + getattr(dpnp, func)(ia, where=False) - # output has incorrect shape -> Error - dpnp_res = dpnp.array(numpy.empty((4, 2))) - with pytest.raises(ValueError): - getattr(dpnp, func)(ia, axis=0, out=dpnp_res) + # initial is not supported + with pytest.raises(NotImplementedError): + getattr(dpnp, func)(ia, initial=6) + @pytest.mark.parametrize("func", ["nanmax", "nanmin"]) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_nanmax_nanmin_no_NaN(self, func, dtype): + a = numpy.arange(768, dtype=dtype).reshape((4, 4, 6, 8)) + ia = dpnp.array(a) -@pytest.mark.parametrize("func", ["max", "min", "nanmax", "nanmin"]) -def test_max_min_error(func): - ia = dpnp.arange(5) - # where is not supported - with pytest.raises(NotImplementedError): - getattr(dpnp, func)(ia, where=False) + np_res = getattr(numpy, func)(a, axis=0) + dpnp_res = getattr(dpnp, func)(ia, axis=0) + assert_dtype_allclose(dpnp_res, np_res) - # initial is not supported - with pytest.raises(NotImplementedError): - getattr(dpnp, func)(ia, initial=6) + @pytest.mark.parametrize("func", ["nanmax", "nanmin"]) + def test_nanmax_nanmin_all_NaN(self, recwarn, func): + a = numpy.arange(12, dtype=numpy.float32).reshape((2, 2, 3)) + a[:, :, 2] = numpy.nan + ia = dpnp.array(a) + np_res = getattr(numpy, func)(a, axis=0) + dpnp_res = getattr(dpnp, func)(ia, axis=0) + assert_dtype_allclose(dpnp_res, np_res) -@pytest.mark.parametrize("func", ["nanmax", "nanmin"]) -@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) -def test_nanmax_nanmin_no_NaN(func, dtype): - a = 
numpy.arange(768, dtype=dtype).reshape((4, 4, 6, 8)) - ia = dpnp.array(a) + assert len(recwarn) == 2 + assert all( + "All-NaN slice encountered" in str(r.message) for r in recwarn + ) + assert all(r.category is RuntimeWarning for r in recwarn) - np_res = getattr(numpy, func)(a, axis=0) - dpnp_res = getattr(dpnp, func)(ia, axis=0) - assert_dtype_allclose(dpnp_res, np_res) +class TestAverage: + @pytest.mark.parametrize("dtype", get_all_dtypes()) + @pytest.mark.parametrize("axis", [None, 0, 1]) + @pytest.mark.parametrize("returned", [True, False]) + def test_avg_no_wgt(self, dtype, axis, returned): + dp_array = dpnp.array([[1, 1, 2], [3, 4, 5]], dtype=dtype) + np_array = dpnp.asnumpy(dp_array) -@pytest.mark.parametrize("func", ["nanmax", "nanmin"]) -def test_nanmax_nanmin_all_NaN(recwarn, func): - a = numpy.arange(12, dtype=numpy.float32).reshape((2, 2, 3)) - a[:, :, 2] = numpy.nan - ia = dpnp.array(a) + result = dpnp.average(dp_array, axis=axis, returned=returned) + expected = numpy.average(np_array, axis=axis, returned=returned) + if returned: + assert_dtype_allclose(result[0], expected[0]) + assert_dtype_allclose(result[1], expected[1]) + else: + assert_dtype_allclose(result, expected) - np_res = getattr(numpy, func)(a, axis=0) - dpnp_res = getattr(dpnp, func)(ia, axis=0) - assert_dtype_allclose(dpnp_res, np_res) + @pytest.mark.parametrize("dtype", get_all_dtypes()) + @pytest.mark.parametrize("axis", [None, 0, 1, (0, 1)]) + @pytest.mark.parametrize("returned", [True, False]) + def test_avg(self, dtype, axis, returned): + dp_array = dpnp.array([[1, 1, 2], [3, 4, 5]], dtype=dtype) + dp_wgt = dpnp.array([[3, 1, 2], [3, 4, 2]], dtype=dtype) + np_array = dpnp.asnumpy(dp_array) + np_wgt = dpnp.asnumpy(dp_wgt) + + result = dpnp.average( + dp_array, axis=axis, weights=dp_wgt, returned=returned + ) + expected = numpy.average( + np_array, axis=axis, weights=np_wgt, returned=returned + ) + + if returned: + assert_dtype_allclose(result[0], expected[0]) + 
assert_dtype_allclose(result[1], expected[1]) + else: + assert_dtype_allclose(result, expected) - assert len(recwarn) == 2 - assert all("All-NaN slice encountered" in str(r.message) for r in recwarn) - assert all(r.category is RuntimeWarning for r in recwarn) + @pytest.mark.parametrize("dtype", get_complex_dtypes()) + def test_avg_complex(self, dtype): + x1 = numpy.random.rand(10) + x2 = numpy.random.rand(10) + a = numpy.array(x1 + 1j * x2, dtype=dtype) + w = numpy.array(x2 + 1j * x1, dtype=dtype) + ia = dpnp.array(a) + iw = dpnp.array(w) + + expected = numpy.average(a, weights=w) + result = dpnp.average(ia, weights=iw) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize( + "weight", + [[[3, 1, 2], [3, 4, 2]], ((3, 1, 2), (3, 4, 2))], + ids=["list", "tuple"], + ) + def test_avg_weight_array_like(self, weight): + dp_array = dpnp.array([[1, 1, 2], [3, 4, 5]]) + wgt = weight + np_array = dpnp.asnumpy(dp_array) + + res = dpnp.average(dp_array, weights=wgt) + exp = numpy.average(np_array, weights=wgt) + assert_dtype_allclose(res, exp) + + def test_avg_weight_1D(self): + dp_array = dpnp.arange(12).reshape(3, 4) + wgt = [1, 2, 3] + np_array = dpnp.asnumpy(dp_array) + + res = dpnp.average(dp_array, axis=0, weights=wgt) + exp = numpy.average(np_array, axis=0, weights=wgt) + assert_dtype_allclose(res, exp) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True)) + def test_avg_strided(self, dtype): + dp_array = dpnp.arange(20, dtype=dtype) + dp_wgt = dpnp.arange(-10, 10, dtype=dtype) + np_array = dpnp.asnumpy(dp_array) + np_wgt = dpnp.asnumpy(dp_wgt) + + result = dpnp.average(dp_array[::-1], weights=dp_wgt[::-1]) + expected = numpy.average(np_array[::-1], weights=np_wgt[::-1]) + assert_allclose(expected, result) + + result = dpnp.average(dp_array[::2], weights=dp_wgt[::2]) + expected = numpy.average(np_array[::2], weights=np_wgt[::2]) + assert_allclose(expected, result) + + def test_avg_error(self): + a = dpnp.arange(5) + w = dpnp.zeros(5) + # 
Weights sum to zero + with pytest.raises(ZeroDivisionError): + dpnp.average(a, weights=w) + + a = dpnp.arange(12).reshape(3, 4) + w = dpnp.ones(12) + # Axis must be specified when shapes of input array and weights differ + with pytest.raises(TypeError): + dpnp.average(a, weights=w) + + a = dpnp.arange(12).reshape(3, 4) + w = dpnp.ones(12).reshape(2, 6) + # 1D weights expected when shapes of input array and weights differ. + with pytest.raises(TypeError): + dpnp.average(a, axis=0, weights=w) + + a = dpnp.arange(12).reshape(3, 4) + w = dpnp.ones(12) + # Length of weights not compatible with specified axis. + with pytest.raises(ValueError): + dpnp.average(a, axis=0, weights=w) + + a = dpnp.arange(12, sycl_queue=dpctl.SyclQueue()) + w = dpnp.ones(12, sycl_queue=dpctl.SyclQueue()) + # Execution placement can not be unambiguously inferred + with pytest.raises(ValueError): + dpnp.average(a, axis=0, weights=w) class TestMean: @pytest.mark.parametrize("dtype", get_all_dtypes()) - def test_mean_axis_tuple(self, dtype): + @pytest.mark.parametrize("axis", [None, 0, 1, (0, 1)]) + @pytest.mark.parametrize("keepdims", [True, False]) + def test_mean(self, dtype, axis, keepdims): dp_array = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype=dtype) np_array = dpnp.asnumpy(dp_array) - result = dpnp.mean(dp_array, axis=(0, 1)) - expected = numpy.mean(np_array, axis=(0, 1)) - assert_allclose(expected, result) + result = dpnp.mean(dp_array, axis=axis, keepdims=keepdims) + expected = numpy.mean(np_array, axis=axis, keepdims=keepdims) + assert_dtype_allclose(result, expected) @pytest.mark.parametrize("dtype", get_all_dtypes()) - @pytest.mark.parametrize("axis", [0, 1, (0, 1)]) + @pytest.mark.parametrize("axis", [0, 1]) def test_mean_out(self, dtype, axis): dp_array = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype=dtype) np_array = dpnp.asnumpy(dp_array) expected = numpy.mean(np_array, axis=axis) - result = dpnp.empty_like(dpnp.asarray(expected)) - dpnp.mean(dp_array, axis=axis, out=result) + out = 
dpnp.empty_like(dpnp.asarray(expected)) + result = dpnp.mean(dp_array, axis=axis, out=out) + assert result is out + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_complex_dtypes()) + def test_mean_complex(self, dtype): + x1 = numpy.random.rand(10) + x2 = numpy.random.rand(10) + a = numpy.array(x1 + 1j * x2, dtype=dtype) + ia = dpnp.array(a) + + expected = numpy.mean(a) + result = dpnp.mean(ia) assert_dtype_allclose(result, expected) @pytest.mark.parametrize("dtype", get_all_dtypes()) def test_mean_dtype(self, dtype): - dp_array = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype="i4") + dp_array = dpnp.array([[0, 1, 2], [3, 4, 0]]) np_array = dpnp.asnumpy(dp_array) expected = numpy.mean(np_array, dtype=dtype) @@ -173,8 +326,9 @@ def test_mean_empty(self, axis, shape): expected = numpy.mean(np_array, axis=axis) assert_allclose(expected, result) - def test_mean_strided(self): - dp_array = dpnp.array([-2, -1, 0, 1, 0, 2], dtype="f4") + @pytest.mark.parametrize("dtype", get_all_dtypes()) + def test_mean_strided(self, dtype): + dp_array = dpnp.array([-2, -1, 0, 1, 0, 2], dtype=dtype) np_array = dpnp.asnumpy(dp_array) result = dpnp.mean(dp_array[::-1]) @@ -199,6 +353,91 @@ def test_mean_NotImplemented(self): dpnp.mean(ia, where=False) +class TestNanMean: + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + @pytest.mark.parametrize("axis", [None, 0, 1, (0, 1)]) + @pytest.mark.parametrize("keepdims", [True, False]) + def test_nanmean(self, dtype, axis, keepdims): + dp_array = dpnp.array([[0, 1, 2], [3, 4, 0]], dtype=dtype) + np_array = dpnp.asnumpy(dp_array) + + result = dpnp.nanmean(dp_array, axis=axis, keepdims=keepdims) + expected = numpy.nanmean(np_array, axis=axis, keepdims=keepdims) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + @pytest.mark.parametrize("axis", [0, 1]) + def test_nanmean_out(self, dtype, axis): + dp_array = dpnp.array([[dpnp.nan, 1, 2], [3, dpnp.nan, 
0]], dtype=dtype) + np_array = dpnp.asnumpy(dp_array) + + expected = numpy.nanmean(np_array, axis=axis) + out = dpnp.empty_like(dpnp.asarray(expected)) + result = dpnp.nanmean(dp_array, axis=axis, out=out) + assert out is result + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_complex_dtypes()) + def test_nanmean_complex(self, dtype): + x1 = numpy.random.rand(10) + x2 = numpy.random.rand(10) + a = numpy.array(x1 + 1j * x2, dtype=dtype) + a[::3] = numpy.nan + ia = dpnp.array(a) + + expected = numpy.nanmean(a) + result = dpnp.nanmean(ia) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_nanmean_dtype(self, dtype): + dp_array = dpnp.array([[dpnp.nan, 1, 2], [3, dpnp.nan, 0]]) + np_array = dpnp.asnumpy(dp_array) + + expected = numpy.nanmean(np_array, dtype=dtype) + result = dpnp.nanmean(dp_array, dtype=dtype) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_nanmean_strided(self, dtype): + dp_array = dpnp.arange(20, dtype=dtype) + dp_array[::3] = dpnp.nan + np_array = dpnp.asnumpy(dp_array) + + result = dpnp.nanmean(dp_array[::-1]) + expected = numpy.nanmean(np_array[::-1]) + assert_dtype_allclose(result, expected) + + result = dpnp.nanmean(dp_array[::2]) + expected = numpy.nanmean(np_array[::2]) + assert_dtype_allclose(result, expected) + + @pytest.mark.usefixtures("suppress_mean_empty_slice_numpy_warnings") + def test_nanmean_scalar(self): + dp_array = dpnp.array(dpnp.nan) + np_array = dpnp.asnumpy(dp_array) + + result = dpnp.nanmean(dp_array) + expected = numpy.nanmean(np_array) + assert_allclose(expected, result) + + def test_nanmean_error(self): + ia = dpnp.arange(5, dtype=dpnp.float32) + ia[0] = dpnp.nan + # where keyword is not implemented + with pytest.raises(NotImplementedError): + dpnp.nanmean(ia, where=False) + + # dtype should be floating + with pytest.raises(TypeError): + 
dpnp.nanmean(ia, dtype=dpnp.int32) + + # out dtype should be inexact + res = dpnp.empty((1,), dtype=dpnp.int32) + with pytest.raises(TypeError): + dpnp.nanmean(ia, out=res) + + class TestVar: @pytest.mark.usefixtures( "suppress_divide_invalid_numpy_warnings", "suppress_dof_numpy_warnings" @@ -234,8 +473,9 @@ def test_var_out(self, dtype, axis, ddof): res_dtype = expected.dtype else: res_dtype = dpnp.default_float_type(dp_array.device) - result = dpnp.empty(expected.shape, dtype=res_dtype) - dpnp.var(dp_array, axis=axis, out=result, ddof=ddof) + out = dpnp.empty(expected.shape, dtype=res_dtype) + result = dpnp.var(dp_array, axis=axis, out=out, ddof=ddof) + assert result is out assert_dtype_allclose(result, expected) @pytest.mark.usefixtures( @@ -329,8 +569,9 @@ def test_std_out(self, dtype, axis, ddof): res_dtype = expected.dtype else: res_dtype = dpnp.default_float_type(dp_array.device) - result = dpnp.empty(expected.shape, dtype=res_dtype) - dpnp.std(dp_array, axis=axis, out=result, ddof=ddof) + out = dpnp.empty(expected.shape, dtype=res_dtype) + result = dpnp.std(dp_array, axis=axis, out=out, ddof=ddof) + assert out is result assert_dtype_allclose(result, expected) @pytest.mark.usefixtures( @@ -443,6 +684,18 @@ def test_nanvar(self, array, dtype): result = dpnp.nanvar(ia, ddof=ddof) assert_dtype_allclose(result, expected) + @pytest.mark.parametrize("dtype", get_complex_dtypes()) + def test_nanvar_complex(self, dtype): + x1 = numpy.random.rand(10) + x2 = numpy.random.rand(10) + a = numpy.array(x1 + 1j * x2, dtype=dtype) + a[::3] = numpy.nan + ia = dpnp.array(a) + + expected = numpy.nanvar(a) + result = dpnp.nanvar(ia) + assert_dtype_allclose(result, expected) + @pytest.mark.usefixtures("suppress_dof_numpy_warnings") @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) @pytest.mark.parametrize("axis", [None, 0, 1, 2, (0, 1), (1, 2)]) @@ -459,8 +712,25 @@ def test_nanvar_out(self, dtype, axis, keepdims, ddof): res_dtype = expected.dtype else: res_dtype = 
dpnp.default_float_type(ia.device) - result = dpnp.empty(expected.shape, dtype=res_dtype) - dpnp.nanvar(ia, out=result, axis=axis, ddof=ddof, keepdims=keepdims) + out = dpnp.empty(expected.shape, dtype=res_dtype) + result = dpnp.nanvar( + ia, out=out, axis=axis, ddof=ddof, keepdims=keepdims + ) + assert result is out + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_nanvar_strided(self, dtype): + dp_array = dpnp.arange(20, dtype=dtype) + dp_array[::3] = dpnp.nan + np_array = dpnp.asnumpy(dp_array) + + result = dpnp.nanvar(dp_array[::-1]) + expected = numpy.nanvar(np_array[::-1]) + assert_dtype_allclose(result, expected) + + result = dpnp.nanvar(dp_array[::2]) + expected = numpy.nanvar(np_array[::2]) assert_dtype_allclose(result, expected) @pytest.mark.usefixtures("suppress_complex_warning") @@ -497,6 +767,133 @@ def test_nanvar_error(self): dpnp.nanvar(ia, ddof="1") +class TestNanStd: + @pytest.mark.parametrize( + "array", + [ + [2, 0, 6, 2], + [2, 0, 6, 2, 5, 6, 7, 8], + [], + [2, 1, numpy.nan, 5, 3], + [-1, numpy.nan, 1, numpy.inf], + [3, 6, 0, 1], + [3, 6, 0, 1, 8], + [3, 2, 9, 6, numpy.nan], + [numpy.nan, numpy.nan, numpy.inf, numpy.nan], + [[2, 0], [6, 2]], + [[2, 0, 6, 2], [5, 6, 7, 8]], + [[[2, 0], [6, 2]], [[5, 6], [7, 8]]], + [[-1, numpy.nan], [1, numpy.inf]], + [[numpy.nan, numpy.nan], [numpy.inf, numpy.nan]], + ], + ids=[ + "[2, 0, 6, 2]", + "[2, 0, 6, 2, 5, 6, 7, 8]", + "[]", + "[2, 1, np.nan, 5, 3]", + "[-1, np.nan, 1, np.inf]", + "[3, 6, 0, 1]", + "[3, 6, 0, 1, 8]", + "[3, 2, 9, 6, np.nan]", + "[np.nan, np.nan, np.inf, np.nan]", + "[[2, 0], [6, 2]]", + "[[2, 0, 6, 2], [5, 6, 7, 8]]", + "[[[2, 0], [6, 2]], [[5, 6], [7, 8]]]", + "[[-1, np.nan], [1, np.inf]]", + "[[np.nan, np.nan], [np.inf, np.nan]]", + ], + ) + @pytest.mark.usefixtures( + "suppress_invalid_numpy_warnings", "suppress_dof_numpy_warnings" + ) + @pytest.mark.parametrize( + "dtype", get_all_dtypes(no_none=True, 
no_bool=True) + ) + def test_nanstd(self, array, dtype): + try: + a = numpy.array(array, dtype=dtype) + except: + pytest.skip("floating datat type is needed to store NaN") + ia = dpnp.array(a) + for ddof in range(a.ndim): + expected = numpy.nanstd(a, ddof=ddof) + result = dpnp.nanstd(ia, ddof=ddof) + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_complex_dtypes()) + def test_nanstd_complex(self, dtype): + x1 = numpy.random.rand(10) + x2 = numpy.random.rand(10) + a = numpy.array(x1 + 1j * x2, dtype=dtype) + a[::3] = numpy.nan + ia = dpnp.array(a) + + expected = numpy.nanstd(a) + result = dpnp.nanstd(ia) + assert_dtype_allclose(result, expected) + + @pytest.mark.usefixtures("suppress_dof_numpy_warnings") + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + @pytest.mark.parametrize("axis", [None, 0, 1, 2, (0, 1), (1, 2)]) + @pytest.mark.parametrize("keepdims", [True, False]) + @pytest.mark.parametrize("ddof", [0, 0.5, 1, 1.5, 2, 3]) + def test_nanstd_out(self, dtype, axis, keepdims, ddof): + a = numpy.arange(4 * 3 * 5, dtype=dtype) + a[::2] = numpy.nan + a = a.reshape(4, 3, 5) + ia = dpnp.array(a) + + expected = numpy.nanstd(a, axis=axis, ddof=ddof, keepdims=keepdims) + if has_support_aspect64(): + res_dtype = expected.dtype + else: + res_dtype = dpnp.default_float_type(ia.device) + out = dpnp.empty(expected.shape, dtype=res_dtype) + result = dpnp.nanstd( + ia, out=out, axis=axis, ddof=ddof, keepdims=keepdims + ) + assert result is out + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_nanstd_strided(self, dtype): + dp_array = dpnp.arange(20, dtype=dtype) + dp_array[::3] = dpnp.nan + np_array = dpnp.asnumpy(dp_array) + + result = dpnp.nanstd(dp_array[::-1]) + expected = numpy.nanstd(np_array[::-1]) + assert_dtype_allclose(result, expected) + + result = dpnp.nanstd(dp_array[::2]) + expected = numpy.nanstd(np_array[::2]) + assert_dtype_allclose(result, 
expected) + + @pytest.mark.usefixtures("suppress_complex_warning") + @pytest.mark.parametrize("dt_in", get_float_complex_dtypes()) + @pytest.mark.parametrize("dt_out", get_float_complex_dtypes()) + def test_nanstd_dtype(self, dt_in, dt_out): + a = numpy.arange(4 * 3 * 5, dtype=dt_in) + a[::2] = numpy.nan + a = a.reshape(4, 3, 5) + ia = dpnp.array(a) + + expected = numpy.nanstd(a, dtype=dt_out) + result = dpnp.nanstd(ia, dtype=dt_out) + assert_dtype_allclose(result, expected) + + def test_nanstd_error(self): + ia = dpnp.arange(5, dtype=dpnp.float32) + ia[0] = dpnp.nan + # where keyword is not implemented + with pytest.raises(NotImplementedError): + dpnp.nanstd(ia, where=False) + + # ddof should be an integer or float + with pytest.raises(TypeError): + dpnp.nanstd(ia, ddof="1") + + @pytest.mark.usefixtures("allow_fall_back_on_numpy") class TestBincount: @pytest.mark.parametrize( diff --git a/tests/test_strides.py b/tests/test_strides.py index 903341f1000..6fe28281fc6 100644 --- a/tests/test_strides.py +++ b/tests/test_strides.py @@ -6,7 +6,12 @@ import dpnp -from .helper import assert_dtype_allclose, get_all_dtypes +from .helper import ( + assert_dtype_allclose, + get_all_dtypes, + get_complex_dtypes, + get_float_complex_dtypes, +) def _getattr(ex, str_): @@ -50,6 +55,7 @@ def test_strides(func_name, dtype): "arcsinh", "arctan", "arctanh", + "argsort", "cbrt", "ceil", "copy", @@ -73,6 +79,7 @@ def test_strides(func_name, dtype): "sign", "sin", "sinh", + "sort", "sqrt", "square", "tan", @@ -171,21 +178,27 @@ def test_strides_erf(dtype, shape): assert_allclose(result, expected, rtol=1e-06) -@pytest.mark.parametrize("dtype", get_all_dtypes(no_bool=True, no_complex=True)) -@pytest.mark.parametrize("shape", [(10,)], ids=["(10,)"]) -def test_strides_reciprocal(dtype, shape): - start, stop = 1, numpy.prod(shape) + 1 +@pytest.mark.parametrize("dtype", get_float_complex_dtypes()) +def test_reciprocal(dtype): + dpa = dpnp.arange(1, 11, dtype=dtype)[::2] + a = numpy.arange(1, 
11, dtype=dtype)[::2] - a = numpy.arange(start, stop, dtype=dtype).reshape(shape) - b = a[::2] + result = dpnp.reciprocal(dpa) + expected = numpy.reciprocal(a) - dpa = dpnp.reshape(dpnp.arange(start, stop, dtype=dtype), shape) - dpb = dpa[::2] + assert_dtype_allclose(result, expected) - result = dpnp.reciprocal(dpb) - expected = numpy.reciprocal(b) - assert_allclose(result, expected, rtol=1e-06) +@pytest.mark.parametrize("dtype", get_complex_dtypes()) +def test_angle(dtype): + a = numpy.random.rand(10) + b = numpy.random.rand(10) + z = numpy.array(a + 1j * b, dtype=dtype)[::2] + dpz = dpnp.array(z) + + result = dpnp.angle(dpz) + expected = numpy.angle(z) + assert_dtype_allclose(result, expected) @pytest.mark.parametrize( diff --git a/tests/test_sycl_queue.py b/tests/test_sycl_queue.py index fb31bd59ebf..7a7bcd53e0b 100644 --- a/tests/test_sycl_queue.py +++ b/tests/test_sycl_queue.py @@ -331,7 +331,9 @@ def test_meshgrid(device_x, device_y): @pytest.mark.parametrize( "func,data", [ + pytest.param("average", [1.0, 2.0, 4.0, 7.0]), pytest.param("abs", [-1.2, 1.2]), + pytest.param("angle", [[1.0 + 1.0j, 2.0 + 3.0j]]), pytest.param("arccos", [-0.5, 0.0, 0.5]), pytest.param("arccosh", [1.5, 3.5, 5.0]), pytest.param("arcsin", [-0.5, 0.0, 0.5]), @@ -340,6 +342,7 @@ def test_meshgrid(device_x, device_y): pytest.param("arctanh", [-0.5, 0.0, 0.5]), pytest.param("argmax", [1.0, 2.0, 4.0, 7.0]), pytest.param("argmin", [1.0, 2.0, 4.0, 7.0]), + pytest.param("argsort", [2.0, 1.0, 7.0, 4.0]), pytest.param("cbrt", [1.0, 8.0, 27.0]), pytest.param("ceil", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]), pytest.param("conjugate", [[1.0 + 1.0j, 0.0], [0.0, 1.0 + 1.0j]]), @@ -374,8 +377,10 @@ def test_meshgrid(device_x, device_y): pytest.param("nancumprod", [1.0, dpnp.nan]), pytest.param("nancumsum", [1.0, dpnp.nan]), pytest.param("nanmax", [1.0, 2.0, 4.0, dpnp.nan]), + pytest.param("nanmean", [1.0, 2.0, 4.0, dpnp.nan]), pytest.param("nanmin", [1.0, 2.0, 4.0, dpnp.nan]), pytest.param("nanprod", 
[1.0, dpnp.nan]), + pytest.param("nanstd", [1.0, 2.0, 4.0, dpnp.nan]), pytest.param("nansum", [1.0, dpnp.nan]), pytest.param("nanvar", [1.0, 2.0, 4.0, dpnp.nan]), pytest.param("negative", [1.0, 0.0, -1.0]), @@ -385,13 +390,16 @@ def test_meshgrid(device_x, device_y): pytest.param( "real", [complex(1.0, 2.0), complex(3.0, 4.0), complex(5.0, 6.0)] ), + pytest.param("reciprocal", [1.0, 2.0, 4.0, 7.0]), pytest.param("sign", [-5.0, 0.0, 4.5]), pytest.param("signbit", [-5.0, 0.0, 4.5]), pytest.param( "sin", [-dpnp.pi / 2, -dpnp.pi / 4, 0.0, dpnp.pi / 4, dpnp.pi / 2] ), pytest.param("sinh", [-5.0, -3.5, 0.0, 3.5, 5.0]), + pytest.param("sort", [2.0, 1.0, 7.0, 4.0]), pytest.param("sqrt", [1.0, 3.0, 9.0]), + pytest.param("square", [1.0, 3.0, 9.0]), pytest.param("std", [1.0, 2.0, 4.0, 7.0]), pytest.param("sum", [1.0, 2.0]), pytest.param( @@ -409,6 +417,9 @@ def test_meshgrid(device_x, device_y): ids=[device.filter_string for device in valid_devices], ) def test_1in_1out(func, data, device): + if func in ("std", "var") and "opencl:gpu" in device.filter_string: + pytest.skip("due to reproted crash on Windows: CMPLRLLVM-55640") + x = dpnp.array(data, device=device) result = getattr(dpnp, func)(x) @@ -663,6 +674,41 @@ def test_2in_1out_diff_queue_but_equal_context(func, device): getattr(dpnp, func)(x1, x2) +@pytest.mark.parametrize( + "device", + valid_devices, + ids=[device.filter_string for device in valid_devices], +) +@pytest.mark.parametrize( + "shape_pair", + [ + ((2, 4), (4, 3)), + ((4, 2, 3), (4, 3, 5)), + ((6, 7, 4, 3), (6, 7, 3, 5)), + ], + ids=[ + "((2, 4), (4, 3))", + "((4, 2, 3), (4, 3, 5))", + "((6, 7, 4, 3), (6, 7, 3, 5))", + ], +) +def test_matmul(device, shape_pair): + shape1, shape2 = shape_pair + a1 = numpy.arange(numpy.prod(shape1)).reshape(shape1) + a2 = numpy.arange(numpy.prod(shape2)).reshape(shape2) + + b1 = dpnp.asarray(a1, device=device) + b2 = dpnp.asarray(a2, device=device) + + result = dpnp.matmul(b1, b2) + expected = numpy.matmul(a1, a2) + 
assert_allclose(expected, result) + + result_queue = result.sycl_queue + assert_sycl_queue_equal(result_queue, b1.sycl_queue) + assert_sycl_queue_equal(result_queue, b2.sycl_queue) + + @pytest.mark.parametrize( "func, kwargs", [ @@ -922,19 +968,31 @@ def test_fft_rfft(type, shape, device): assert_sycl_queue_equal(result_queue, expected_queue) +@pytest.mark.parametrize( + "data, is_empty", + [ + ([[1, -2], [2, 5]], False), + ([[[1, -2], [2, 5]], [[1, -2], [2, 5]]], False), + ((0, 0), True), + ((3, 0, 0), True), + ], + ids=["2D", "3D", "Empty_2D", "Empty_3D"], +) @pytest.mark.parametrize( "device", valid_devices, ids=[device.filter_string for device in valid_devices], ) -def test_cholesky(device): - data = [[[1.0, -2.0], [2.0, 5.0]], [[1.0, -2.0], [2.0, 5.0]]] - numpy_data = numpy.array(data) - dpnp_data = dpnp.array(data, device=device) +def test_cholesky(data, is_empty, device): + if is_empty: + numpy_data = numpy.empty(data, dtype=dpnp.default_float_type(device)) + else: + numpy_data = numpy.array(data, dtype=dpnp.default_float_type(device)) + dpnp_data = dpnp.array(numpy_data, device=device) result = dpnp.linalg.cholesky(dpnp_data) expected = numpy.linalg.cholesky(numpy_data) - assert_array_equal(expected, result) + assert_dtype_allclose(result, expected) expected_queue = dpnp_data.get_array().sycl_queue result_queue = result.get_array().sycl_queue @@ -1081,21 +1139,42 @@ def test_eigvals(device): assert_sycl_queue_equal(result_queue, expected_queue) +@pytest.mark.parametrize( + "shape, is_empty", + [ + ((2, 2), False), + ((3, 2, 2), False), + ((0, 0), True), + ((0, 2, 2), True), + ], + ids=[ + "(2, 2)", + "(3, 2, 2)", + "(0, 0)", + "(0, 2, 2)", + ], +) @pytest.mark.parametrize( "device", valid_devices, ids=[device.filter_string for device in valid_devices], ) -def test_inv(device): - data = [[1.0, 2.0], [3.0, 4.0]] - numpy_data = numpy.array(data) - dpnp_data = dpnp.array(data, device=device) +def test_inv(shape, is_empty, device): + if is_empty: + numpy_x = 
numpy.empty(shape, dtype=dpnp.default_float_type(device)) + else: + count_elem = numpy.prod(shape) + numpy_x = numpy.arange( + 1, count_elem + 1, dtype=dpnp.default_float_type() + ).reshape(shape) - result = dpnp.linalg.inv(dpnp_data) - expected = numpy.linalg.inv(numpy_data) - assert_allclose(expected, result) + dpnp_x = dpnp.array(numpy_x, device=device) - expected_queue = dpnp_data.get_array().sycl_queue + result = dpnp.linalg.inv(dpnp_x) + expected = numpy.linalg.inv(numpy_x) + assert_dtype_allclose(result, expected) + + expected_queue = dpnp_x.get_array().sycl_queue result_queue = result.get_array().sycl_queue assert_sycl_queue_equal(result_queue, expected_queue) @@ -1396,6 +1475,17 @@ def test_diff_scalar_append(device, kwargs): assert_sycl_queue_equal(result_queue, expected_queue) +@pytest.mark.parametrize( + "device", + valid_devices, + ids=[device.filter_string for device in valid_devices], +) +def test_clip(device): + x = dpnp.arange(10, device=device) + y = dpnp.clip(x, 3, 7) + assert_sycl_queue_equal(x.sycl_queue, y.sycl_queue) + + @pytest.mark.parametrize("func", ["take", "take_along_axis"]) @pytest.mark.parametrize( "device", @@ -1418,6 +1508,31 @@ def test_take(func, device): assert_sycl_queue_equal(result_queue, expected_queue) +@pytest.mark.parametrize( + "device", + valid_devices, + ids=[device.filter_string for device in valid_devices], +) +@pytest.mark.parametrize("sparse", [True, False], ids=["True", "False"]) +def test_indices(device, sparse): + sycl_queue = dpctl.SyclQueue(device) + grid = dpnp.indices((2, 3), sparse=sparse, sycl_queue=sycl_queue) + for dpnp_array in grid: + assert_sycl_queue_equal(dpnp_array.sycl_queue, sycl_queue) + + +@pytest.mark.parametrize( + "device", + valid_devices, + ids=[device.filter_string for device in valid_devices], +) +@pytest.mark.parametrize("func", ["mgrid", "ogrid"]) +def test_grid(device, func): + sycl_queue = dpctl.SyclQueue(device) + x = getattr(dpnp, func)(sycl_queue=sycl_queue)[0:4] + 
assert_sycl_queue_equal(x.sycl_queue, sycl_queue) + + @pytest.mark.parametrize( "device", valid_devices, @@ -1442,12 +1557,44 @@ def test_solve(device): assert_sycl_queue_equal(result_queue, dpnp_y.sycl_queue) +@pytest.mark.parametrize( + "shape, is_empty", + [ + ((2, 2), False), + ((3, 2, 2), False), + ((0, 0), True), + ((0, 2, 2), True), + ], + ids=[ + "(2, 2)", + "(3, 2, 2)", + "(0, 0)", + "(0, 2, 2)", + ], +) @pytest.mark.parametrize( "device", valid_devices, ids=[device.filter_string for device in valid_devices], ) -def test_clip(device): - x = dpnp.arange(10, device=device) - y = dpnp.clip(x, 3, 7) - assert_sycl_queue_equal(x.sycl_queue, y.sycl_queue) +def test_slogdet(shape, is_empty, device): + if is_empty: + numpy_x = numpy.empty(shape, dtype=dpnp.default_float_type(device)) + else: + count_elem = numpy.prod(shape) + numpy_x = numpy.arange( + 1, count_elem + 1, dtype=dpnp.default_float_type(device) + ).reshape(shape) + + dpnp_x = dpnp.array(numpy_x, device=device) + + sign_result, logdet_result = dpnp.linalg.slogdet(dpnp_x) + sign_expected, logdet_expected = numpy.linalg.slogdet(numpy_x) + assert_allclose(logdet_expected, logdet_result, rtol=1e-3, atol=1e-4) + assert_allclose(sign_expected, sign_result) + + sign_queue = sign_result.sycl_queue + logdet_queue = logdet_result.sycl_queue + + assert_sycl_queue_equal(sign_queue, dpnp_x.sycl_queue) + assert_sycl_queue_equal(logdet_queue, dpnp_x.sycl_queue) diff --git a/tests/test_umath.py b/tests/test_umath.py index 35955c935bc..8e04a439bc9 100644 --- a/tests/test_umath.py +++ b/tests/test_umath.py @@ -2,15 +2,14 @@ import pytest from numpy.testing import ( assert_allclose, - assert_array_almost_equal, - assert_array_equal, ) import dpnp from .helper import ( + assert_dtype_allclose, get_all_dtypes, - get_complex_dtypes, + get_float_complex_dtypes, get_float_dtypes, has_support_aspect16, has_support_aspect64, @@ -84,6 +83,9 @@ def test_umaths(test_cases): args = get_args(args_str, sh, xp=numpy) iargs = 
get_args(args_str, sh, xp=dpnp) + if umath == "reciprocal" and args[0].dtype in [numpy.int32, numpy.int64]: + pytest.skip("For integer input array, numpy.reciprocal returns zero.") + # original expected = getattr(numpy, umath)(*args) @@ -93,581 +95,158 @@ def test_umaths(test_cases): assert_allclose(result, expected, rtol=1e-6) -class TestSin: - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_bool=True, no_complex=True) - ) - def test_sin(self, dtype): - np_array = numpy.arange(10, dtype=dtype) - np_out = numpy.empty(10, dtype=numpy.float64) - - # DPNP - dp_out_dtype = dpnp.float32 - if has_support_aspect64() and dtype != dpnp.float32: - dp_out_dtype = dpnp.float64 - - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) - dp_array = dpnp.array(np_array, dtype=dtype) - result = dpnp.sin(dp_array, out=dp_out) - - # original - expected = numpy.sin(np_array, out=np_out) - - precision = numpy.finfo(dtype=result.dtype).precision - assert_array_almost_equal(expected, result.asnumpy(), decimal=precision) - - @pytest.mark.parametrize("dtype", get_complex_dtypes()) - def test_sin_complex(self, dtype): - np_array = numpy.arange(10, 20, dtype=dtype) - np_out = numpy.empty(10, dtype=numpy.complex128) - - # DPNP - dp_out_dtype = dpnp.complex64 - if has_support_aspect64() and dtype != dpnp.complex64: - dp_out_dtype = dpnp.complex128 - - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) - dp_array = dpnp.array(np_array, dtype=dtype) - result = dpnp.sin(dp_array, out=dp_out) - - # original - expected = numpy.sin(np_array, out=np_out) - - precision = numpy.finfo(dtype=result.dtype).precision - assert_array_almost_equal(expected, result.asnumpy(), decimal=precision) - - @pytest.mark.usefixtures("suppress_divide_numpy_warnings") - @pytest.mark.skipif( - not has_support_aspect16(), reason="No fp16 support by device" - ) - def test_sin_bool(self): - np_array = numpy.arange(2, dtype=numpy.bool_) - np_out = numpy.empty(2, dtype=numpy.float16) - - # DPNP - dp_array = dpnp.array(np_array, 
dtype=np_array.dtype) - dp_out = dpnp.array(np_out, dtype=np_out.dtype) - result = dpnp.sin(dp_array, out=dp_out) - - # original - expected = numpy.sin(np_array, out=np_out) - assert_allclose(expected, result) - - @pytest.mark.parametrize( - "dtype", - [numpy.float32, numpy.int64, numpy.int32], - ids=["numpy.float32", "numpy.int64", "numpy.int32"], - ) - def test_invalid_dtype(self, dtype): - dp_array = dpnp.arange(10, dtype=dpnp.complex64) - dp_out = dpnp.empty(10, dtype=dtype) - - with pytest.raises(TypeError): - dpnp.sin(dp_array, out=dp_out) - - @pytest.mark.parametrize( - "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] - ) - def test_invalid_shape(self, shape): - dp_array = dpnp.arange(10) - dp_out = dpnp.empty(shape, dtype=dp_array.dtype) - - with pytest.raises(ValueError): - dpnp.sin(dp_array, out=dp_out) - - -class TestSinh: - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_bool=True, no_complex=True) - ) - def test_sinh(self, dtype): - np_array = numpy.arange(10, dtype=dtype) - np_out = numpy.empty(10, dtype=numpy.float64) - - # DPNP - dp_out_dtype = dpnp.float32 - if has_support_aspect64() and dtype != dpnp.float32: - dp_out_dtype = dpnp.float64 - - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) - dp_array = dpnp.array(np_array, dtype=dtype) - result = dpnp.sinh(dp_array, out=dp_out) - - # original - expected = numpy.sinh(np_array, out=np_out) - - tol = numpy.finfo(dtype=result.dtype).resolution - assert_allclose(expected, result.asnumpy(), rtol=tol) - - @pytest.mark.parametrize("dtype", get_complex_dtypes()) - def test_sinh_complex(self, dtype): - np_array = numpy.arange(10, 20, dtype=dtype) - np_out = numpy.empty(10, dtype=numpy.complex128) - - # DPNP - dp_out_dtype = dpnp.complex64 - if has_support_aspect64() and dtype != dpnp.complex64: - dp_out_dtype = dpnp.complex128 - - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) - dp_array = dpnp.array(np_array, dtype=dtype) - result = dpnp.sinh(dp_array, out=dp_out) - - # original - 
expected = numpy.sinh(np_array, out=np_out) - - tol = numpy.finfo(dtype=result.dtype).resolution - assert_allclose(expected, result.asnumpy(), rtol=tol) - - @pytest.mark.usefixtures("suppress_divide_numpy_warnings") - @pytest.mark.skipif( - not has_support_aspect16(), reason="No fp16 support by device" - ) - def test_sinh_bool(self): - np_array = numpy.arange(2, dtype=numpy.bool_) - np_out = numpy.empty(2, dtype=numpy.float16) - - # DPNP - dp_array = dpnp.array(np_array, dtype=np_array.dtype) - dp_out = dpnp.array(np_out, dtype=np_out.dtype) - result = dpnp.sinh(dp_array, out=dp_out) - - # original - expected = numpy.sinh(np_array, out=np_out) - assert_allclose(expected, result) - - @pytest.mark.parametrize( - "dtype", - [numpy.float32, numpy.int64, numpy.int32], - ids=["numpy.float32", "numpy.int64", "numpy.int32"], - ) - def test_invalid_dtype(self, dtype): - dp_array = dpnp.arange(10, dtype=dpnp.complex64) - dp_out = dpnp.empty(10, dtype=dtype) - - with pytest.raises(TypeError): - dpnp.sinh(dp_array, out=dp_out) - - @pytest.mark.parametrize( - "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] - ) - def test_invalid_shape(self, shape): - dp_array = dpnp.arange(10) - dp_out = dpnp.empty(shape, dtype=dp_array.dtype) - - with pytest.raises(ValueError): - dpnp.sinh(dp_array, out=dp_out) - - -class TestCos: - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_bool=True, no_complex=True) - ) - def test_cos(self, dtype): - np_array = numpy.arange(10, dtype=dtype) - np_out = numpy.empty(10, dtype=numpy.float64) - - # DPNP - dp_out_dtype = dpnp.float32 - if has_support_aspect64() and dtype != dpnp.float32: - dp_out_dtype = dpnp.float64 - - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) - dp_array = dpnp.array(np_array, dtype=dtype) - result = dpnp.cos(dp_array, out=dp_out) - - # original - expected = numpy.cos(np_array, out=np_out) - - precision = numpy.finfo(dtype=result.dtype).precision - assert_array_almost_equal(expected, result.asnumpy(), 
decimal=precision) - - @pytest.mark.parametrize("dtype", get_complex_dtypes()) - def test_cos_complex(self, dtype): - np_array = numpy.arange(10, 20, dtype=dtype) - np_out = numpy.empty(10, dtype=numpy.complex128) - - # DPNP - dp_out_dtype = dpnp.complex64 - if has_support_aspect64() and dtype != dpnp.complex64: - dp_out_dtype = dpnp.complex128 - - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) - dp_array = dpnp.array(np_array, dtype=dtype) - result = dpnp.cos(dp_array, out=dp_out) - - # original - expected = numpy.cos(np_array, out=np_out) - - precision = numpy.finfo(dtype=result.dtype).precision - assert_array_almost_equal(expected, result.asnumpy(), decimal=precision) - - @pytest.mark.usefixtures("suppress_divide_numpy_warnings") - @pytest.mark.skipif( - not has_support_aspect16(), reason="No fp16 support by device" - ) - def test_cos_bool(self): - np_array = numpy.arange(2, dtype=numpy.bool_) - np_out = numpy.empty(2, dtype=numpy.float16) - - # DPNP - dp_array = dpnp.array(np_array, dtype=np_array.dtype) - dp_out = dpnp.array(np_out, dtype=np_out.dtype) - result = dpnp.cos(dp_array, out=dp_out) - - # original - expected = numpy.cos(np_array, out=np_out) - assert_allclose(expected, result) - - @pytest.mark.parametrize( - "dtype", - [numpy.float32, numpy.int64, numpy.int32], - ids=["numpy.float32", "numpy.int64", "numpy.int32"], - ) - def test_invalid_dtype(self, dtype): - dp_array = dpnp.arange(10, dtype=dpnp.complex64) - dp_out = dpnp.empty(10, dtype=dtype) - - with pytest.raises(TypeError): - dpnp.cos(dp_array, out=dp_out) - - @pytest.mark.parametrize( - "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] - ) - def test_invalid_shape(self, shape): - dp_array = dpnp.arange(10) - dp_out = dpnp.empty(shape, dtype=dp_array.dtype) - - with pytest.raises(ValueError): - dpnp.cos(dp_array, out=dp_out) - - -class TestCosh: - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_bool=True, no_complex=True) - ) - def test_cosh(self, dtype): - np_array = 
numpy.arange(10, dtype=dtype) - np_out = numpy.empty(10, dtype=numpy.float64) - - # DPNP - dp_out_dtype = dpnp.float32 - if has_support_aspect64() and dtype != dpnp.float32: - dp_out_dtype = dpnp.float64 - - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) - dp_array = dpnp.array(np_array, dtype=dtype) - result = dpnp.cosh(dp_array, out=dp_out) - - # original - expected = numpy.cosh(np_array, out=np_out) - - tol = numpy.finfo(dtype=result.dtype).resolution - assert_allclose(expected, result.asnumpy(), rtol=tol) - - @pytest.mark.parametrize("dtype", get_complex_dtypes()) - def test_cosh_complex(self, dtype): - np_array = numpy.arange(10, 20, dtype=dtype) - np_out = numpy.empty(10, dtype=numpy.complex128) - - # DPNP - dp_out_dtype = dpnp.complex64 - if has_support_aspect64() and dtype != dpnp.complex64: - dp_out_dtype = dpnp.complex128 - - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) - dp_array = dpnp.array(np_array, dtype=dtype) - result = dpnp.cosh(dp_array, out=dp_out) - - # original - expected = numpy.cosh(np_array, out=np_out) - - tol = numpy.finfo(dtype=result.dtype).resolution - assert_allclose(expected, result.asnumpy(), rtol=tol) - - @pytest.mark.usefixtures("suppress_divide_numpy_warnings") - @pytest.mark.skipif( - not has_support_aspect16(), reason="No fp16 support by device" - ) - def test_cosh_bool(self): - np_array = numpy.arange(2, dtype=numpy.bool_) - np_out = numpy.empty(2, dtype=numpy.float16) - - # DPNP - dp_array = dpnp.array(np_array, dtype=np_array.dtype) - dp_out = dpnp.array(np_out, dtype=np_out.dtype) - result = dpnp.cosh(dp_array, out=dp_out) - - # original - expected = numpy.cosh(np_array, out=np_out) - assert_allclose(expected, result) - - @pytest.mark.parametrize( - "dtype", - [numpy.float32, numpy.int64, numpy.int32], - ids=["numpy.float32", "numpy.int64", "numpy.int32"], - ) - def test_invalid_dtype(self, dtype): - dp_array = dpnp.arange(10, dtype=dpnp.complex64) - dp_out = dpnp.empty(10, dtype=dtype) - - with 
pytest.raises(TypeError): - dpnp.cosh(dp_array, out=dp_out) - - @pytest.mark.parametrize( - "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] - ) - def test_invalid_shape(self, shape): - dp_array = dpnp.arange(10) - dp_out = dpnp.empty(shape, dtype=dp_array.dtype) - - with pytest.raises(ValueError): - dpnp.cosh(dp_array, out=dp_out) - - -class TestsLog: - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_bool=True, no_complex=True) - ) - def test_log(self, dtype): - np_array = numpy.arange(10, dtype=dtype) - np_out = numpy.empty(10, dtype=numpy.float64) - - # DPNP - dp_out_dtype = dpnp.float32 - if has_support_aspect64() and dtype != dpnp.float32: - dp_out_dtype = dpnp.float64 - - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) - dp_array = dpnp.array(np_array, dtype=dtype) - result = dpnp.log(dp_array, out=dp_out) - - # original - expected = numpy.log(np_array, out=np_out) +def _get_numpy_arrays(func_name, dtype, range): + """ + Return a sample array and an output array. + + Create an appropriate array specified by `dtype` and `range` which is used as + an input for a function specified by `func_name` to obtain the output. 
+ """ + low = range[0] + high = range[1] + size = range[2] + if dtype == numpy.bool_: + np_array = numpy.arange(2, dtype=dtype) + result = getattr(numpy, func_name)(np_array) + elif dpnp.issubdtype(dtype, dpnp.complexfloating): + a = numpy.random.uniform(low=low, high=high, size=size) + b = numpy.random.uniform(low=low, high=high, size=size) + np_array = numpy.array(a + 1j * b, dtype=dtype) + result = getattr(numpy, func_name)(np_array) + else: + a = numpy.random.uniform(low=low, high=high, size=size) + np_array = numpy.array(a, dtype=dtype) + result = getattr(numpy, func_name)(np_array) - precision = numpy.finfo(dtype=result.dtype).precision - assert_array_almost_equal(expected, result.asnumpy(), decimal=precision) + return np_array, result - @pytest.mark.parametrize("dtype", get_complex_dtypes()) - def test_log_complex(self, dtype): - np_array = numpy.arange(10, 20, dtype=dtype) - np_out = numpy.empty(10, dtype=numpy.complex128) - # DPNP - dp_out_dtype = dpnp.complex64 +def _get_output_data_type(dtype): + """Return a data type specified by input `dtype` and device capabilities.""" + if dpnp.issubdtype(dtype, dpnp.bool): + out_dtype = dpnp.float16 if has_support_aspect16() else dpnp.float32 + elif dpnp.issubdtype(dtype, dpnp.complexfloating): + out_dtype = dpnp.complex64 if has_support_aspect64() and dtype != dpnp.complex64: - dp_out_dtype = dpnp.complex128 - - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) - dp_array = dpnp.array(np_array, dtype=dtype) - result = dpnp.log(dp_array, out=dp_out) - - # original - expected = numpy.log(np_array, out=np_out) - - precision = numpy.finfo(dtype=result.dtype).precision - assert_array_almost_equal(expected, result.asnumpy(), decimal=precision) - - @pytest.mark.usefixtures("suppress_divide_numpy_warnings") - @pytest.mark.skipif( - not has_support_aspect16(), reason="No fp16 support by device" - ) - def test_log_bool(self): - np_array = numpy.arange(2, dtype=numpy.bool_) - np_out = numpy.empty(2, dtype=numpy.float16) - - # 
DPNP - dp_array = dpnp.array(np_array, dtype=np_array.dtype) - dp_out = dpnp.array(np_out, dtype=np_out.dtype) - result = dpnp.log(dp_array, out=dp_out) - - # original - expected = numpy.log(np_array, out=np_out) - assert_allclose(expected, result) - - @pytest.mark.parametrize( - "dtype", - [numpy.float32, numpy.int64, numpy.int32], - ids=["numpy.float32", "numpy.int64", "numpy.int32"], - ) - def test_invalid_dtype(self, dtype): - dp_array = dpnp.arange(10, dtype=dpnp.complex64) - dp_out = dpnp.empty(10, dtype=dtype) - - with pytest.raises(TypeError): - dpnp.log(dp_array, out=dp_out) - - @pytest.mark.parametrize( - "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] - ) - def test_invalid_shape(self, shape): - dp_array = dpnp.arange(10) - dp_out = dpnp.empty(shape, dtype=dp_array.dtype) - - with pytest.raises(ValueError): - dpnp.log(dp_array, out=dp_out) - - -class TestExp: - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_bool=True, no_complex=True) - ) - def test_exp(self, dtype): - np_array = numpy.arange(10, dtype=dtype) - np_out = numpy.empty(10, dtype=numpy.float64) - - # DPNP - dp_out_dtype = dpnp.float32 + out_dtype = dpnp.complex128 + else: + out_dtype = dpnp.float32 if has_support_aspect64() and dtype != dpnp.float32: - dp_out_dtype = dpnp.float64 - - dp_array = dpnp.array(np_array, dtype=dp_out_dtype) - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) - result = dpnp.exp(dp_array, out=dp_out) - - # original - expected = numpy.exp(np_array, out=np_out) - - tol = numpy.finfo(dtype=result.dtype).resolution - assert_allclose(expected, result.asnumpy(), rtol=tol) - - @pytest.mark.parametrize("dtype", get_complex_dtypes()) - def test_exp_complex(self, dtype): - x1 = numpy.linspace(0, 8, num=10) - x2 = numpy.linspace(0, 6, num=10) - Xnp = x1 + 1j * x2 - np_array = numpy.asarray(Xnp, dtype=dtype) - np_out = numpy.empty(10, dtype=numpy.complex128) - - # DPNP - dp_out_dtype = dpnp.complex64 - if has_support_aspect64() and dtype != dpnp.complex64: 
- dp_out_dtype = dpnp.complex128 - - dp_array = dpnp.array(np_array, dtype=dp_out_dtype) - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) - result = dpnp.exp(dp_array, out=dp_out) - - # original - expected = numpy.exp(np_array, out=np_out) - - tol = numpy.finfo(dtype=result.dtype).resolution - assert_allclose(expected, result.asnumpy(), rtol=tol) - - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_complex=True, no_none=True)[:-1] - ) - def test_invalid_dtype(self, dtype): - dpnp_dtype = get_all_dtypes(no_complex=True, no_none=True)[-1] + out_dtype = dpnp.float64 + + return out_dtype + + +class TestUmath: + @pytest.fixture( + params=[ + {"func_name": "arccos", "input_values": [-1, 1, 10]}, + {"func_name": "arccosh", "input_values": [1, 10, 10]}, + {"func_name": "arcsin", "input_values": [-1, 1, 10]}, + {"func_name": "arcsinh", "input_values": [-5, 5, 10]}, + {"func_name": "arctan", "input_values": [-5, 5, 10]}, + {"func_name": "arctanh", "input_values": [-1, 1, 10]}, + {"func_name": "cos", "input_values": [-5, 5, 10]}, + {"func_name": "cosh", "input_values": [-5, 5, 10]}, + {"func_name": "exp", "input_values": [-3, 8, 10]}, + {"func_name": "exp2", "input_values": [-5, 5, 10]}, + {"func_name": "expm1", "input_values": [-5, 5, 10]}, + {"func_name": "log", "input_values": [0, 10, 10]}, + {"func_name": "log10", "input_values": [0, 10, 10]}, + {"func_name": "log2", "input_values": [0, 10, 10]}, + {"func_name": "log1p", "input_values": [0, 10, 10]}, + {"func_name": "sin", "input_values": [-5, 5, 10]}, + {"func_name": "sinh", "input_values": [-5, 5, 10]}, + {"func_name": "sqrt", "input_values": [0, 10, 10]}, + {"func_name": "tan", "input_values": [-1.5, 1.5, 10]}, + {"func_name": "tanh", "input_values": [-5, 5, 10]}, + ], + ids=[ + "arccos", + "arccosh", + "arcsin", + "arcsinh", + "arctan", + "arctanh", + "cos", + "cosh", + "exp", + "exp2", + "expm1", + "log", + "log10", + "log2", + "log1p", + "sin", + "sinh", + "sqrt", + "tan", + "tnah", + ], + ) + def 
func_params(self, request): + return request.param + + @pytest.mark.usefixtures("suppress_divide_invalid_numpy_warnings") + @pytest.mark.parametrize("dtype", get_all_dtypes()) + def test_out(self, func_params, dtype): + func_name = func_params["func_name"] + input_values = func_params["input_values"] + np_array, expected = _get_numpy_arrays(func_name, dtype, input_values) + + dp_array = dpnp.array(np_array) + out_dtype = _get_output_data_type(dtype) + dp_out = dpnp.empty(expected.shape, dtype=out_dtype) + result = getattr(dpnp, func_name)(dp_array, out=dp_out) + + assert result is dp_out + assert_dtype_allclose(result, expected) + + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)[:-1]) + def test_invalid_dtype(self, func_params, dtype): + func_name = func_params["func_name"] + dpnp_dtype = get_all_dtypes(no_none=True)[-1] dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - with pytest.raises(TypeError): - dpnp.exp(dp_array, out=dp_out) + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): + getattr(dpnp, func_name)(dp_array, out=dp_out) - @pytest.mark.parametrize("dtype", get_float_dtypes()) @pytest.mark.parametrize( "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] ) - def test_invalid_shape(self, shape, dtype): - dp_array = dpnp.arange(10, dtype=dtype) - dp_out = dpnp.empty(shape, dtype=dtype) - + def test_invalid_shape(self, func_params, shape): + func_name = func_params["func_name"] + dp_array = dpnp.arange(10) + dp_out = dpnp.empty(shape) with pytest.raises(ValueError): - dpnp.exp(dp_array, out=dp_out) - + getattr(dpnp, func_name)(dp_array, out=dp_out) -class TestExp2: @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_bool=True, no_complex=True) - ) - def test_exp2(self, dtype): - np_array = numpy.arange(7, dtype=dtype) - np_out = numpy.empty(7, dtype=numpy.float64) - - # DPNP - dp_out_dtype = dpnp.float32 - if 
has_support_aspect64() and dtype != dpnp.float32: - dp_out_dtype = dpnp.float64 - - dp_array = dpnp.array(np_array, dtype=dp_out_dtype) - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) - result = dpnp.exp2(dp_array, out=dp_out) - - # original - expected = numpy.exp2(np_array, out=np_out) - - tol = dpnp.finfo(dtype=result.dtype).resolution - assert_allclose(expected, result.asnumpy(), rtol=tol) - - @pytest.mark.parametrize("dtype", get_complex_dtypes()) - def test_exp2_complex(self, dtype): - x1 = numpy.linspace(0, 8, num=10) - x2 = numpy.linspace(0, 6, num=10) - Xnp = x1 + 1j * x2 - np_array = numpy.asarray(Xnp, dtype=dtype) - np_out = numpy.empty(10, dtype=numpy.complex128) - - # DPNP - dp_out_dtype = dpnp.complex64 - if has_support_aspect64() and dtype != dpnp.complex64: - dp_out_dtype = dpnp.complex128 - - dp_array = dpnp.array(np_array, dtype=dp_out_dtype) - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) - result = dpnp.exp2(dp_array, out=dp_out) - - # original - expected = numpy.exp2(np_array, out=np_out) - - tol = dpnp.finfo(dtype=result.dtype).resolution - assert_allclose(expected, result.asnumpy(), rtol=tol) - - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_complex=True, no_none=True)[:-1] - ) - def test_invalid_dtype(self, dtype): - dpnp_dtype = get_all_dtypes(no_complex=True, no_none=True)[-1] - dp_array = dpnp.arange(10, dtype=dpnp_dtype) - dp_out = dpnp.empty(10, dtype=dtype) - - with pytest.raises(TypeError): - dpnp.exp2(dp_array, out=dp_out) - - @pytest.mark.parametrize("dtype", get_float_dtypes()) - @pytest.mark.parametrize( - "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] + "out", + [4, (), [], (3, 7), [2, 4]], + ids=["4", "()", "[]", "(3, 7)", "[2, 4]"], ) - def test_invalid_shape(self, shape, dtype): - dp_array = dpnp.arange(10, dtype=dtype) - dp_out = dpnp.empty(shape, dtype=dtype) - - with pytest.raises(ValueError): - dpnp.exp2(dp_array, out=dp_out) + def test_invalid_out(self, func_params, out): + func_name = 
func_params["func_name"] + a = dpnp.arange(10) + numpy.testing.assert_raises(TypeError, getattr(dpnp, func_name), a, out) class TestCbrt: - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_bool=True, no_complex=True) - ) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) def test_cbrt(self, dtype): - np_array = numpy.arange(7, dtype=dtype) - np_out = numpy.empty(7, dtype=numpy.float64) - - # DPNP - dp_out_dtype = dpnp.float32 - if has_support_aspect64() and dtype != dpnp.float32: - dp_out_dtype = dpnp.float64 + np_array, expected = _get_numpy_arrays("cbrt", dtype, [-5, 5, 10]) - dp_array = dpnp.array(np_array, dtype=dp_out_dtype) - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) + dp_array = dpnp.array(np_array) + out_dtype = _get_output_data_type(dtype) + dp_out = dpnp.empty(expected.shape, dtype=out_dtype) result = dpnp.cbrt(dp_array, out=dp_out) - # original - expected = numpy.cbrt(np_array, out=np_out) - - tol = dpnp.finfo(dtype=result.dtype).resolution - assert_allclose(expected, result.asnumpy(), rtol=tol) + assert result is dp_out + assert_dtype_allclose(result, expected) @pytest.mark.parametrize( "dtype", get_all_dtypes(no_complex=True, no_none=True)[:-1] @@ -677,43 +256,36 @@ def test_invalid_dtype(self, dtype): dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - with pytest.raises(TypeError): + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): dpnp.cbrt(dp_array, out=dp_out) - @pytest.mark.parametrize("dtype", get_float_dtypes()) @pytest.mark.parametrize( "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] ) - def test_invalid_shape(self, shape, dtype): - dp_array = dpnp.arange(10, dtype=dtype) - dp_out = dpnp.empty(shape, dtype=dtype) + def test_invalid_shape(self, shape): + dp_array = dpnp.arange(10) + dp_out = dpnp.empty(shape) with pytest.raises(ValueError): dpnp.cbrt(dp_array, out=dp_out) class TestRsqrt: - 
@pytest.mark.parametrize( - "dtype", get_all_dtypes(no_bool=True, no_complex=True) - ) + @pytest.mark.usefixtures("suppress_divide_numpy_warnings") + @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) def test_rsqrt(self, dtype): - np_array = numpy.arange(1, 10, dtype=dtype) - np_out = numpy.empty(9, dtype=numpy.float64) + np_array, expected = _get_numpy_arrays("sqrt", dtype, [0, 10, 10]) + expected = numpy.reciprocal(expected) - # DPNP - dp_out_dtype = dpnp.float32 - if has_support_aspect64() and dtype != dpnp.float32: - dp_out_dtype = dpnp.float64 - - dp_array = dpnp.array(np_array, dtype=dp_out_dtype) - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) + dp_array = dpnp.array(np_array) + out_dtype = _get_output_data_type(dtype) + dp_out = dpnp.empty(expected.shape, dtype=out_dtype) result = dpnp.rsqrt(dp_array, out=dp_out) - # original - expected = numpy.reciprocal(numpy.sqrt(np_array), out=np_out) - - tol = dpnp.finfo(dtype=result.dtype).resolution - assert_allclose(expected, result.asnumpy(), rtol=tol) + assert result is dp_out + assert_dtype_allclose(result, expected) @pytest.mark.parametrize( "dtype", get_all_dtypes(no_complex=True, no_none=True)[:-1] @@ -723,206 +295,125 @@ def test_invalid_dtype(self, dtype): dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - with pytest.raises(TypeError): + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): dpnp.rsqrt(dp_array, out=dp_out) - @pytest.mark.parametrize("dtype", get_float_dtypes()) @pytest.mark.parametrize( "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] ) - def test_invalid_shape(self, shape, dtype): - dp_array = dpnp.arange(10, dtype=dtype) - dp_out = dpnp.empty(shape, dtype=dtype) - + def test_invalid_shape(self, shape): + dp_array = dpnp.arange(10) + dp_out = dpnp.empty(shape) with pytest.raises(ValueError): dpnp.rsqrt(dp_array, out=dp_out) - -class TestArccos: - 
@pytest.mark.parametrize("dtype", get_float_dtypes()) - @pytest.mark.usefixtures("suppress_invalid_numpy_warnings") - def test_arccos(self, dtype): - array_data = numpy.arange(-9, 10, 2) / 10 - out = numpy.empty(10, dtype=dtype) - - # DPNP - dp_array = dpnp.array(array_data, dtype=dtype) - dp_out = dpnp.array(out, dtype=dtype) - result = dpnp.arccos(dp_array, out=dp_out) - - # original - np_array = numpy.array(array_data, dtype=dtype) - expected = numpy.arccos(np_array, out=out) - - tol = numpy.finfo(dtype=result.dtype).resolution - assert_allclose(expected, result.asnumpy(), rtol=tol) - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_complex=True, no_none=True)[:-1] - ) - def test_invalid_dtype(self, dtype): - dpnp_dtype = get_all_dtypes(no_complex=True, no_none=True)[-1] - dp_array = dpnp.arange(10, dtype=dpnp_dtype) - dp_out = dpnp.empty(10, dtype=dtype) - - with pytest.raises(TypeError): - dpnp.arccos(dp_array, out=dp_out) - - @pytest.mark.parametrize("dtype", get_float_dtypes()) - @pytest.mark.parametrize( - "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] + "out", + [4, (), [], (3, 7), [2, 4]], + ids=["4", "()", "[]", "(3, 7)", "[2, 4]"], ) - def test_invalid_shape(self, shape, dtype): - dp_array = dpnp.arange(10, dtype=dtype) - dp_out = dpnp.empty(shape, dtype=dtype) - - with pytest.raises(ValueError): - dpnp.arccos(dp_array, out=dp_out) - + def test_invalid_out(self, out): + a = dpnp.arange(10) + numpy.testing.assert_raises(TypeError, dpnp.rsqrt, a, out) -class TestArccosh: - @pytest.mark.parametrize("dtype", get_float_dtypes()) - @pytest.mark.usefixtures("suppress_invalid_numpy_warnings") - def test_arccosh(self, dtype): - array_data = numpy.arange(2, 12) - out = numpy.empty(10, dtype=dtype) - # DPNP - dp_array = dpnp.array(array_data, dtype=dtype) - dp_out = dpnp.array(out, dtype=dtype) - result = dpnp.arccosh(dp_array, out=dp_out) +class TestSquare: + @pytest.mark.parametrize("dtype", get_all_dtypes()) + def test_square(self, 
dtype): + np_array, expected = _get_numpy_arrays("square", dtype, [-5, 5, 10]) - # original - np_array = numpy.array(array_data, dtype=dtype) - expected = numpy.arccosh(np_array, out=out) + dp_array = dpnp.array(np_array) + out_dtype = numpy.int8 if dtype == numpy.bool_ else dtype + dp_out = dpnp.empty(expected.shape, dtype=out_dtype) + result = dpnp.square(dp_array, out=dp_out) - tol = numpy.finfo(dtype=result.dtype).resolution - assert_allclose(expected, result.asnumpy(), rtol=tol) + assert result is dp_out + assert_dtype_allclose(result, expected) - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_complex=True, no_none=True)[:-1] - ) + @pytest.mark.parametrize("dtype", get_all_dtypes(no_none=True)[:-1]) def test_invalid_dtype(self, dtype): - dpnp_dtype = get_all_dtypes(no_complex=True, no_none=True)[-1] + dpnp_dtype = get_all_dtypes(no_none=True)[-1] dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - with pytest.raises(TypeError): - dpnp.arccosh(dp_array, out=dp_out) + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): + dpnp.square(dp_array, out=dp_out) - @pytest.mark.parametrize("dtype", get_float_dtypes()) @pytest.mark.parametrize( "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] ) - def test_invalid_shape(self, shape, dtype): - dp_array = dpnp.arange(10, dtype=dtype) - dp_out = dpnp.empty(shape, dtype=dtype) - + def test_invalid_shape(self, shape): + dp_array = dpnp.arange(10) + dp_out = dpnp.empty(shape) with pytest.raises(ValueError): - dpnp.arccosh(dp_array, out=dp_out) - - -class TestArcsin: - @pytest.mark.parametrize("dtype", get_float_dtypes()) - @pytest.mark.usefixtures("suppress_invalid_numpy_warnings") - def test_arcsin(self, dtype): - array_data = numpy.arange(-9, 10, 2) / 10 - out = numpy.empty(10, dtype=dtype) - - # DPNP - dp_array = dpnp.array(array_data, dtype=dtype) - dp_out = dpnp.array(out, dtype=dtype) - result = 
dpnp.arcsin(dp_array, out=dp_out) - - # original - np_array = numpy.array(array_data, dtype=dtype) - expected = numpy.arcsin(np_array, out=out) - - tol = numpy.finfo(dtype=result.dtype).resolution - assert_allclose(expected, result.asnumpy(), rtol=tol) - - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_complex=True, no_none=True)[:-1] - ) - def test_invalid_dtype(self, dtype): - dpnp_dtype = get_all_dtypes(no_complex=True, no_none=True)[-1] - dp_array = dpnp.arange(10, dtype=dpnp_dtype) - dp_out = dpnp.empty(10, dtype=dtype) - - with pytest.raises(TypeError): - dpnp.arcsin(dp_array, out=dp_out) + dpnp.square(dp_array, out=dp_out) - @pytest.mark.parametrize("dtype", get_float_dtypes()) @pytest.mark.parametrize( - "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] + "out", + [4, (), [], (3, 7), [2, 4]], + ids=["4", "()", "[]", "(3, 7)", "[2, 4]"], ) - def test_invalid_shape(self, shape, dtype): - dp_array = dpnp.arange(10, dtype=dtype) - dp_out = dpnp.empty(shape, dtype=dtype) - - with pytest.raises(ValueError): - dpnp.arcsin(dp_array, out=dp_out) + def test_invalid_out(self, out): + a = dpnp.arange(10) + numpy.testing.assert_raises(TypeError, dpnp.square, a, out) + numpy.testing.assert_raises(TypeError, numpy.square, a.asnumpy(), out) -class TestArcsinh: - @pytest.mark.parametrize("dtype", get_float_dtypes()) - @pytest.mark.usefixtures("suppress_invalid_numpy_warnings") - def test_arcsinh(self, dtype): - array_data = numpy.arange(10) - out = numpy.empty(10, dtype=dtype) - # DPNP - dp_array = dpnp.array(array_data, dtype=dtype) - dp_out = dpnp.array(out, dtype=dtype) - result = dpnp.arcsinh(dp_array, out=dp_out) +class TestReciprocal: + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()) + def test_reciprocal(self, dtype): + np_array, expected = _get_numpy_arrays("reciprocal", dtype, [-5, 5, 10]) - # original - np_array = numpy.array(array_data, dtype=dtype) - expected = numpy.arcsinh(np_array, out=out) + dp_array = dpnp.array(np_array) 
+ out_dtype = _get_output_data_type(dtype) + dp_out = dpnp.empty(expected.shape, dtype=out_dtype) + result = dpnp.reciprocal(dp_array, out=dp_out) - tol = numpy.finfo(dtype=result.dtype).resolution - assert_allclose(expected, result.asnumpy(), rtol=tol) + assert result is dp_out + assert_dtype_allclose(result, expected) - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_complex=True, no_none=True)[:-1] - ) + @pytest.mark.parametrize("dtype", get_float_complex_dtypes()[:-1]) def test_invalid_dtype(self, dtype): - dpnp_dtype = get_all_dtypes(no_complex=True, no_none=True)[-1] + dpnp_dtype = get_float_complex_dtypes()[-1] dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - with pytest.raises(TypeError): - dpnp.arcsinh(dp_array, out=dp_out) + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): + dpnp.reciprocal(dp_array, out=dp_out) - @pytest.mark.parametrize("dtype", get_float_dtypes()) @pytest.mark.parametrize( "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] ) - def test_invalid_shape(self, shape, dtype): - dp_array = dpnp.arange(10, dtype=dtype) - dp_out = dpnp.empty(shape, dtype=dtype) + def test_invalid_shape(self, shape): + dp_array = dpnp.arange(10) + dp_out = dpnp.empty(shape) with pytest.raises(ValueError): - dpnp.arcsinh(dp_array, out=dp_out) - + dpnp.reciprocal(dp_array, out=dp_out) -class TestArctan: - @pytest.mark.parametrize("dtype", get_float_dtypes()) - def test_arctan(self, dtype): - array_data = numpy.arange(10) - out = numpy.empty(10, dtype=dtype) - # DPNP - dp_array = dpnp.array(array_data, dtype=dtype) - dp_out = dpnp.array(out, dtype=dtype) - result = dpnp.arctan(dp_array, out=dp_out) +class TestArctan2: + @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) + def test_arctan2(self, dtype): + np_array1, _ = _get_numpy_arrays("array", dtype, [-5, 5, 10]) + np_array2, _ = _get_numpy_arrays("array", dtype, [-5, 
5, 10]) + expected = numpy.arctan2(np_array1, np_array2) - # original - np_array = numpy.array(array_data, dtype=dtype) - expected = numpy.arctan(np_array, out=out) + dp_array1 = dpnp.array(np_array1) + dp_array2 = dpnp.array(np_array2) + out_dtype = _get_output_data_type(dtype) + dp_out = dpnp.empty(expected.shape, dtype=out_dtype) + result = dpnp.arctan2(dp_array1, dp_array2, out=dp_out) - tol = numpy.finfo(dtype).resolution - assert_allclose(expected, result, tol) + assert result is dp_out + assert_dtype_allclose(result, expected) @pytest.mark.parametrize( "dtype", get_all_dtypes(no_complex=True, no_none=True)[:-1] @@ -932,38 +423,36 @@ def test_invalid_dtype(self, dtype): dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - with pytest.raises(TypeError): - dpnp.arctan(dp_array, out=dp_out) + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): + dpnp.arctan2(dp_array, dp_array, out=dp_out) - @pytest.mark.parametrize("dtype", get_float_dtypes()) @pytest.mark.parametrize( "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] ) - def test_invalid_shape(self, shape, dtype): - dp_array = dpnp.arange(10, dtype=dtype) - dp_out = dpnp.empty(shape, dtype=dtype) - + def test_invalid_shape(self, shape): + dp_array = dpnp.arange(10) + dp_out = dpnp.empty(shape) with pytest.raises(ValueError): - dpnp.arctan(dp_array, out=dp_out) - + dpnp.arctan2(dp_array, dp_array, out=dp_out) -class TestArctanh: - @pytest.mark.parametrize("dtype", get_float_dtypes()) - def test_arctanh(self, dtype): - array_data = numpy.arange(-9, 10, 2) / 10 - out = numpy.empty(10, dtype=dtype) - # DPNP - dp_array = dpnp.array(array_data, dtype=dtype) - dp_out = dpnp.array(out, dtype=dtype) - result = dpnp.arctanh(dp_array, out=dp_out) +class TestCopySign: + @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) + def test_copysign(self, dtype): + np_array1, _ = 
_get_numpy_arrays("array", dtype, [1, 10, 10]) + np_array2, _ = _get_numpy_arrays("array", dtype, [-10, -1, 10]) + expected = numpy.copysign(np_array1, np_array2) - # original - np_array = numpy.array(array_data, dtype=dtype) - expected = numpy.arctanh(np_array, out=out) + dp_array1 = dpnp.array(np_array1) + dp_array2 = dpnp.array(np_array2) + out_dtype = _get_output_data_type(dtype) + dp_out = dpnp.empty(expected.shape, dtype=out_dtype) + result = dpnp.copysign(dp_array1, dp_array2, out=dp_out) - tol = numpy.finfo(dtype).resolution - assert_allclose(expected, result, tol) + assert result is dp_out + assert_dtype_allclose(result, expected) @pytest.mark.parametrize( "dtype", get_all_dtypes(no_complex=True, no_none=True)[:-1] @@ -972,39 +461,36 @@ def test_invalid_dtype(self, dtype): dpnp_dtype = get_all_dtypes(no_complex=True, no_none=True)[-1] dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): + dpnp.copysign(dp_array, dp_array, out=dp_out) - with pytest.raises(TypeError): - dpnp.arctanh(dp_array, out=dp_out) - - @pytest.mark.parametrize("dtype", get_float_dtypes()) @pytest.mark.parametrize( "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] ) - def test_invalid_shape(self, shape, dtype): - dp_array = dpnp.arange(10, dtype=dtype) - dp_out = dpnp.empty(shape, dtype=dtype) - + def test_invalid_shape(self, shape): + dp_array = dpnp.arange(10) + dp_out = dpnp.empty(shape) with pytest.raises(ValueError): - dpnp.arctanh(dp_array, out=dp_out) - + dpnp.copysign(dp_array, dp_array, out=dp_out) -class TestTan: - @pytest.mark.parametrize("dtype", get_float_dtypes()) - def test_tan(self, dtype): - array_data = numpy.arange(10) - out = numpy.empty(10, dtype=dtype) - # DPNP - dp_array = dpnp.array(array_data, dtype=dtype) - dp_out = dpnp.array(out, dtype=dtype) - result = dpnp.tan(dp_array, out=dp_out) +class 
TestLogaddexp: + @pytest.mark.parametrize("dtype", get_all_dtypes(no_complex=True)) + def test_logaddexp(self, dtype): + np_array1, _ = _get_numpy_arrays("array", dtype, [-5, 5, 10]) + np_array2, _ = _get_numpy_arrays("array", dtype, [-5, 5, 10]) + expected = numpy.logaddexp(np_array1, np_array2) - # original - np_array = numpy.array(array_data, dtype=dtype) - expected = numpy.tan(np_array, out=out) + dp_array1 = dpnp.array(np_array1) + dp_array2 = dpnp.array(np_array2) + out_dtype = _get_output_data_type(dtype) + dp_out = dpnp.empty(expected.shape, dtype=out_dtype) + result = dpnp.logaddexp(dp_array1, dp_array2, out=dp_out) - tol = numpy.finfo(dtype).resolution - assert_allclose(expected, result, rtol=tol) + assert result is dp_out + assert_dtype_allclose(result, expected) @pytest.mark.parametrize( "dtype", get_all_dtypes(no_complex=True, no_none=True)[:-1] @@ -1013,265 +499,16 @@ def test_invalid_dtype(self, dtype): dpnp_dtype = get_all_dtypes(no_complex=True, no_none=True)[-1] dp_array = dpnp.arange(10, dtype=dpnp_dtype) dp_out = dpnp.empty(10, dtype=dtype) - - with pytest.raises(TypeError): - dpnp.tan(dp_array, out=dp_out) - - @pytest.mark.parametrize("dtype", get_float_dtypes()) - @pytest.mark.parametrize( - "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] - ) - def test_invalid_shape(self, shape, dtype): - dp_array = dpnp.arange(10, dtype=dtype) - dp_out = dpnp.empty(shape, dtype=dtype) - - with pytest.raises(ValueError): - dpnp.tan(dp_array, out=dp_out) - - -class TestArctan2: - @pytest.mark.parametrize("dtype", get_float_dtypes()) - def test_arctan2(self, dtype): - array_data = numpy.arange(10) - out = numpy.empty(10, dtype=dtype) - - # DPNP - dp_array = dpnp.array(array_data, dtype=dtype) - dp_out = dpnp.array(out, dtype=dtype) - result = dpnp.arctan2(dp_array, dp_array, out=dp_out) - - # original - np_array = numpy.array(array_data, dtype=dtype) - expected = numpy.arctan2(np_array, np_array, out=out) - - assert_allclose(expected, result) - 
- @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True) - ) - def test_out_dtypes(self, dtype): - if has_support_aspect64() and dtype != numpy.float32: - dtype_out = numpy.float64 - else: - dtype_out = numpy.float32 - size = 2 if dtype == dpnp.bool else 10 - - np_array = numpy.arange(size, dtype=dtype) - np_out = numpy.empty(size, dtype=dtype_out) - expected = numpy.arctan2(np_array, np_array, out=np_out) - - dp_array = dpnp.arange(size, dtype=dtype) - dp_out = dpnp.empty(size, dtype=dtype_out) - result = dpnp.arctan2(dp_array, dp_array, out=dp_out) - - assert_allclose(expected, result) - - @pytest.mark.parametrize("dtype", get_float_dtypes()) - @pytest.mark.parametrize( - "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] - ) - def test_invalid_shape(self, shape, dtype): - dp_array = dpnp.arange(10, dtype=dtype) - dp_out = dpnp.empty(shape, dtype=dtype) - - with pytest.raises(ValueError): - dpnp.arctan2(dp_array, dp_array, out=dp_out) - - -class TestCopySign: - @pytest.mark.parametrize("dtype", get_float_dtypes()) - def test_copysign(self, dtype): - array_data = numpy.arange(10) - out = numpy.empty(10, dtype=dtype) - - # DPNP - dp_array = dpnp.array(array_data, dtype=dtype) - dp_out = dpnp.array(out, dtype=dtype) - result = dpnp.copysign(dp_array, -dp_array, out=dp_out) - - # original - np_array = numpy.array(array_data, dtype=dtype) - expected = numpy.copysign(np_array, -np_array, out=out) - - assert_allclose(expected, result) - - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_bool=True, no_complex=True, no_none=True) - ) - def test_out_dtypes(self, dtype): - if has_support_aspect64() and dtype != numpy.float32: - dtype_out = numpy.float64 - else: - dtype_out = numpy.float32 - size = 2 if dtype == dpnp.bool else 10 - - np_array = numpy.arange(size, dtype=dtype) - np_out = numpy.empty(size, dtype=dtype_out) - expected = numpy.copysign(np_array, -np_array, out=np_out) - - dp_array = dpnp.arange(size, 
dtype=dtype) - dp_out = dpnp.empty(size, dtype=dtype_out) - result = dpnp.copysign(dp_array, -dp_array, out=dp_out) - - assert_allclose(expected, result) - - @pytest.mark.parametrize("dtype", get_float_dtypes()) - @pytest.mark.parametrize( - "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] - ) - def test_invalid_shape(self, shape, dtype): - dp_array = dpnp.arange(10, dtype=dtype) - dp_out = dpnp.empty(shape, dtype=dtype) - - with pytest.raises(ValueError): - dpnp.copysign(dp_array, dp_array, out=dp_out) - - -class TestSqrt: - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_bool=True, no_complex=True) - ) - def test_sqrt_int_float(self, dtype): - np_array = numpy.arange(10, dtype=dtype) - np_out = numpy.empty(10, dtype=numpy.float64) - - # DPNP - dp_out_dtype = dpnp.float32 - if has_support_aspect64() and dtype != dpnp.float32: - dp_out_dtype = dpnp.float64 - - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) - dp_array = dpnp.array(np_array, dtype=dtype) - result = dpnp.sqrt(dp_array, out=dp_out) - - # original - expected = numpy.sqrt(np_array, out=np_out) - assert_allclose(expected, result) - - @pytest.mark.parametrize("dtype", get_complex_dtypes()) - def test_sqrt_complex(self, dtype): - np_array = numpy.arange(10, 20, dtype=dtype) - np_out = numpy.empty(10, dtype=numpy.complex128) - - # DPNP - dp_out_dtype = dpnp.complex64 - if has_support_aspect64() and dtype != dpnp.complex64: - dp_out_dtype = dpnp.complex128 - - dp_out = dpnp.array(np_out, dtype=dp_out_dtype) - dp_array = dpnp.array(np_array, dtype=dtype) - result = dpnp.sqrt(dp_array, out=dp_out) - - # original - expected = numpy.sqrt(np_array, out=np_out) - assert_allclose(expected, result) - - @pytest.mark.usefixtures("suppress_divide_numpy_warnings") - @pytest.mark.skipif( - not has_support_aspect16(), reason="No fp16 support by device" - ) - def test_sqrt_bool(self): - np_array = numpy.arange(2, dtype=numpy.bool_) - np_out = numpy.empty(2, dtype=numpy.float16) - - # DPNP - dp_array = 
dpnp.array(np_array, dtype=np_array.dtype) - dp_out = dpnp.array(np_out, dtype=np_out.dtype) - result = dpnp.sqrt(dp_array, out=dp_out) - - # original - expected = numpy.sqrt(np_array, out=np_out) - assert_allclose(expected, result) - - @pytest.mark.parametrize( - "dtype", [numpy.int64, numpy.int32], ids=["numpy.int64", "numpy.int32"] - ) - def test_invalid_dtype(self, dtype): - dp_array = dpnp.arange(10, dtype=dpnp.float32) - dp_out = dpnp.empty(10, dtype=dtype) - - with pytest.raises(TypeError): - dpnp.sqrt(dp_array, out=dp_out) + # TODO: change it to ValueError, when dpctl + # is being used in internal CI + with pytest.raises((TypeError, ValueError)): + dpnp.logaddexp(dp_array, dp_array, out=dp_out) @pytest.mark.parametrize( "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] ) def test_invalid_shape(self, shape): - dp_array = dpnp.arange(10, dtype=dpnp.float32) - dp_out = dpnp.empty(shape, dtype=dpnp.float32) - - with pytest.raises(ValueError): - dpnp.sqrt(dp_array, out=dp_out) - - @pytest.mark.parametrize( - "out", - [4, (), [], (3, 7), [2, 4]], - ids=["4", "()", "[]", "(3, 7)", "[2, 4]"], - ) - def test_invalid_out(self, out): - a = dpnp.arange(10) - - numpy.testing.assert_raises(TypeError, dpnp.sqrt, a, out) - numpy.testing.assert_raises(TypeError, numpy.sqrt, a.asnumpy(), out) - - -class TestSquare: - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_bool=True, no_none=True) - ) - def test_square(self, dtype): - np_array = numpy.arange(10, dtype=dtype) - np_out = numpy.empty(10, dtype=dtype) - - # DPNP - dp_out = dpnp.array(np_out, dtype=dtype) - dp_array = dpnp.array(np_array, dtype=dtype) - result = dpnp.square(dp_array, out=dp_out) - - # original - expected = numpy.square(np_array, out=np_out) - assert_allclose(expected, result) - - def test_square_bool(self): - np_array = numpy.arange(2, dtype=numpy.bool_) - np_out = numpy.empty(2, dtype=numpy.int8) - - # DPNP - dp_array = dpnp.array(np_array, dtype=np_array.dtype) - dp_out = 
dpnp.array(np_out, dtype=np_out.dtype) - result = dpnp.square(dp_array, out=dp_out) - - # original - expected = numpy.square(np_array, out=np_out) - assert_allclose(expected, result) - - @pytest.mark.parametrize( - "dtype", get_all_dtypes(no_bool=True, no_none=True) - ) - def test_invalid_dtype(self, dtype): - dp_array = dpnp.ones(10, dtype=dpnp.bool) - dp_out = dpnp.empty(10, dtype=dtype) - - with pytest.raises(TypeError): - dpnp.square(dp_array, out=dp_out) - - @pytest.mark.parametrize( - "shape", [(0,), (15,), (2, 2)], ids=["(0,)", "(15, )", "(2,2)"] - ) - def test_invalid_shape(self, shape): - dp_array = dpnp.arange(10, dtype=dpnp.float32) - dp_out = dpnp.empty(shape, dtype=dpnp.float32) - + dp_array = dpnp.arange(10) + dp_out = dpnp.empty(shape) with pytest.raises(ValueError): - dpnp.square(dp_array, out=dp_out) - - @pytest.mark.parametrize( - "out", - [4, (), [], (3, 7), [2, 4]], - ids=["4", "()", "[]", "(3, 7)", "[2, 4]"], - ) - def test_invalid_out(self, out): - a = dpnp.arange(10) - - numpy.testing.assert_raises(TypeError, dpnp.square, a, out) - numpy.testing.assert_raises(TypeError, numpy.square, a.asnumpy(), out) + dpnp.logaddexp(dp_array, dp_array, out=dp_out) diff --git a/tests/test_usm_type.py b/tests/test_usm_type.py index 3b5bbbbe696..ada68ebfa6c 100644 --- a/tests/test_usm_type.py +++ b/tests/test_usm_type.py @@ -352,6 +352,39 @@ def test_coerced_usm_types_bitwise_op(op, usm_type_x, usm_type_y): assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) +@pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize( + "shape_pair", + [ + ((2, 4), (4, 3)), + ((2, 0), (0, 3)), + ((2, 4), (4, 0)), + ((4, 2, 3), (4, 3, 5)), + ((6, 7, 4, 3), (6, 7, 3, 5)), + ], + ids=[ + "((2, 4), (4, 3))", + "((2, 0), (0, 3))", + "((2, 4), (4, 0))", + "((4, 2, 3), (4, 3, 5))", + "((6, 7, 4, 3), (6, 7, 3, 5))", + ], +) +def 
test_matmul(usm_type_x, usm_type_y, shape_pair): + shape1, shape2 = shape_pair + x = numpy.arange(numpy.prod(shape1)).reshape(shape1) + y = numpy.arange(numpy.prod(shape2)).reshape(shape2) + + x = dp.array(x, usm_type=usm_type_x) + y = dp.array(y, usm_type=usm_type_y) + z = dp.matmul(x, y) + + assert x.usm_type == usm_type_x + assert y.usm_type == usm_type_y + assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_y]) + + @pytest.mark.parametrize("usm_type_x", list_of_usm_types, ids=list_of_usm_types) @pytest.mark.parametrize("usm_type_y", list_of_usm_types, ids=list_of_usm_types) def test_meshgrid(usm_type_x, usm_type_y): @@ -365,7 +398,9 @@ def test_meshgrid(usm_type_x, usm_type_y): @pytest.mark.parametrize( "func,data", [ + pytest.param("average", [1.0, 2.0, 4.0, 7.0]), pytest.param("abs", [-1.2, 1.2]), + pytest.param("angle", [[1.0 + 1.0j, 2.0 + 3.0j]]), pytest.param("arccos", [-0.5, 0.0, 0.5]), pytest.param("arccosh", [1.5, 3.5, 5.0]), pytest.param("arcsin", [-0.5, 0.0, 0.5]), @@ -374,6 +409,7 @@ def test_meshgrid(usm_type_x, usm_type_y): pytest.param("arctanh", [-0.5, 0.0, 0.5]), pytest.param("argmax", [1.0, 2.0, 4.0, 7.0]), pytest.param("argmin", [1.0, 2.0, 4.0, 7.0]), + pytest.param("argsort", [2.0, 1.0, 7.0, 4.0]), pytest.param("cbrt", [1, 8, 27]), pytest.param("ceil", [-1.7, -1.5, -0.2, 0.2, 1.5, 1.7, 2.0]), pytest.param("conjugate", [[1.0 + 1.0j, 0.0], [0.0, 1.0 + 1.0j]]), @@ -401,8 +437,11 @@ def test_meshgrid(usm_type_x, usm_type_y): pytest.param("nanargmax", [1.0, 2.0, 4.0, dp.nan]), pytest.param("nanargmin", [1.0, 2.0, 4.0, dp.nan]), pytest.param("nanmax", [1.0, 2.0, 4.0, dp.nan]), + pytest.param("nanmean", [1.0, 2.0, 4.0, dp.nan]), pytest.param("nanmin", [1.0, 2.0, 4.0, dp.nan]), pytest.param("nanprod", [1.0, 2.0, dp.nan]), + pytest.param("nanstd", [1.0, 2.0, 4.0, dp.nan]), + pytest.param("nansum", [1.0, 2.0, 4.0, dp.nan]), pytest.param("nanvar", [1.0, 2.0, 4.0, dp.nan]), pytest.param("negative", [1.0, 0.0, -1.0]), 
pytest.param("positive", [1.0, 0.0, -1.0]), @@ -412,6 +451,7 @@ def test_meshgrid(usm_type_x, usm_type_y): pytest.param( "real", [complex(1.0, 2.0), complex(3.0, 4.0), complex(5.0, 6.0)] ), + pytest.param("reciprocal", [1.0, 2.0, 4.0, 7.0]), pytest.param("reduce_hypot", [1.0, 2.0, 4.0, 7.0]), pytest.param("rsqrt", [1, 8, 27]), pytest.param("sign", [-5.0, 0.0, 4.5]), @@ -420,8 +460,11 @@ def test_meshgrid(usm_type_x, usm_type_y): "sin", [-dp.pi / 2, -dp.pi / 4, 0.0, dp.pi / 4, dp.pi / 2] ), pytest.param("sinh", [-5.0, -3.5, 0.0, 3.5, 5.0]), + pytest.param("sort", [2.0, 1.0, 7.0, 4.0]), pytest.param("sqrt", [1.0, 3.0, 9.0]), + pytest.param("square", [1.0, 3.0, 9.0]), pytest.param("std", [1.0, 2.0, 4.0, 7.0]), + pytest.param("sum", [1.0, 2.0]), pytest.param( "tan", [-dp.pi / 2, -dp.pi / 4, 0.0, dp.pi / 4, dp.pi / 2] ), @@ -521,6 +564,55 @@ def test_take(func, usm_type_x, usm_type_ind): assert z.usm_type == du.get_coerced_usm_type([usm_type_x, usm_type_ind]) +@pytest.mark.parametrize( + "data, is_empty", + [ + ([[1, -2], [2, 5]], False), + ([[[1, -2], [2, 5]], [[1, -2], [2, 5]]], False), + ((0, 0), True), + ((3, 0, 0), True), + ], + ids=["2D", "3D", "Empty_2D", "Empty_3D"], +) +@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +def test_cholesky(data, is_empty, usm_type): + if is_empty: + x = dp.empty(data, dtype=dp.default_float_type(), usm_type=usm_type) + else: + x = dp.array(data, dtype=dp.default_float_type(), usm_type=usm_type) + + result = dp.linalg.cholesky(x) + + assert x.usm_type == result.usm_type + + +@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +def test_indices(usm_type): + x = dp.indices((2,), usm_type=usm_type) + assert x.usm_type == usm_type + + +@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("func", ["mgrid", "ogrid"]) +def test_grid(usm_type, func): + assert getattr(dp, func)(usm_type=usm_type)[0:4].usm_type == usm_type + + 
+@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize("sparse", [True, False], ids=["True", "False"]) +def test_indices_sparse(usm_type, sparse): + x = dp.indices((2, 3), sparse=sparse, usm_type=usm_type) + for i in x: + assert i.usm_type == usm_type + + +@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +def test_clip(usm_type): + x = dp.arange(10, usm_type=usm_type) + y = dp.clip(x, 2, 7) + assert x.usm_type == y.usm_type + + @pytest.mark.parametrize( "usm_type_matrix", list_of_usm_types, ids=list_of_usm_types ) @@ -560,7 +652,91 @@ def test_solve(matrix, vector, usm_type_matrix, usm_type_vector): @pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) -def test_clip(usm_type): - x = dp.arange(10, usm_type=usm_type) - y = dp.clip(x, 2, 7) - assert x.usm_type == y.usm_type +@pytest.mark.parametrize( + "shape, is_empty", + [ + ((2, 2), False), + ((3, 2, 2), False), + ((0, 0), True), + ((0, 2, 2), True), + ], + ids=[ + "(2, 2)", + "(3, 2, 2)", + "(0, 0)", + "(0, 2, 2)", + ], +) +def test_slogdet(shape, is_empty, usm_type): + if is_empty: + x = dp.empty(shape, dtype=dp.default_float_type(), usm_type=usm_type) + else: + count_elem = numpy.prod(shape) + x = dp.arange( + 1, count_elem + 1, dtype=dp.default_float_type(), usm_type=usm_type + ).reshape(shape) + + sign, logdet = dp.linalg.slogdet(x) + + assert x.usm_type == sign.usm_type + assert x.usm_type == logdet.usm_type + + +@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize( + "shape, is_empty", + [ + ((2, 2), False), + ((3, 2, 2), False), + ((0, 0), True), + ((0, 2, 2), True), + ], + ids=[ + "(2, 2)", + "(3, 2, 2)", + "(0, 0)", + "(0, 2, 2)", + ], +) +def test_det(shape, is_empty, usm_type): + if is_empty: + x = dp.empty(shape, dtype=dp.default_float_type(), usm_type=usm_type) + else: + count_elem = numpy.prod(shape) + x = dp.arange( + 1, count_elem + 1, 
dtype=dp.default_float_type(), usm_type=usm_type + ).reshape(shape) + + det = dp.linalg.det(x) + + assert x.usm_type == det.usm_type + + +@pytest.mark.parametrize("usm_type", list_of_usm_types, ids=list_of_usm_types) +@pytest.mark.parametrize( + "shape, is_empty", + [ + ((2, 2), False), + ((3, 2, 2), False), + ((0, 0), True), + ((0, 2, 2), True), + ], + ids=[ + "(2, 2)", + "(3, 2, 2)", + "(0, 0)", + "(0, 2, 2)", + ], +) +def test_inv(shape, is_empty, usm_type): + if is_empty: + x = dp.empty(shape, dtype=dp.default_float_type(), usm_type=usm_type) + else: + count_elem = numpy.prod(shape) + x = dp.arange( + 1, count_elem + 1, dtype=dp.default_float_type(), usm_type=usm_type + ).reshape(shape) + + result = dp.linalg.inv(x) + + assert x.usm_type == result.usm_type diff --git a/tests/testing/array.py b/tests/testing/array.py index b0ce149217d..45d8eb47b17 100644 --- a/tests/testing/array.py +++ b/tests/testing/array.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/tests/tests_perf/data_generator.py b/tests/tests_perf/data_generator.py index 87b8009dac4..ef178c8896b 100755 --- a/tests/tests_perf/data_generator.py +++ b/tests/tests_perf/data_generator.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without diff --git a/tests/tests_perf/test_perf_base.py b/tests/tests_perf/test_perf_base.py index 8b58f212a0e..5fcbc022bc1 100755 --- a/tests/tests_perf/test_perf_base.py +++ b/tests/tests_perf/test_perf_base.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py b/tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py index 4f807e3b864..8dfe304da29 100644 --- a/tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py +++ b/tests/third_party/cupy/core_tests/test_ndarray_complex_ops.py @@ -4,10 +4,10 @@ import pytest import dpnp as cupy +from tests.helper import has_support_aspect64 from tests.third_party.cupy import testing -@testing.gpu class TestConj(unittest.TestCase): @testing.for_all_dtypes() @testing.numpy_cupy_array_almost_equal() @@ -38,10 +38,9 @@ def test_conjugate_pass(self, xp, dtype): return y -@testing.gpu class TestAngle(unittest.TestCase): @testing.for_all_dtypes() - @testing.numpy_cupy_array_almost_equal() + @testing.numpy_cupy_array_almost_equal(type_check=has_support_aspect64()) def test_angle(self, xp, dtype): x = testing.shaped_arange((2, 3), xp, dtype) return xp.angle(x) @@ -154,7 +153,6 @@ def test_imag_inplace(self, dtype): assert cupy.all(x == expected) -@testing.gpu class TestScalarConversion(unittest.TestCase): @testing.for_all_dtypes() def test_scalar_conversion(self, dtype): diff --git a/tests/third_party/cupy/core_tests/test_ndarray_unary_op.py b/tests/third_party/cupy/core_tests/test_ndarray_unary_op.py index 9622decd981..86bf028aa01 100644 --- a/tests/third_party/cupy/core_tests/test_ndarray_unary_op.py +++ 
b/tests/third_party/cupy/core_tests/test_ndarray_unary_op.py @@ -124,7 +124,14 @@ def test_invert_array(self): @testing.numpy_cupy_allclose(accept_error=TypeError) def check_zerodim_op(self, op, xp, dtype): a = xp.array(-2).astype(dtype) - return op(a) + try: + return op(a) + except ValueError: + # When op is operator.invert and dtype is inexact, + # NumPy raises TypeError while DPNP raises ValueError. + # With this logic, when ValueError is raised in DPNP, + # it is changed to TypeError to align with Numpy. + raise TypeError def test_invert_zerodim(self): self.check_zerodim_op(operator.invert) diff --git a/tests/third_party/cupy/creation_tests/test_ranges.py b/tests/third_party/cupy/creation_tests/test_ranges.py index 623adc409b7..83170cb3e37 100644 --- a/tests/third_party/cupy/creation_tests/test_ranges.py +++ b/tests/third_party/cupy/creation_tests/test_ranges.py @@ -363,7 +363,7 @@ def test_mgrid1(self, xp): def test_mgrid2(self, xp): return xp.mgrid[-10:10:10j] - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose(rtol=1e-4, type_check=has_support_aspect64()) def test_mgrid3(self, xp): x = xp.zeros(10)[:, None] y = xp.ones(10)[:, None] @@ -374,7 +374,7 @@ def test_mgrid4(self, xp): # check len(keys) > 1 return xp.mgrid[-10:10:10j, -10:10:10j] - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose(rtol=1e-4, type_check=has_support_aspect64()) def test_mgrid5(self, xp): # check len(keys) > 1 x = xp.zeros(10)[:, None] @@ -396,18 +396,18 @@ def test_ogrid1(self, xp): def test_ogrid2(self, xp): return xp.ogrid[-10:10:10j] - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose(rtol=1e-4, type_check=has_support_aspect64()) def test_ogrid3(self, xp): x = xp.zeros(10)[:, None] y = xp.ones(10)[:, None] return xp.ogrid[x:y:10j] - @testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose(rtol=1e-4, type_check=has_support_aspect64()) def test_ogrid4(self, xp): # check len(keys) > 1 return xp.ogrid[-10:10:10j, -10:10:10j] - 
@testing.numpy_cupy_array_equal() + @testing.numpy_cupy_allclose(rtol=1e-4, type_check=has_support_aspect64()) def test_ogrid5(self, xp): # check len(keys) > 1 x = xp.zeros(10)[:, None] diff --git a/tests/third_party/cupy/indexing_tests/test_generate.py b/tests/third_party/cupy/indexing_tests/test_generate.py index 58b976c3bd0..dbec17358b2 100644 --- a/tests/third_party/cupy/indexing_tests/test_generate.py +++ b/tests/third_party/cupy/indexing_tests/test_generate.py @@ -7,7 +7,6 @@ from tests.third_party.cupy import testing -@pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.gpu class TestIndices(unittest.TestCase): @testing.for_all_dtypes() diff --git a/tests/third_party/cupy/linalg_tests/test_decomposition.py b/tests/third_party/cupy/linalg_tests/test_decomposition.py new file mode 100644 index 00000000000..42bcf122ff4 --- /dev/null +++ b/tests/third_party/cupy/linalg_tests/test_decomposition.py @@ -0,0 +1,137 @@ +import unittest + +import numpy +import pytest + +import dpnp as cupy +from tests.helper import has_support_aspect64, is_cpu_device +from tests.third_party.cupy import testing + + +def random_matrix(shape, dtype, scale, sym=False): + m, n = shape[-2:] + dtype = numpy.dtype(dtype) + assert dtype.kind in "iufc" + low_s, high_s = scale + bias = None + if dtype.kind in "iu": + # For an m \times n matrix M whose element is in [-0.5, 0.5], it holds + # (singular value of M) <= \sqrt{mn} / 2 + err = numpy.sqrt(m * n) / 2.0 + low_s += err + high_s -= err + if dtype.kind in "u": + assert sym, ( + "generating nonsymmetric matrix with uint cells is not" + " supported." 
+ ) + # (singular value of numpy.ones((m, n))) <= \sqrt{mn} + high_s = bias = high_s / (1 + numpy.sqrt(m * n)) + assert low_s <= high_s + a = numpy.random.standard_normal(shape) + if dtype.kind == "c": + a = a + 1j * numpy.random.standard_normal(shape) + u, s, vh = numpy.linalg.svd(a) + if sym: + assert m == n + vh = u.conj().swapaxes(-1, -2) + new_s = numpy.random.uniform(low_s, high_s, s.shape) + new_a = numpy.einsum("...ij,...j,...jk->...ik", u, new_s, vh) + if bias is not None: + new_a += bias + if dtype.kind in "iu": + new_a = numpy.rint(new_a) + return new_a.astype(dtype) + + +class TestCholeskyDecomposition: + @testing.numpy_cupy_allclose(atol=1e-3, type_check=has_support_aspect64()) + def check_L(self, array, xp): + a = xp.asarray(array) + return xp.linalg.cholesky(a) + + @testing.for_dtypes( + [ + numpy.int32, + numpy.int64, + numpy.uint32, + numpy.uint64, + numpy.float32, + numpy.float64, + numpy.complex64, + numpy.complex128, + ] + ) + def test_decomposition(self, dtype): + # A positive definite matrix + A = random_matrix((5, 5), dtype, scale=(10, 10000), sym=True) + self.check_L(A) + # np.linalg.cholesky only uses a lower triangle of an array + self.check_L(numpy.array([[1, 2], [1, 9]], dtype)) + + @testing.for_dtypes( + [ + numpy.int32, + numpy.int64, + numpy.uint32, + numpy.uint64, + numpy.float32, + numpy.float64, + numpy.complex64, + numpy.complex128, + ] + ) + def test_batched_decomposition(self, dtype): + Ab1 = random_matrix((3, 5, 5), dtype, scale=(10, 10000), sym=True) + self.check_L(Ab1) + Ab2 = random_matrix((2, 2, 5, 5), dtype, scale=(10, 10000), sym=True) + self.check_L(Ab2) + + @pytest.mark.parametrize( + "shape", + [ + # empty square + (0, 0), + (3, 0, 0), + # empty batch + (2, 0, 3, 4, 4), + ], + ) + @testing.for_dtypes( + [ + numpy.int32, + numpy.uint16, + numpy.float32, + numpy.float64, + numpy.complex64, + numpy.complex128, + ] + ) + @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) + def test_empty(self, shape, xp, 
dtype): + a = xp.empty(shape, dtype=dtype) + return xp.linalg.cholesky(a) + + +class TestCholeskyInvalid(unittest.TestCase): + def check_L(self, array): + for xp in (numpy, cupy): + a = xp.asarray(array) + with pytest.raises(xp.linalg.LinAlgError): + xp.linalg.cholesky(a) + + # TODO: remove skipif when MKLD-16626 is resolved + @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") + @testing.for_dtypes( + [ + numpy.int32, + numpy.int64, + numpy.uint32, + numpy.uint64, + numpy.float32, + numpy.float64, + ] + ) + def test_decomposition(self, dtype): + A = numpy.array([[1, -2], [-2, 1]]).astype(dtype) + self.check_L(A) diff --git a/tests/third_party/cupy/linalg_tests/test_norms.py b/tests/third_party/cupy/linalg_tests/test_norms.py new file mode 100644 index 00000000000..2ed49d16057 --- /dev/null +++ b/tests/third_party/cupy/linalg_tests/test_norms.py @@ -0,0 +1,136 @@ +import unittest + +import numpy +import pytest + +import dpnp as cupy +from tests.helper import is_cpu_device +from tests.third_party.cupy import testing + + +# TODO: Remove the use of fixture for all tests in this file +# when dpnp.prod() will support complex dtypes on Gen9 +@pytest.mark.usefixtures("allow_fall_back_on_numpy") +class TestDet(unittest.TestCase): + @testing.for_dtypes("fdFD") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det(self, xp, dtype): + a = testing.shaped_arange((2, 2), xp, dtype) + 1 + return xp.linalg.det(a) + + @testing.for_dtypes("fdFD") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det_3(self, xp, dtype): + a = testing.shaped_arange((2, 2, 2), xp, dtype) + 1 + return xp.linalg.det(a) + + @testing.for_dtypes("fdFD") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det_4(self, xp, dtype): + a = testing.shaped_arange((2, 2, 2, 2), xp, dtype) + 1 + return xp.linalg.det(a) + + @testing.for_dtypes("fdFD") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det_empty_batch(self, xp, dtype): + a = xp.empty((2, 0, 3, 
3), dtype=dtype) + return xp.linalg.det(a) + + @testing.for_dtypes("fdFD") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det_empty_matrix(self, xp, dtype): + a = xp.empty((0, 0), dtype=dtype) + return xp.linalg.det(a) + + @testing.for_dtypes("fdFD") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det_empty_matrices(self, xp, dtype): + a = xp.empty((2, 3, 0, 0), dtype=dtype) + return xp.linalg.det(a) + + @testing.for_dtypes("fdFD") + def test_det_different_last_two_dims(self, dtype): + for xp in (numpy, cupy): + a = testing.shaped_arange((2, 3, 2), xp, dtype) + with pytest.raises(xp.linalg.LinAlgError): + xp.linalg.det(a) + + @testing.for_dtypes("fdFD") + def test_det_different_last_two_dims_empty_batch(self, dtype): + for xp in (numpy, cupy): + a = xp.empty((0, 3, 2), dtype=dtype) + with pytest.raises(xp.linalg.LinAlgError): + xp.linalg.det(a) + + @testing.for_dtypes("fdFD") + def test_det_one_dim(self, dtype): + for xp in (numpy, cupy): + a = testing.shaped_arange((2,), xp, dtype) + with pytest.raises(xp.linalg.LinAlgError): + xp.linalg.det(a) + + @testing.for_dtypes("fdFD") + def test_det_zero_dim(self, dtype): + for xp in (numpy, cupy): + a = testing.shaped_arange((), xp, dtype) + with pytest.raises(xp.linalg.LinAlgError): + xp.linalg.det(a) + + # TODO: remove skipif when MKLD-16626 is resolved + # _getrf_batch does not raise an error with singular matrices. + # Skip running on cpu because dpnp uses _getrf_batch only on cpu. 
+ @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") + @testing.for_dtypes("fdFD") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_det_singular(self, xp, dtype): + a = xp.zeros((2, 3, 3), dtype=dtype) + return xp.linalg.det(a) + + +@pytest.mark.usefixtures("allow_fall_back_on_numpy") +class TestSlogdet(unittest.TestCase): + @testing.for_dtypes("fdFD") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_slogdet(self, xp, dtype): + a = testing.shaped_arange((2, 2), xp, dtype) + 1 + sign, logdet = xp.linalg.slogdet(a) + return sign, logdet + + @testing.for_dtypes("fdFD") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_slogdet_3(self, xp, dtype): + a = testing.shaped_arange((2, 2, 2), xp, dtype) + 1 + sign, logdet = xp.linalg.slogdet(a) + return sign, logdet + + @testing.for_dtypes("fdFD") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_slogdet_4(self, xp, dtype): + a = testing.shaped_arange((2, 2, 2, 2), xp, dtype) + 1 + sign, logdet = xp.linalg.slogdet(a) + return sign, logdet + + @testing.for_dtypes("fdFD") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_slogdet_singular(self, xp, dtype): + a = xp.zeros((3, 3), dtype=dtype) + sign, logdet = xp.linalg.slogdet(a) + return sign, logdet + + @testing.for_dtypes("fdFD") + @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-4) + def test_slogdet_singular_errstate(self, xp, dtype): + a = xp.zeros((3, 3), dtype=dtype) + # TODO: dpnp has no errstate. Probably to be implemented later + # with cupyx.errstate(linalg="raise"): + # `cupy.linalg.slogdet` internally catches `dev_info < 0` from + # cuSOLVER, which should not affect `dev_info > 0` cases. 
+ sign, logdet = xp.linalg.slogdet(a) + return sign, logdet + + @testing.for_dtypes("fdFD") + def test_slogdet_one_dim(self, dtype): + for xp in (numpy, cupy): + a = testing.shaped_arange((2,), xp, dtype) + with pytest.raises(xp.linalg.LinAlgError): + xp.linalg.slogdet(a) diff --git a/tests/third_party/cupy/linalg_tests/test_solve.py b/tests/third_party/cupy/linalg_tests/test_solve.py index 6194cf6b8ac..b31082c8e84 100644 --- a/tests/third_party/cupy/linalg_tests/test_solve.py +++ b/tests/third_party/cupy/linalg_tests/test_solve.py @@ -4,8 +4,13 @@ import pytest import dpnp as cupy -from tests.helper import has_support_aspect64 +from tests.helper import ( + assert_dtype_allclose, + has_support_aspect64, + is_cpu_device, +) from tests.third_party.cupy import testing +from tests.third_party.cupy.testing import condition @testing.parameterize( @@ -38,8 +43,8 @@ def check_x(self, a_shape, b_shape, xp, dtype): a_copy = a.copy() b_copy = b.copy() result = xp.linalg.solve(a, b) - numpy.testing.assert_array_equal(a_copy, a) - numpy.testing.assert_array_equal(b_copy, b) + testing.assert_array_equal(a_copy, a) + testing.assert_array_equal(b_copy, b) return result def test_solve(self): @@ -88,3 +93,76 @@ def test_invalid_shape(self): self.check_shape((2, 3, 3), (3,), value_errors) self.check_shape((3, 3), (0,), value_errors) self.check_shape((0, 3, 4), (3,), linalg_errors) + + +@testing.parameterize( + *testing.product( + { + "order": ["C", "F"], + } + ) +) +class TestInv(unittest.TestCase): + @testing.for_dtypes("ifdFD") + @condition.retry(10) + def check_x(self, a_shape, dtype): + a_cpu = numpy.random.randint(0, 10, size=a_shape) + a_cpu = a_cpu.astype(dtype, order=self.order) + a_gpu = cupy.asarray(a_cpu, order=self.order) + a_gpu_copy = a_gpu.copy() + result_cpu = numpy.linalg.inv(a_cpu) + result_gpu = cupy.linalg.inv(a_gpu) + + assert_dtype_allclose(result_gpu, result_cpu) + testing.assert_array_equal(a_gpu_copy, a_gpu) + + def check_shape(self, a_shape): + a = 
cupy.random.rand(*a_shape) + with self.assertRaises( + (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) + ): + cupy.linalg.inv(a) + + def test_inv(self): + self.check_x((3, 3)) + self.check_x((4, 4)) + self.check_x((5, 5)) + self.check_x((2, 5, 5)) + self.check_x((3, 4, 4)) + self.check_x((4, 2, 3, 3)) + self.check_x((0, 0)) + self.check_x((3, 0, 0)) + self.check_x((2, 0, 3, 4, 4)) + + def test_invalid_shape(self): + self.check_shape((2, 3)) + self.check_shape((4, 1)) + self.check_shape((4, 3, 2)) + self.check_shape((2, 4, 3)) + self.check_shape((2, 0)) + self.check_shape((0, 2, 3)) + + +class TestInvInvalid(unittest.TestCase): + # TODO: remove skipif when MKLD-16626 is resolved + # _gesv does not raise an error with singular matrices on CPU. + @pytest.mark.skipif(is_cpu_device(), reason="MKLD-16626") + @testing.for_dtypes("ifdFD") + def test_inv(self, dtype): + for xp in (numpy, cupy): + a = xp.array([[1, 2], [2, 4]]).astype(dtype) + with pytest.raises( + (numpy.linalg.LinAlgError, cupy.linalg.LinAlgError) + ): + xp.linalg.inv(a) + + # TODO: remove skipif when MKLD-16626 is resolved + # _getrf_batch does not raise an error with singular matrices. + @pytest.mark.skip("MKLD-16626") + @testing.for_dtypes("ifdFD") + def test_batched_inv(self, dtype): + for xp in (numpy, cupy): + a = xp.array([[[1, 2], [2, 4]]]).astype(dtype) + assert a.ndim >= 3 # CuPy internally uses a batched function. 
+ with pytest.raises(xp.linalg.LinAlgError): + xp.linalg.inv(a) diff --git a/tests/third_party/cupy/math_tests/test_arithmetic.py b/tests/third_party/cupy/math_tests/test_arithmetic.py index 6c0dea51750..9297b3de937 100644 --- a/tests/third_party/cupy/math_tests/test_arithmetic.py +++ b/tests/third_party/cupy/math_tests/test_arithmetic.py @@ -15,12 +15,17 @@ unsigned_int_types = [] int_types = signed_int_types + unsigned_int_types all_types = float_types + int_types + complex_types +negative_types_wo_fp16 = ( + [numpy.bool_] + + [numpy.float32, numpy.float64] + + [numpy.int16, numpy.int32, numpy.int64] + + complex_types +) negative_types = float_types + signed_int_types + complex_types negative_no_complex_types = float_types + signed_int_types no_complex_types = float_types + int_types -@testing.gpu @testing.parameterize( *( testing.product( @@ -53,7 +58,7 @@ ) ) ) -class TestArithmeticRaisesWithNumpyInput(unittest.TestCase): +class TestArithmeticRaisesWithNumpyInput: def test_raises_with_numpy_input(self): nargs = self.nargs name = self.name @@ -65,11 +70,10 @@ def test_raises_with_numpy_input(self): # We don't test all-cupy-array inputs here continue arys = [xp.array([2, -3]) for xp in input_xp_list] - with self.assertRaises(TypeError): + with pytest.raises(TypeError): func(*arys) -@testing.gpu @testing.parameterize( *( testing.product( @@ -79,11 +83,34 @@ def test_raises_with_numpy_input(self): testing.shaped_arange((2, 3), numpy, dtype=d) for d in all_types ] - + [0, 0.0j, 0j, 2, 2.0, 2j, True, False] ), "name": ["conj", "conjugate", "real", "imag"], } ) + + testing.product( + { + "arg1": ( + [ + testing.shaped_arange((2, 3), numpy, dtype=d) + for d in all_types + ] + ), + "deg": [True, False], + "name": ["angle"], + } + ) + + testing.product( + { + "arg1": ( + [ + numpy.array([-3, -2, -1, 1, 2, 3], dtype=d) + for d in negative_types_wo_fp16 + ] + ), + "deg": [True, False], + "name": ["angle"], + } + ) + testing.product( { "arg1": ( @@ -91,21 +118,42 @@ def 
test_raises_with_numpy_input(self): testing.shaped_arange((2, 3), numpy, dtype=d) + 1 for d in all_types ] - + [2, 2.0] ), "name": ["reciprocal"], } ) ) ) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -class TestArithmeticUnary(unittest.TestCase): +class TestArithmeticUnary: @testing.numpy_cupy_allclose(atol=1e-5, type_check=has_support_aspect64()) def test_unary(self, xp): arg1 = self.arg1 if isinstance(arg1, numpy.ndarray): arg1 = xp.asarray(arg1) - y = getattr(xp, self.name)(arg1) + + if self.name in ("reciprocal") and xp is numpy: + # In Numpy, for integer arguments with absolute value larger than 1 the result is always zero. + # We need to convert the input data type to float then compare the output with DPNP. + if isinstance(arg1, numpy.ndarray) and numpy.issubdtype( + arg1.dtype, numpy.integer + ): + np_dtype = ( + numpy.float64 if has_support_aspect64() else numpy.float32 + ) + arg1 = xp.asarray(arg1, dtype=np_dtype) + + if self.name in {"angle"}: + y = getattr(xp, self.name)(arg1, self.deg) + # In Numpy, for boolean arguments the output data type is always default floating data type. + # while data type of output in DPNP is determined by Type Promotion Rules. 
+ if ( + isinstance(arg1, cupy.ndarray) + and cupy.issubdtype(arg1.dtype, cupy.bool) + and has_support_aspect64() + ): + y = y.astype(cupy.float64) + else: + y = getattr(xp, self.name)(arg1) # if self.name in ("real", "imag"): # Some NumPy functions return Python scalars for Python scalar diff --git a/tests/third_party/cupy/math_tests/test_matmul.py b/tests/third_party/cupy/math_tests/test_matmul.py index d0f3555373a..d21ec7a2d68 100644 --- a/tests/third_party/cupy/math_tests/test_matmul.py +++ b/tests/third_party/cupy/math_tests/test_matmul.py @@ -25,40 +25,38 @@ ((0,), (0,)), # matmul test ((5, 3, 2), (5, 2, 4)), - # ((0, 3, 2), (0, 2, 4)), - # ((5, 3, 2), (2, 4)), - # ((0, 3, 2), (2, 4)), - # ((3, 2), (5, 2, 4)), - # ((3, 2), (0, 2, 4)), - # ((5, 3, 2), (1, 2, 4)), - # ((0, 3, 2), (1, 2, 4)), - # ((1, 3, 2), (5, 2, 4)), - # ((1, 3, 2), (0, 2, 4)), - # ((5, 3, 2), (2,)), - # ((5, 3, 0), (0,)), - # ((2,), (5, 2, 4)), - # ((0,), (5, 0, 4)), - # ((2, 2, 3, 2), (2, 2, 2, 4)), - # ((5, 0, 3, 2), (5, 0, 2, 4)), - # ((6, 5, 3, 2), (2, 4)), - # ((5, 0, 3, 2), (2, 4)), - # ((3, 2), (6, 5, 2, 4)), - # ((3, 2), (5, 0, 2, 4)), - # ((1, 5, 3, 2), (6, 1, 2, 4)), - # ((1, 0, 3, 2), (6, 1, 2, 4)), - # ((6, 1, 3, 2), (1, 5, 2, 4)), - # ((6, 1, 3, 2), (1, 0, 2, 4)), - # ((6, 5, 3, 2), (2,)), - # ((6, 5, 3, 0), (0,)), - # ((2,), (6, 5, 2, 4)), - # ((0,), (6, 5, 0, 4)), + ((0, 3, 2), (0, 2, 4)), + ((5, 3, 2), (2, 4)), + ((0, 3, 2), (2, 4)), + ((3, 2), (5, 2, 4)), + ((3, 2), (0, 2, 4)), + ((5, 3, 2), (1, 2, 4)), + ((0, 3, 2), (1, 2, 4)), + ((1, 3, 2), (5, 2, 4)), + ((1, 3, 2), (0, 2, 4)), + ((5, 3, 2), (2,)), + ((5, 3, 0), (0,)), + ((2,), (5, 2, 4)), + ((0,), (5, 0, 4)), + ((2, 2, 3, 2), (2, 2, 2, 4)), + ((5, 0, 3, 2), (5, 0, 2, 4)), + ((6, 5, 3, 2), (2, 4)), + ((5, 0, 3, 2), (2, 4)), + ((3, 2), (6, 5, 2, 4)), + ((3, 2), (5, 0, 2, 4)), + ((1, 5, 3, 2), (6, 1, 2, 4)), + ((1, 0, 3, 2), (6, 1, 2, 4)), + ((6, 1, 3, 2), (1, 5, 2, 4)), + ((6, 1, 3, 2), (1, 0, 2, 4)), + ((6, 5, 3, 2), 
(2,)), + ((6, 5, 3, 0), (0,)), + ((2,), (6, 5, 2, 4)), + ((0,), (6, 5, 0, 4)), ((1, 3, 3), (10, 1, 3, 1)), ], } ) ) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -@testing.gpu class TestMatmul(unittest.TestCase): @testing.for_all_dtypes(name="dtype1") @testing.numpy_cupy_allclose(rtol=1e-3, atol=1e-3) # required for uint8 @@ -94,8 +92,6 @@ def test_cupy_matmul(self, xp, dtype1): } ) ) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -@testing.gpu class TestMatmulLarge(unittest.TestCase): # Avoid overflow skip_dtypes = { @@ -151,8 +147,6 @@ def test_cupy_matmul(self, xp, dtype1): } ) ) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -@testing.gpu class TestMatmulInvalidShape(unittest.TestCase): def test_invalid_shape(self): for xp in (numpy, dpnp): diff --git a/tests/third_party/cupy/math_tests/test_rounding.py b/tests/third_party/cupy/math_tests/test_rounding.py index 5c9cbcd4aa6..7e451ae1e51 100644 --- a/tests/third_party/cupy/math_tests/test_rounding.py +++ b/tests/third_party/cupy/math_tests/test_rounding.py @@ -8,7 +8,6 @@ from tests.third_party.cupy import testing -@testing.gpu class TestRounding(unittest.TestCase): @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_allclose(type_check=False, atol=1e-5) @@ -26,7 +25,9 @@ def check_unary_complex(self, name, xp, dtype): def check_unary_complex_unsupported(self, name, dtype): for xp in (numpy, cupy): a = testing.shaped_arange((2, 3), xp, dtype) - with pytest.raises(TypeError): + # NumPy returns TypeError while DPNP returns ValueError + # for these functions: "ceil", "floor", "trunc" + with pytest.raises((TypeError, ValueError)): getattr(xp, name)(a) @testing.for_dtypes(["?", "b", "h", "i", "q", "e", "f", "d"]) diff --git a/tests/third_party/cupy/math_tests/test_sumprod.py b/tests/third_party/cupy/math_tests/test_sumprod.py index fc94b329665..e4306788885 100644 --- a/tests/third_party/cupy/math_tests/test_sumprod.py +++ b/tests/third_party/cupy/math_tests/test_sumprod.py @@ -1,5 +1,3 
@@ -import unittest - import numpy import pytest @@ -8,62 +6,63 @@ from tests.third_party.cupy import testing -class TestSumprod(unittest.TestCase): +# Note: numpy.sum() always upcast integers to (u)int64 and float32 to +# float64 for dtype=None. `np.sum` does that too for integers, but not for +# float32, so we need to special-case it for these tests +def _get_dtype_kwargs(xp, dtype): + if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): + return {"dtype": numpy.float64} + return {} + + +class TestSumprod: def tearDown(self): # Free huge memory for slow test # cupy.get_default_memory_pool().free_all_blocks() # cupy.get_default_pinned_memory_pool().free_all_blocks() pass - # Note: numpy.sum() always upcast integers to (u)int64 and float32 to - # float64 for dtype=None. `np.sum` does that too for integers, but not for - # float32, so we need to special-case it for these tests - def _get_dtype_kwargs(self, xp, dtype): - if xp is numpy and dtype == numpy.float32 and has_support_aspect64(): - return {"dtype": numpy.float64} - return {} - @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_sum_all(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) - return a.sum(**self._get_dtype_kwargs(xp, dtype)) + return a.sum(**_get_dtype_kwargs(xp, dtype)) @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_sum_all_keepdims(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) - return a.sum(**self._get_dtype_kwargs(xp, dtype), keepdims=True) + return a.sum(**_get_dtype_kwargs(xp, dtype), keepdims=True) @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_external_sum_all(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) - return xp.sum(a, **self._get_dtype_kwargs(xp, dtype)) + return xp.sum(a, **_get_dtype_kwargs(xp, dtype)) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(rtol=1e-06) def test_sum_all2(self, xp, dtype): a = testing.shaped_arange((20, 30, 40), xp, dtype) - 
return a.sum(**self._get_dtype_kwargs(xp, dtype)) + return a.sum(**_get_dtype_kwargs(xp, dtype)) @testing.for_all_dtypes() - @testing.numpy_cupy_allclose(type_check=False) + @testing.numpy_cupy_allclose() def test_sum_all_transposed(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype).transpose(2, 0, 1) - return a.sum(**self._get_dtype_kwargs(xp, dtype)) + return a.sum(**_get_dtype_kwargs(xp, dtype)) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(rtol=1e-06) def test_sum_all_transposed2(self, xp, dtype): a = testing.shaped_arange((20, 30, 40), xp, dtype).transpose(2, 0, 1) - return a.sum(**self._get_dtype_kwargs(xp, dtype)) + return a.sum(**_get_dtype_kwargs(xp, dtype)) @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_sum_axis(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) - return a.sum(**self._get_dtype_kwargs(xp, dtype), axis=1) + return a.sum(**_get_dtype_kwargs(xp, dtype), axis=1) @testing.slow @testing.numpy_cupy_allclose() @@ -75,7 +74,7 @@ def test_sum_axis_huge(self, xp): @testing.numpy_cupy_allclose() def test_external_sum_axis(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) - return xp.sum(a, **self._get_dtype_kwargs(xp, dtype), axis=1) + return xp.sum(a, **_get_dtype_kwargs(xp, dtype), axis=1) # float16 is omitted, since NumPy's sum on float16 arrays has more error # than CuPy's. 
@@ -83,49 +82,49 @@ def test_external_sum_axis(self, xp, dtype): @testing.numpy_cupy_allclose() def test_sum_axis2(self, xp, dtype): a = testing.shaped_arange((20, 30, 40), xp, dtype) - return a.sum(**self._get_dtype_kwargs(xp, dtype), axis=1) + return a.sum(**_get_dtype_kwargs(xp, dtype), axis=1) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) def test_sum_axis_transposed(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype).transpose(2, 0, 1) - return a.sum(**self._get_dtype_kwargs(xp, dtype), axis=1) + return a.sum(**_get_dtype_kwargs(xp, dtype), axis=1) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(contiguous_check=False) def test_sum_axis_transposed2(self, xp, dtype): a = testing.shaped_arange((20, 30, 40), xp, dtype).transpose(2, 0, 1) - return a.sum(**self._get_dtype_kwargs(xp, dtype), axis=1) + return a.sum(**_get_dtype_kwargs(xp, dtype), axis=1) @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_sum_axes(self, xp, dtype): a = testing.shaped_arange((2, 3, 4, 5), xp, dtype) - return a.sum(**self._get_dtype_kwargs(xp, dtype), axis=(1, 3)) + return a.sum(**_get_dtype_kwargs(xp, dtype), axis=(1, 3)) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(rtol=1e-4) def test_sum_axes2(self, xp, dtype): a = testing.shaped_arange((20, 30, 40, 50), xp, dtype) - return a.sum(**self._get_dtype_kwargs(xp, dtype), axis=(1, 3)) + return a.sum(**_get_dtype_kwargs(xp, dtype), axis=(1, 3)) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(rtol=1e-6) def test_sum_axes3(self, xp, dtype): a = testing.shaped_arange((2, 3, 4, 5), xp, dtype) - return a.sum(**self._get_dtype_kwargs(xp, dtype), axis=(0, 2, 3)) + return a.sum(**_get_dtype_kwargs(xp, dtype), axis=(0, 2, 3)) @testing.for_all_dtypes() @testing.numpy_cupy_allclose(rtol=1e-6) def test_sum_axes4(self, xp, dtype): a = testing.shaped_arange((20, 30, 40, 50), xp, dtype) - return a.sum(**self._get_dtype_kwargs(xp, dtype), axis=(0, 2, 3)) + return 
a.sum(**_get_dtype_kwargs(xp, dtype), axis=(0, 2, 3)) @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_sum_empty_axis(self, xp, dtype): a = testing.shaped_arange((2, 3, 4, 5), xp, dtype) - return a.sum(**self._get_dtype_kwargs(xp, dtype), axis=()) + return a.sum(**_get_dtype_kwargs(xp, dtype), axis=()) @testing.for_all_dtypes_combination(names=["src_dtype", "dst_dtype"]) @testing.numpy_cupy_allclose() @@ -143,9 +142,7 @@ def test_sum_keepdims_and_dtype(self, xp, src_dtype, dst_dtype): @testing.numpy_cupy_allclose() def test_sum_keepdims_multiple_axes(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) - return a.sum( - **self._get_dtype_kwargs(xp, dtype), axis=(1, 2), keepdims=True - ) + return a.sum(**_get_dtype_kwargs(xp, dtype), axis=(1, 2), keepdims=True) @testing.for_all_dtypes() @testing.numpy_cupy_allclose() @@ -158,32 +155,32 @@ def test_sum_out(self, xp, dtype): def test_sum_out_wrong_shape(self): a = testing.shaped_arange((2, 3, 4)) b = cupy.empty((2, 3)) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): a.sum(axis=1, out=b) @testing.for_all_dtypes() - @testing.numpy_cupy_allclose(type_check=False) + @testing.numpy_cupy_allclose() def test_prod_all(self, xp, dtype): a = testing.shaped_arange((2, 3), xp, dtype) - return a.prod() + return a.prod(**_get_dtype_kwargs(xp, dtype)) @testing.for_all_dtypes() - @testing.numpy_cupy_allclose(type_check=False) + @testing.numpy_cupy_allclose() def test_external_prod_all(self, xp, dtype): a = testing.shaped_arange((2, 3), xp, dtype) - return xp.prod(a) + return xp.prod(a, **_get_dtype_kwargs(xp, dtype)) @testing.for_all_dtypes() - @testing.numpy_cupy_allclose(type_check=False) + @testing.numpy_cupy_allclose() def test_prod_axis(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) - return a.prod(axis=1) + return a.prod(axis=1, **_get_dtype_kwargs(xp, dtype)) @testing.for_all_dtypes() - @testing.numpy_cupy_allclose(type_check=False) + 
@testing.numpy_cupy_allclose() def test_external_prod_axis(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) - return xp.prod(a, axis=1) + return xp.prod(a, axis=1, **_get_dtype_kwargs(xp, dtype)) @testing.for_all_dtypes_combination(names=["src_dtype", "dst_dtype"]) @testing.numpy_cupy_allclose() @@ -203,8 +200,7 @@ def test_prod_dtype(self, xp, src_dtype, dst_dtype): } ) ) -@testing.gpu -class TestNansumNanprodLong(unittest.TestCase): +class TestNansumNanprodLong: def _do_transposed_axis_test(self): return not self.transpose_axes and self.axis != 1 @@ -232,10 +228,15 @@ def _test(self, xp, dtype): if not issubclass(dtype, xp.integer): a[:, 1] = xp.nan func = getattr(xp, self.func) - return func(a, axis=self.axis, keepdims=self.keepdims) + return func( + a, + **_get_dtype_kwargs(xp, dtype), + axis=self.axis, + keepdims=self.keepdims, + ) @testing.for_all_dtypes(no_bool=True, no_float16=True) - @testing.numpy_cupy_allclose(type_check=False) + @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_nansum_all(self, xp, dtype): if ( not self._numpy_nanprod_implemented() @@ -245,7 +246,9 @@ def test_nansum_all(self, xp, dtype): return self._test(xp, dtype) @testing.for_all_dtypes(no_bool=True, no_float16=True) - @testing.numpy_cupy_allclose(contiguous_check=False, type_check=False) + @testing.numpy_cupy_allclose( + contiguous_check=False, type_check=has_support_aspect64() + ) def test_nansum_axis_transposed(self, xp, dtype): if ( not self._numpy_nanprod_implemented() @@ -262,9 +265,7 @@ def test_nansum_axis_transposed(self, xp, dtype): } ) ) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -@testing.gpu -class TestNansumNanprodExtra(unittest.TestCase): +class TestNansumNanprodExtra: @testing.for_all_dtypes(no_bool=True, no_float16=True) @testing.numpy_cupy_allclose() def test_nansum_out(self, xp, dtype): @@ -279,7 +280,7 @@ def test_nansum_out_wrong_shape(self): a = testing.shaped_arange(self.shape) a[:, 1] = cupy.nan b = 
cupy.empty((2, 3)) - with self.assertRaises(ValueError): + with pytest.raises(ValueError): cupy.nansum(a, axis=1, out=b) @@ -291,16 +292,36 @@ def test_nansum_out_wrong_shape(self): } ) ) -@pytest.mark.usefixtures("allow_fall_back_on_numpy") -@testing.gpu -class TestNansumNanprodAxes(unittest.TestCase): +class TestNansumNanprodAxes: @testing.for_all_dtypes(no_bool=True, no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-6) def test_nansum_axes(self, xp, dtype): a = testing.shaped_arange(self.shape, xp, dtype) if not issubclass(dtype, xp.integer): a[:, 1] = xp.nan - return xp.nansum(a, axis=self.axis) + return xp.nansum(a, **_get_dtype_kwargs(xp, dtype), axis=self.axis) + + +class TestNansumNanprodHuge: + def _test(self, xp, nan_slice): + a = testing.shaped_random((2048, 1, 1024), xp, "f") + a[nan_slice] = xp.nan + a = xp.broadcast_to(a, (2048, 256, 1024)) + return xp.nansum(a, **_get_dtype_kwargs(xp, a.dtype), axis=2) + + @testing.slow + @testing.numpy_cupy_allclose(atol=1e-1) + def test_nansum_axis_huge(self, xp): + return self._test( + xp, (slice(None, None), slice(None, None), slice(1, 2)) + ) + + @testing.slow + @testing.numpy_cupy_allclose(atol=1e-2) + def test_nansum_axis_huge_halfnan(self, xp): + return self._test( + xp, (slice(None, None), slice(None, None), slice(0, 512)) + ) axes = [0, 1, 2] @@ -309,7 +330,7 @@ def test_nansum_axes(self, xp, dtype): @testing.parameterize(*testing.product({"axis": axes})) @pytest.mark.usefixtures("allow_fall_back_on_numpy") # TODO: remove "type_check=False" once leveraged on dpctl call -class TestCumsum(unittest.TestCase): +class TestCumsum: @testing.for_all_dtypes() @testing.numpy_cupy_allclose(type_check=False) def test_cumsum(self, xp, dtype): @@ -391,7 +412,7 @@ def test_invalid_axis_lower1(self, dtype): @testing.for_all_dtypes() def test_invalid_axis_lower2(self, dtype): a = testing.shaped_arange((4, 5), cupy, dtype) - with self.assertRaises(numpy.AxisError): + with pytest.raises(numpy.AxisError): return 
cupy.cumsum(a, axis=-a.ndim - 1) @testing.for_all_dtypes() @@ -404,22 +425,21 @@ def test_invalid_axis_upper1(self, dtype): @testing.for_all_dtypes() def test_invalid_axis_upper2(self, dtype): a = testing.shaped_arange((4, 5), cupy, dtype) - with self.assertRaises(numpy.AxisError): + with pytest.raises(numpy.AxisError): return cupy.cumsum(a, axis=a.ndim + 1) def test_cumsum_arraylike(self): - with self.assertRaises(TypeError): + with pytest.raises(TypeError): return cupy.cumsum((1, 2, 3)) @testing.for_float_dtypes() def test_cumsum_numpy_array(self, dtype): a_numpy = numpy.arange(8, dtype=dtype) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): return cupy.cumsum(a_numpy) -@testing.gpu -class TestCumprod(unittest.TestCase): +class TestCumprod: @testing.for_all_dtypes() @testing.numpy_cupy_allclose() def test_cumprod_1dim(self, xp, dtype): @@ -503,17 +523,17 @@ def test_invalid_axis_upper1(self, dtype): @testing.for_all_dtypes() def test_invalid_axis_upper2(self, dtype): a = testing.shaped_arange((4, 5), cupy, dtype) - with self.assertRaises(numpy.AxisError): + with pytest.raises(numpy.AxisError): return cupy.cumprod(a, axis=a.ndim) def test_cumprod_arraylike(self): - with self.assertRaises(TypeError): + with pytest.raises(TypeError): return cupy.cumprod((1, 2, 3)) @testing.for_float_dtypes() def test_cumprod_numpy_array(self, dtype): a_numpy = numpy.arange(1, 6, dtype=dtype) - with self.assertRaises(TypeError): + with pytest.raises(TypeError): return cupy.cumprod(a_numpy) diff --git a/tests/third_party/cupy/sorting_tests/test_search.py b/tests/third_party/cupy/sorting_tests/test_search.py index d14503d6c17..11343da25a5 100644 --- a/tests/third_party/cupy/sorting_tests/test_search.py +++ b/tests/third_party/cupy/sorting_tests/test_search.py @@ -9,8 +9,7 @@ # from cupy.core import _accelerator -@testing.gpu -class TestSearch(unittest.TestCase): +class TestSearch: @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_allclose() def 
test_argmax_all(self, xp, dtype): @@ -218,7 +217,6 @@ def test_argmin_zero_size_axis1(self, xp, dtype): # return a.argmax() -@testing.gpu @testing.parameterize( *testing.product( { @@ -228,7 +226,7 @@ def test_argmin_zero_size_axis1(self, xp, dtype): } ) ) -class TestArgMinMaxDtype(unittest.TestCase): +class TestArgMinMaxDtype: @testing.for_dtypes( dtypes=[numpy.int8, numpy.int16, numpy.int32, numpy.int64], name="result_dtype", diff --git a/tests/third_party/cupy/sorting_tests/test_sort.py b/tests/third_party/cupy/sorting_tests/test_sort.py index 7ae7911f90e..60a48b31e38 100644 --- a/tests/third_party/cupy/sorting_tests/test_sort.py +++ b/tests/third_party/cupy/sorting_tests/test_sort.py @@ -17,7 +17,6 @@ def get_array_module(*args): cupy.get_array_module = get_array_module -@testing.gpu class TestSort(unittest.TestCase): # Test ranks @@ -33,14 +32,12 @@ def test_external_sort_zero_dim(self): with pytest.raises(numpy.AxisError): xp.sort(a) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.numpy_cupy_array_equal() def test_sort_two_or_more_dim(self, xp): a = testing.shaped_random((2, 3, 3), xp) a.sort() return a - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.numpy_cupy_array_equal() def test_external_sort_two_or_more_dim(self, xp): a = testing.shaped_random((2, 3, 3), xp) @@ -69,10 +66,11 @@ def test_sort_contiguous(self, xp): a.sort() return a - def test_sort_non_contiguous(self): - a = testing.shaped_random((10,), cupy)[::2] # Non contiguous view - with self.assertRaises(NotImplementedError): - a.sort() + @testing.numpy_cupy_array_equal() + def test_sort_non_contiguous(self, xp): + a = testing.shaped_random((10,), xp)[::2] # Non contiguous view + a.sort() + return a @testing.numpy_cupy_array_equal() def test_external_sort_contiguous(self, xp): @@ -104,7 +102,6 @@ def test_sort_axis3(self, xp): a.sort(axis=2) return a - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.numpy_cupy_array_equal() def 
test_external_sort_axis(self, xp): a = testing.shaped_random((2, 3, 3), xp) @@ -116,13 +113,11 @@ def test_sort_negative_axis(self, xp): a.sort(axis=-2) return a - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.numpy_cupy_array_equal() def test_external_sort_negative_axis(self, xp): a = testing.shaped_random((2, 3, 3), xp) return xp.sort(a, axis=-2) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.numpy_cupy_array_equal() def test_external_sort_none_axis(self, xp): a = testing.shaped_random((2, 3, 3), xp) @@ -139,14 +134,12 @@ def test_sort_invalid_axis2(self): with self.assertRaises(numpy.AxisError): a.sort(axis=3) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_external_sort_invalid_axis1(self): for xp in (numpy, cupy): a = testing.shaped_random((2, 3, 3), xp) with pytest.raises(numpy.AxisError): xp.sort(a, axis=3) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_external_sort_invalid_axis2(self): a = testing.shaped_random((2, 3, 3), cupy) with self.assertRaises(numpy.AxisError): @@ -163,14 +156,12 @@ def test_sort_invalid_negative_axis2(self): with self.assertRaises(numpy.AxisError): a.sort(axis=-4) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_external_sort_invalid_negative_axis1(self): for xp in (numpy, cupy): a = testing.shaped_random((2, 3, 3), xp) with pytest.raises(numpy.AxisError): xp.sort(a, axis=-4) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") def test_external_sort_invalid_negative_axis2(self): a = testing.shaped_random((2, 3, 3), cupy) with self.assertRaises(numpy.AxisError): @@ -210,6 +201,14 @@ def test_nan4(self, xp, dtype): out = xp.sort(a, axis=2) return out + # Large case + + @testing.slow + @testing.numpy_cupy_array_equal() + def test_large(self, xp): + a = testing.shaped_random((17, 1023, 1023), xp) + return xp.sort(a, axis=-1) + @testing.gpu class TestLexsort(unittest.TestCase): @@ -296,14 +295,13 @@ def test_F_order(self, xp): } ) ) -@testing.gpu class 
TestArgsort(unittest.TestCase): - def argsort(self, a, axis=-1): + def argsort(self, a, axis=-1, kind=None): if self.external: xp = cupy.get_array_module(a) - return xp.argsort(a, axis=axis) + return xp.argsort(a, axis=axis, kind=kind) else: - return a.argsort(axis=axis) + return a.argsort(axis=axis, kind=kind) # Test base cases @@ -311,14 +309,15 @@ def argsort(self, a, axis=-1): @testing.numpy_cupy_array_equal() def test_argsort_zero_dim(self, xp, dtype): a = testing.shaped_random((), xp, dtype) - return self.argsort(a) + # only numpy allows 0d array without axis=None + kwargs = {} if xp == numpy else {"axis": None} + return self.argsort(a, **kwargs) @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() def test_argsort_one_dim(self, xp, dtype): a = testing.shaped_random((10,), xp, dtype) - res = self.argsort(a) - return a[res] + return self.argsort(a, axis=-1, kind="stable") @testing.for_all_dtypes() @testing.numpy_cupy_array_equal() @@ -362,7 +361,8 @@ def test_argsort_invalid_axis2(self): @testing.numpy_cupy_array_equal() def test_argsort_zero_dim_axis(self, xp): a = testing.shaped_random((), xp) - return self.argsort(a, axis=0) + # only numpy allows 0d array without axis=None + return self.argsort(a, axis=None) def test_argsort_zero_dim_invalid_axis(self): for xp in (numpy, cupy): diff --git a/tests/third_party/cupy/statistics_tests/test_meanvar.py b/tests/third_party/cupy/statistics_tests/test_meanvar.py index de2eb22604f..1537a57cbc0 100644 --- a/tests/third_party/cupy/statistics_tests/test_meanvar.py +++ b/tests/third_party/cupy/statistics_tests/test_meanvar.py @@ -1,5 +1,3 @@ -import unittest - import numpy import pytest @@ -12,8 +10,7 @@ ) -@testing.gpu -class TestMedian(unittest.TestCase): +class TestMedian: @testing.for_all_dtypes() @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_median_noaxis(self, xp, dtype): @@ -89,8 +86,7 @@ def test_median_invalid_axis(self): ) ) @pytest.mark.usefixtures("allow_fall_back_on_numpy") 
-@testing.gpu -class TestMedianAxis(unittest.TestCase): +class TestMedianAxis: @testing.for_all_dtypes() @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_median_axis_sequence(self, xp, dtype): @@ -98,61 +94,63 @@ def test_median_axis_sequence(self, xp, dtype): return xp.median(a, self.axis, keepdims=self.keepdims) -@testing.gpu -class TestAverage(unittest.TestCase): +class TestAverage: _multiprocess_can_split_ = True @testing.for_all_dtypes() - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_average_all(self, xp, dtype): a = testing.shaped_arange((2, 3), xp, dtype) return xp.average(a) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_average_axis(self, xp, dtype): a = testing.shaped_arange((2, 3, 4), xp, dtype) return xp.average(a, axis=1) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() - @testing.numpy_cupy_allclose() + @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_average_weights(self, xp, dtype): a = testing.shaped_arange((2, 3), xp, dtype) w = testing.shaped_arange((2, 3), xp, dtype) return xp.average(a, weights=w) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() - @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) - def test_average_axis_weights(self, xp, dtype): - a = testing.shaped_arange((2, 3, 4), xp, dtype) - w = testing.shaped_arange((2, 3, 4), xp, dtype) - return xp.average(a, axis=2, weights=w) - - def check_returned(self, a, axis, weights): - average_cpu, sum_weights_cpu = numpy.average( - a, axis, weights, returned=True + @testing.numpy_cupy_allclose(rtol=2e-7, type_check=has_support_aspect64()) + @pytest.mark.parametrize( + "axis, weights", [(1, False), (None, True), (1, True)] + ) + def test_returned(self, xp, dtype, axis, weights): + a = 
testing.shaped_arange((2, 3), xp, dtype) + if weights: + w = testing.shaped_arange((2, 3), xp, dtype) + else: + w = None + return xp.average(a, axis=axis, weights=w, returned=True) + + @testing.for_all_dtypes() + @testing.numpy_cupy_allclose(rtol=5e-7, type_check=has_support_aspect64()) + @pytest.mark.parametrize("returned", [True, False]) + @testing.with_requires("numpy>=1.23.1") + def test_average_keepdims_axis1(self, xp, dtype, returned): + a = testing.shaped_random((2, 3), xp, dtype) + w = testing.shaped_random((2, 3), xp, dtype) + return xp.average( + a, axis=1, weights=w, returned=returned, keepdims=True ) - result = cupy.average(cupy.asarray(a), axis, weights, returned=True) - self.assertTrue(isinstance(result, tuple)) - self.assertEqual(len(result), 2) - average_gpu, sum_weights_gpu = result - testing.assert_allclose(average_cpu, average_gpu) - testing.assert_allclose(sum_weights_cpu, sum_weights_gpu) - @pytest.mark.usefixtures("allow_fall_back_on_numpy") @testing.for_all_dtypes() - def test_returned(self, dtype): - a = testing.shaped_arange((2, 3), numpy, dtype) - w = testing.shaped_arange((2, 3), numpy, dtype) - self.check_returned(a, axis=1, weights=None) - self.check_returned(a, axis=None, weights=w) - self.check_returned(a, axis=1, weights=w) + @testing.numpy_cupy_allclose(rtol=1e-7, type_check=has_support_aspect64()) + @pytest.mark.parametrize("returned", [True, False]) + @testing.with_requires("numpy>=1.23.1") + def test_average_keepdims_noaxis(self, xp, dtype, returned): + a = testing.shaped_random((2, 3), xp, dtype) + w = testing.shaped_random((2, 3), xp, dtype) + return xp.average(a, weights=w, returned=returned, keepdims=True) -class TestMeanVar(unittest.TestCase): +class TestMeanVar: @testing.for_all_dtypes() @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_mean_all(self, xp, dtype): @@ -301,17 +299,16 @@ def test_external_std_axis_ddof(self, xp, dtype): } ) ) -@testing.gpu -class TestNanMean(unittest.TestCase): +class 
TestNanMean: @testing.for_all_dtypes(no_float16=True) - @testing.numpy_cupy_allclose(rtol=1e-6) + @testing.numpy_cupy_allclose(rtol=1e-6, type_check=has_support_aspect64()) def test_nanmean_without_nan(self, xp, dtype): a = testing.shaped_random(self.shape, xp, dtype) return xp.nanmean(a, axis=self.axis, keepdims=self.keepdims) - @ignore_runtime_warnings + @pytest.mark.usefixtures("suppress_mean_empty_slice_numpy_warnings") @testing.for_all_dtypes(no_float16=True) - @testing.numpy_cupy_allclose(rtol=1e-6) + @testing.numpy_cupy_allclose(rtol=1e-6, type_check=has_support_aspect64()) def test_nanmean_with_nan_float(self, xp, dtype): a = testing.shaped_random(self.shape, xp, dtype) @@ -322,14 +319,18 @@ def test_nanmean_with_nan_float(self, xp, dtype): return xp.nanmean(a, axis=self.axis, keepdims=self.keepdims) -@testing.gpu -class TestNanMeanAdditional(unittest.TestCase): - @ignore_runtime_warnings +class TestNanMeanAdditional: + @pytest.mark.usefixtures("suppress_mean_empty_slice_numpy_warnings") @testing.for_all_dtypes(no_float16=True) - @testing.numpy_cupy_allclose(rtol=1e-6) + @testing.numpy_cupy_allclose(rtol=1e-6, type_check=has_support_aspect64()) def test_nanmean_out(self, xp, dtype): a = testing.shaped_random((10, 20, 30), xp, dtype) - z = xp.zeros((20, 30), dtype=dtype) + # `numpy.mean` allows ``unsafe`` casting while `dpnp.mean` does not. + # So, output data type cannot be the same as input. 
+ out_dtype = ( + cupy.default_float_type(a.device) if xp == cupy else numpy.float64 + ) + z = xp.zeros((20, 30), dtype=out_dtype) if a.dtype.kind not in "biu": a[1, :] = xp.nan @@ -340,7 +341,7 @@ def test_nanmean_out(self, xp, dtype): @testing.slow @testing.for_all_dtypes(no_float16=True) - @testing.numpy_cupy_allclose(rtol=1e-6) + @testing.numpy_cupy_allclose(rtol=1e-6, type_check=has_support_aspect64()) def test_nanmean_huge(self, xp, dtype): a = testing.shaped_random((1024, 512), xp, dtype) @@ -349,14 +350,17 @@ def test_nanmean_huge(self, xp, dtype): return xp.nanmean(a, axis=1) + @pytest.mark.skipif( + not has_support_aspect16(), reason="No fp16 support by device" + ) @testing.numpy_cupy_allclose(rtol=1e-4) def test_nanmean_float16(self, xp): a = testing.shaped_arange((2, 3), xp, numpy.float16) a[0][0] = xp.nan return xp.nanmean(a) - @ignore_runtime_warnings - @testing.numpy_cupy_allclose(rtol=1e-6) + @pytest.mark.usefixtures("suppress_mean_empty_slice_numpy_warnings") + @testing.numpy_cupy_allclose(rtol=1e-6, type_check=has_support_aspect64()) def test_nanmean_all_nan(self, xp): a = xp.zeros((3, 4)) a[:] = xp.nan @@ -373,7 +377,7 @@ def test_nanmean_all_nan(self, xp): } ) ) -class TestNanVarStd(unittest.TestCase): +class TestNanVarStd: @pytest.mark.usefixtures("suppress_dof_numpy_warnings") @testing.for_all_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-6, type_check=has_support_aspect64()) @@ -385,8 +389,9 @@ def test_nanvar(self, xp, dtype): a, axis=self.axis, ddof=self.ddof, keepdims=self.keepdims ) + @pytest.mark.usefixtures("suppress_dof_numpy_warnings") @testing.for_all_dtypes(no_float16=True) - @testing.numpy_cupy_allclose(rtol=1e-6) + @testing.numpy_cupy_allclose(rtol=1e-6, type_check=has_support_aspect64()) def test_nanstd(self, xp, dtype): a = testing.shaped_random(self.shape, xp, dtype=dtype) if a.dtype.kind not in "biu": @@ -396,7 +401,7 @@ def test_nanstd(self, xp, dtype): ) -class TestNanVarStdAdditional(unittest.TestCase): 
+class TestNanVarStdAdditional: @pytest.mark.usefixtures("suppress_dof_numpy_warnings") @testing.for_all_dtypes(no_float16=True) @testing.numpy_cupy_allclose(rtol=1e-6, type_check=has_support_aspect64()) @@ -431,8 +436,9 @@ def test_nanvar_float16(self, xp): a[0][0] = xp.nan return xp.nanvar(a, axis=0) + @pytest.mark.usefixtures("suppress_dof_numpy_warnings") @testing.for_all_dtypes(no_float16=True) - @testing.numpy_cupy_allclose(rtol=1e-6) + @testing.numpy_cupy_allclose(rtol=1e-6, type_check=has_support_aspect64()) def test_nanstd_out(self, xp, dtype): a = testing.shaped_random((10, 20, 30), xp, dtype) z = xp.zeros((20, 30)) @@ -446,7 +452,7 @@ def test_nanstd_out(self, xp, dtype): @testing.slow @testing.for_all_dtypes(no_float16=True) - @testing.numpy_cupy_allclose(rtol=1e-6) + @testing.numpy_cupy_allclose(rtol=1e-6, type_check=has_support_aspect64()) def test_nanstd_huge(self, xp, dtype): a = testing.shaped_random((1024, 512), xp, dtype) @@ -455,6 +461,9 @@ def test_nanstd_huge(self, xp, dtype): return xp.nanstd(a, axis=1) + @pytest.mark.skipif( + not has_support_aspect16(), reason="No fp16 support by device" + ) @testing.numpy_cupy_allclose(rtol=1e-4) def test_nanstd_float16(self, xp): a = testing.shaped_arange((4, 5), xp, numpy.float16) @@ -482,7 +491,7 @@ def test_nanstd_float16(self, xp): "suppress_dof_numpy_warnings", "suppress_mean_empty_slice_numpy_warnings", ) -class TestProductZeroLength(unittest.TestCase): +class TestProductZeroLength: @testing.for_all_dtypes(no_complex=True) @testing.numpy_cupy_allclose(type_check=has_support_aspect64()) def test_external_mean_zero_len(self, xp, dtype): diff --git a/tests/third_party/cupy/statistics_tests/test_order.py b/tests/third_party/cupy/statistics_tests/test_order.py index 62ac2f72b36..3805933d490 100644 --- a/tests/third_party/cupy/statistics_tests/test_order.py +++ b/tests/third_party/cupy/statistics_tests/test_order.py @@ -1,4 +1,3 @@ -import unittest import warnings import numpy diff --git 
a/tests/third_party/cupy/test_type_routines.py b/tests/third_party/cupy/test_type_routines.py index 6a274158bcd..e6fd09c7419 100644 --- a/tests/third_party/cupy/test_type_routines.py +++ b/tests/third_party/cupy/test_type_routines.py @@ -1,102 +1,105 @@ -import unittest - -import numpy -import pytest - -import dpnp as cupy -from tests.third_party.cupy import testing - - -def _generate_type_routines_input(xp, dtype, obj_type): - dtype = numpy.dtype(dtype) - if obj_type == "dtype": - return dtype - if obj_type == "specifier": - return str(dtype) - if obj_type == "scalar": - return dtype.type(3) - if obj_type == "array": - return xp.zeros(3, dtype=dtype) - if obj_type == "primitive": - return type(dtype.type(3).tolist()) - assert False - - -@testing.parameterize( - *testing.product( - { - "obj_type": ["dtype", "specifier", "scalar", "array", "primitive"], - } - ) -) -class TestCanCast(unittest.TestCase): - @testing.for_all_dtypes_combination(names=("from_dtype", "to_dtype")) - @testing.numpy_cupy_equal() - def test_can_cast(self, xp, from_dtype, to_dtype): - if self.obj_type == "scalar": - pytest.skip("to be aligned with NEP-50") - - from_obj = _generate_type_routines_input(xp, from_dtype, self.obj_type) - - ret = xp.can_cast(from_obj, to_dtype) - assert isinstance(ret, bool) - return ret - - -@pytest.mark.skip("dpnp.common_type() is not implemented yet") -class TestCommonType(unittest.TestCase): - @testing.numpy_cupy_equal() - def test_common_type_empty(self, xp): - ret = xp.common_type() - assert type(ret) == type - return ret - - @testing.for_all_dtypes(no_bool=True) - @testing.numpy_cupy_equal() - def test_common_type_single_argument(self, xp, dtype): - array = _generate_type_routines_input(xp, dtype, "array") - ret = xp.common_type(array) - assert type(ret) == type - return ret - - @testing.for_all_dtypes_combination( - names=("dtype1", "dtype2"), no_bool=True - ) - @testing.numpy_cupy_equal() - def test_common_type_two_arguments(self, xp, dtype1, dtype2): - 
array1 = _generate_type_routines_input(xp, dtype1, "array") - array2 = _generate_type_routines_input(xp, dtype2, "array") - ret = xp.common_type(array1, array2) - assert type(ret) == type - return ret - - @testing.for_all_dtypes() - def test_common_type_bool(self, dtype): - for xp in (numpy, cupy): - array1 = _generate_type_routines_input(xp, dtype, "array") - array2 = _generate_type_routines_input(xp, "bool_", "array") - with pytest.raises(TypeError): - xp.common_type(array1, array2) - - -@testing.parameterize( - *testing.product( - { - "obj_type1": ["dtype", "specifier", "scalar", "array", "primitive"], - "obj_type2": ["dtype", "specifier", "scalar", "array", "primitive"], - } - ) -) -class TestResultType(unittest.TestCase): - @testing.for_all_dtypes_combination(names=("dtype1", "dtype2")) - @testing.numpy_cupy_equal() - def test_result_type(self, xp, dtype1, dtype2): - if "scalar" in {self.obj_type1, self.obj_type2}: - pytest.skip("to be aligned with NEP-50") - - input1 = _generate_type_routines_input(xp, dtype1, self.obj_type1) - - input2 = _generate_type_routines_input(xp, dtype2, self.obj_type2) - ret = xp.result_type(input1, input2) - assert isinstance(ret, numpy.dtype) - return ret +import unittest + +import numpy +import pytest + +import dpnp as cupy +from tests.third_party.cupy import testing + + +def _generate_type_routines_input(xp, dtype, obj_type): + dtype = numpy.dtype(dtype) + if obj_type == "dtype": + return dtype + if obj_type == "specifier": + return str(dtype) + if obj_type == "scalar": + return dtype.type(3) + if obj_type == "array": + return xp.zeros(3, dtype=dtype) + if obj_type == "primitive": + return type(dtype.type(3).tolist()) + assert False + + +@testing.parameterize( + *testing.product( + { + "obj_type": ["dtype", "specifier", "scalar", "array", "primitive"], + } + ) +) +class TestCanCast(unittest.TestCase): + @testing.for_all_dtypes_combination(names=("from_dtype", "to_dtype")) + @testing.numpy_cupy_equal() + def test_can_cast(self, 
xp, from_dtype, to_dtype): + if self.obj_type == "scalar": + pytest.skip("to be aligned with NEP-50") + + from_obj = _generate_type_routines_input(xp, from_dtype, self.obj_type) + + ret = xp.can_cast(from_obj, to_dtype) + assert isinstance(ret, bool) + return ret + + +@pytest.mark.skip("dpnp.common_type() is not implemented yet") +class TestCommonType(unittest.TestCase): + @testing.numpy_cupy_equal() + def test_common_type_empty(self, xp): + ret = xp.common_type() + assert type(ret) == type + return ret + + @testing.for_all_dtypes(no_bool=True) + @testing.numpy_cupy_equal() + def test_common_type_single_argument(self, xp, dtype): + array = _generate_type_routines_input(xp, dtype, "array") + ret = xp.common_type(array) + assert type(ret) == type + return ret + + @testing.for_all_dtypes_combination( + names=("dtype1", "dtype2"), no_bool=True + ) + @testing.numpy_cupy_equal() + def test_common_type_two_arguments(self, xp, dtype1, dtype2): + array1 = _generate_type_routines_input(xp, dtype1, "array") + array2 = _generate_type_routines_input(xp, dtype2, "array") + ret = xp.common_type(array1, array2) + assert type(ret) == type + return ret + + @testing.for_all_dtypes() + def test_common_type_bool(self, dtype): + for xp in (numpy, cupy): + array1 = _generate_type_routines_input(xp, dtype, "array") + array2 = _generate_type_routines_input(xp, "bool_", "array") + with pytest.raises(TypeError): + xp.common_type(array1, array2) + + +@testing.parameterize( + *testing.product( + { + "obj_type1": ["dtype", "specifier", "scalar", "array", "primitive"], + "obj_type2": ["dtype", "specifier", "scalar", "array", "primitive"], + } + ) +) +# TODO: Temporary skipping the test, until Internal CI is updated with +# recent changed in dpctl regarding dpt.result_type function +@pytest.mark.skip("Temporary skipping the test") +class TestResultType(unittest.TestCase): + @testing.for_all_dtypes_combination(names=("dtype1", "dtype2")) + @testing.numpy_cupy_equal() + def test_result_type(self, 
xp, dtype1, dtype2): + if "scalar" in {self.obj_type1, self.obj_type2}: + pytest.skip("to be aligned with NEP-50") + + input1 = _generate_type_routines_input(xp, dtype1, self.obj_type1) + + input2 = _generate_type_routines_input(xp, dtype2, self.obj_type2) + ret = xp.result_type(input1, input2) + assert isinstance(ret, numpy.dtype) + return ret diff --git a/tests/third_party/numpy_ext/__init__.py b/tests/third_party/numpy_ext/__init__.py index 0401f29402f..9762d292ffe 100644 --- a/tests/third_party/numpy_ext/__init__.py +++ b/tests/third_party/numpy_ext/__init__.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/tests_external/numpy/runtests.py b/tests_external/numpy/runtests.py index 44ac16bb9d1..d0c605e21be 100644 --- a/tests_external/numpy/runtests.py +++ b/tests_external/numpy/runtests.py @@ -2,7 +2,7 @@ # distutils: language = c++ # -*- coding: utf-8 -*- # ***************************************************************************** -# Copyright (c) 2016-2023, Intel Corporation +# Copyright (c) 2016-2024, Intel Corporation # All rights reserved. # # Redistribution and use in source and binary forms, with or without