diff --git a/cpp/oneapi/dal/algo/linear_regression/test/batch.cpp b/cpp/oneapi/dal/algo/linear_regression/test/batch.cpp
index a5585202c31..33f690d2110 100644
--- a/cpp/oneapi/dal/algo/linear_regression/test/batch.cpp
+++ b/cpp/oneapi/dal/algo/linear_regression/test/batch.cpp
@@ -51,9 +51,10 @@ TEMPLATE_LIST_TEST_M(lr_batch_test, "LR common flow", "[lr][batch]", lr_types) {
 }
 
 TEMPLATE_LIST_TEST_M(lr_batch_test, "LR with non-PSD matrix", "[lr][batch-nonpsd]", lr_types) {
-    SKIP_IF(this->non_psd_system_not_supported_on_device());
+    SKIP_IF(this->not_float64_friendly());
 
     this->generate(777);
+
     this->run_and_check_linear_indefinite();
     this->run_and_check_linear_indefinite_multioutput();
 }
diff --git a/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp b/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp
index e1e54092552..bfba2abf9e5 100644
--- a/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp
+++ b/cpp/oneapi/dal/algo/linear_regression/test/fixture.hpp
@@ -65,10 +65,6 @@ class lr_test : public te::crtp_algo_fixture<TestType, Derived> {
         return static_cast<Derived*>(this);
     }
 
-    bool non_psd_system_not_supported_on_device() {
-        return this->get_policy().is_gpu();
-    }
-
     table compute_responses(const table& beta, const table& bias, const table& data) const {
         const auto s_count = data.get_row_count();
 
@@ -299,18 +295,20 @@ class lr_test : public te::crtp_algo_fixture<TestType, Derived> {
     here is not positive-definite, thus it has an infinite number of possible solutions. The
     solution here is the one with minimum norm, which is typically more desirable. */
     void run_and_check_linear_indefinite(double tol = 1e-3) {
-        const double X[] = { -0.98912135, -0.36778665, 1.28792526, 0.19397442,  0.9202309,
-                             0.57710379,  -0.63646365, 0.54195222, -0.31659545, -0.32238912,
-                             0.09716732,  -1.52593041, 1.1921661,  -0.67108968, 1.00026942 };
-        const double y[] = { 0.13632112, 1.53203308, -0.65996941 };
+        const auto dtype =
+            std::is_same<float_t, double>::value ? data_type::float64 : data_type::float32;
+        const float_t X[] = { -0.98912135, -0.36778665, 1.28792526, 0.19397442,  0.9202309,
+                              0.57710379,  -0.63646365, 0.54195222, -0.31659545, -0.32238912,
+                              0.09716732,  -1.52593041, 1.1921661,  -0.67108968, 1.00026942 };
+        const float_t y[] = { 0.13632112, 1.53203308, -0.65996941 };
         auto X_tbl = oneapi::dal::detail::homogen_table_builder()
-                         .set_data_type(data_type::float64)
+                         .set_data_type(dtype)
                          .set_layout(data_layout::row_major)
                          .allocate(3, 5)
                          .copy_data(X, 3, 5)
                          .build();
         auto y_tbl = oneapi::dal::detail::homogen_table_builder()
-                         .set_data_type(data_type::float64)
+                         .set_data_type(dtype)
                          .set_layout(data_layout::row_major)
                          .allocate(3, 1)
                          .copy_data(y, 3, 1)
@@ -321,20 +319,20 @@ class lr_test : public te::crtp_algo_fixture<TestType, Derived> {
         const auto coefs = train_res.get_coefficients();
 
         if (desc.get_result_options().test(result_options::intercept)) {
-            const double expected_beta[] = { 0.27785494,
-                                             0.53011669,
-                                             0.34352259,
-                                             0.40506216,
-                                             -1.26026447 };
-            const double expected_intercept[] = { 1.24485441 };
+            const float_t expected_beta[] = { 0.27785494,
+                                              0.53011669,
+                                              0.34352259,
+                                              0.40506216,
+                                              -1.26026447 };
+            const float_t expected_intercept[] = { 1.24485441 };
             const auto expected_beta_tbl = oneapi::dal::detail::homogen_table_builder()
-                                               .set_data_type(data_type::float64)
+                                               .set_data_type(dtype)
                                                .set_layout(data_layout::row_major)
                                                .allocate(1, 5)
                                                .copy_data(expected_beta, 1, 5)
                                                .build();
             const auto expected_intercept_tbl = oneapi::dal::detail::homogen_table_builder()
-                                                    .set_data_type(data_type::float64)
+                                                    .set_data_type(dtype)
                                                     .set_layout(data_layout::row_major)
                                                     .allocate(1, 1)
                                                     .copy_data(expected_intercept, 1, 1)
@@ -351,13 +349,13 @@ class lr_test : public te::crtp_algo_fixture<TestType, Derived> {
         }
 
         else {
-            const double expected_beta[] = { 0.38217445,
-                                             0.2732197,
-                                             1.87135517,
-                                             0.63458468,
-                                             -2.08473134 };
+            const float_t expected_beta[] = { 0.38217445,
+                                              0.2732197,
+                                              1.87135517,
+                                              0.63458468,
+                                              -2.08473134 };
             const auto expected_beta_tbl = oneapi::dal::detail::homogen_table_builder()
-                                               .set_data_type(data_type::float64)
+                                               .set_data_type(dtype)
                                                .set_layout(data_layout::row_major)
                                                .allocate(1, 5)
                                                .copy_data(expected_beta, 1, 5)
@@ -369,19 +367,21 @@ class lr_test : public te::crtp_algo_fixture<TestType, Derived> {
     }
 
     void run_and_check_linear_indefinite_multioutput(double tol = 1e-3) {
-        const double X[] = { -0.98912135, -0.36778665, 1.28792526, 0.19397442,  0.9202309,
-                             0.57710379,  -0.63646365, 0.54195222, -0.31659545, -0.32238912,
-                             0.09716732,  -1.52593041, 1.1921661,  -0.67108968, 1.00026942 };
-        const double y[] = { 0.13632112,  1.53203308, -0.65996941,
-                             -0.31179486, 0.33776913, -2.2074711 };
+        const auto dtype =
+            std::is_same<float_t, double>::value ? data_type::float64 : data_type::float32;
+        const float_t X[] = { -0.98912135, -0.36778665, 1.28792526, 0.19397442,  0.9202309,
+                              0.57710379,  -0.63646365, 0.54195222, -0.31659545, -0.32238912,
+                              0.09716732,  -1.52593041, 1.1921661,  -0.67108968, 1.00026942 };
+        const float_t y[] = { 0.13632112,  1.53203308, -0.65996941,
+                              -0.31179486, 0.33776913, -2.2074711 };
         auto X_tbl = oneapi::dal::detail::homogen_table_builder()
-                         .set_data_type(data_type::float64)
+                         .set_data_type(dtype)
                          .set_layout(data_layout::row_major)
                          .allocate(3, 5)
                          .copy_data(X, 3, 5)
                          .build();
         auto y_tbl = oneapi::dal::detail::homogen_table_builder()
-                         .set_data_type(data_type::float64)
+                         .set_data_type(dtype)
                          .set_layout(data_layout::row_major)
                          .allocate(3, 2)
                          .copy_data(y, 3, 2)
@@ -392,19 +392,19 @@ class lr_test : public te::crtp_algo_fixture<TestType, Derived> {
         const auto coefs = train_res.get_coefficients();
 
         if (desc.get_result_options().test(result_options::intercept)) {
-            const double expected_beta[] = {
+            const float_t expected_beta[] = {
                 -0.18692112, -0.20034801, -0.09590892, -0.13672683, 0.56229012,
                 -0.97006008, 1.39413595,  0.49238012,  1.11041239,  -0.79213452,
             };
-            const double expected_intercept[] = { -0.48964358, 0.96467681 };
+            const float_t expected_intercept[] = { -0.48964358, 0.96467681 };
             const auto expected_beta_tbl = oneapi::dal::detail::homogen_table_builder()
-                                               .set_data_type(data_type::float64)
+                                               .set_data_type(dtype)
                                                .set_layout(data_layout::row_major)
                                                .allocate(2, 5)
                                                .copy_data(expected_beta, 2, 5)
                                                .build();
             const auto expected_intercept_tbl = oneapi::dal::detail::homogen_table_builder()
-                                                    .set_data_type(data_type::float64)
+                                                    .set_data_type(dtype)
                                                     .set_layout(data_layout::row_major)
                                                     .allocate(1, 2)
                                                     .copy_data(expected_intercept, 1, 2)
@@ -421,11 +421,11 @@ class lr_test : public te::crtp_algo_fixture<TestType, Derived> {
         }
 
         else {
-            const double expected_beta[] = { -0.22795353, -0.09930168, -0.69685744, -0.22700585,
-                                             0.88658098,  -0.88921961, 1.19505839,  1.67634561,
-                                             1.2882766,   -1.43103981 };
+            const float_t expected_beta[] = { -0.22795353, -0.09930168, -0.69685744, -0.22700585,
+                                              0.88658098,  -0.88921961, 1.19505839,  1.67634561,
+                                              1.2882766,   -1.43103981 };
             const auto expected_beta_tbl = oneapi::dal::detail::homogen_table_builder()
-                                               .set_data_type(data_type::float64)
+                                               .set_data_type(dtype)
                                                .set_layout(data_layout::row_major)
                                                .allocate(2, 5)
                                                .copy_data(expected_beta, 2, 5)
diff --git a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp
index 223f6753f69..12eb654bbe4 100644
--- a/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp
+++ b/cpp/oneapi/dal/backend/primitives/lapack/solve_dpc.cpp
@@ -14,9 +14,14 @@
 * limitations under the License.
 *******************************************************************************/
 
+#include <limits>
+
+#include "oneapi/dal/backend/primitives/blas/gemm.hpp"
+#include "oneapi/dal/backend/primitives/blas/gemv.hpp"
 #include "oneapi/dal/backend/primitives/lapack.hpp"
 #include "oneapi/dal/backend/primitives/ndarray.hpp"
 #include "oneapi/dal/backend/primitives/ndindexer.hpp"
+#include "oneapi/dal/detail/error_messages.hpp"
 
 namespace oneapi::dal::backend::primitives {
 
@@ -59,6 +64,175 @@ inline sycl::event beta_copy_transform(sycl::queue& queue,
     });
 }
 
+/// This is an adaptation of the CPU version, which can be found in this file:
+/// cpp/daal/src/algorithms/service_kernel_math.h
+///
+/// It solves a linear system A*x = b
+/// where 'b' might be a matrix (multiple RHS)
+///
+/// It is intended as a fallback for solving linear regression, where these
+/// matrices are obtained as follows:
+///     A = t(X)*X
+///     b = t(X)*y
+///
+/// It can handle systems that are not positive semi-definite, so it's used
+/// as a fallback when Cholesky fails or when it involves too small numbers
+/// (which tends to happen when floating point error results in a slightly
+/// positive matrix when it should have zero determinant in theory).
+template <mkl::uplo uplo, typename Float>
+sycl::event solve_spectral_decomposition(
+    sycl::queue& queue,
+    ndview<Float, 2>& A, /// t(X)*X, LHS, gets overwritten, dim=[dim_A, dim_A] (symmetric)
+    sycl::event& event_A,
+    ndview<Float, 2>&
+        b, /// t(X)*y, RHS, solution is outputted here, dim=[dim_A, nrhs] in colmajor order
+    sycl::event& event_b,
+    const std::int64_t dim_A,
+    const std::int64_t nrhs) {
+    constexpr auto alloc = sycl::usm::alloc::device;
+
+    /// Decompose: A = Q * diag(l) * t(Q)
+    /// Note: for NRHS>1, this will overallocate in order to reuse the memory as buffer later on
+    auto eigenvalues = ndarray<Float, 1>::empty(queue, dim_A * nrhs, alloc);
+    sycl::event syevd_event =
+        syevd<mkl::job::vec, uplo, Float>(queue, dim_A, A, dim_A, eigenvalues, { event_A });
+    const Float eps = std::numeric_limits<Float>::epsilon();
+
+    /// Discard too small singular values
+    std::int64_t num_discarded;
+    {
+        /// This is placed inside a block because the array created here is not used any further
+        /// Note: the array for 'eigenvalues' is allocated with size [dimA, nrhs],
+        /// but by this point, only 'dimA' elements will have been written to it.
+        auto eigenvalues_cpu =
+            ndview<Float, 1>::wrap(eigenvalues.get_data(), dim_A).to_host(queue, { syevd_event });
+        const Float* eigenvalues_cpu_ptr = eigenvalues_cpu.get_data();
+        const Float largest_ev = eigenvalues_cpu_ptr[dim_A - 1];
+        if (largest_ev <= eps) {
+            throw internal_error{ dal::detail::error_messages::too_small_singular_values() };
+        }
+        const Float component_threshold = eps * largest_ev;
+        for (num_discarded = 0; num_discarded < dim_A - 1; num_discarded++) {
+            if (eigenvalues_cpu_ptr[num_discarded] > component_threshold)
+                break;
+        }
+    }
+
+    /// Create the square root of the inverse: Qis = Q * diag(1 / sqrt(l))
+    std::int64_t num_taken = dim_A - num_discarded;
+    auto ev_mutable = eigenvalues.get_mutable_data();
+    sycl::event inv_sqrt_eigenvalues_event = queue.submit([&](sycl::handler& h) {
+        h.depends_on(syevd_event);
+        h.parallel_for(num_taken, [=](const auto& i) {
+            const std::size_t ix = i + num_discarded;
+            ev_mutable[ix] = sycl::rsqrt(ev_mutable[ix]);
+        });
+    });
+
+    auto Q_mutable = A.get_mutable_data();
+    sycl::event inv_sqrt_eigenvectors_event = queue.submit([&](sycl::handler& h) {
+        const auto range = oneapi::dal::backend::make_range_2d(num_taken, dim_A);
+        h.depends_on(inv_sqrt_eigenvalues_event);
+        h.parallel_for(range, [=](sycl::id<2> id) {
+            const std::size_t col = id[0] + num_discarded;
+            const std::size_t row = id[1];
+            Q_mutable[row + col * dim_A] *= ev_mutable[col];
+        });
+    });
+
+    /// Now calculate the actual solution: Qis * Qis' * B
+    const std::int64_t eigenvectors_offset = num_discarded * dim_A;
+    if (nrhs == 1) {
+        auto ev_mutable_vec_view = ndview<Float, 1>::wrap(ev_mutable, num_taken);
+        auto b_mutable_vec_view = ndview<Float, 1>::wrap(b.get_mutable_data(), dim_A);
+        sycl::event gemv_right_event =
+            gemv(queue,
+                 ndview<Float, 2, ndorder::c>::wrap(Q_mutable + eigenvectors_offset,
+                                                    ndshape<2>(num_taken, dim_A)),
+                 b_mutable_vec_view,
+                 ev_mutable_vec_view,
+                 Float(1),
+                 Float(0),
+                 { inv_sqrt_eigenvectors_event, event_b });
+        return gemv(queue,
+                    ndview<Float, 2, ndorder::f>::wrap(Q_mutable + eigenvectors_offset,
+                                                       ndshape<2>(dim_A, num_taken)),
+                    ev_mutable_vec_view,
+                    b_mutable_vec_view,
+                    Float(1),
+                    Float(0),
+                    { gemv_right_event });
+    }
+
+    else {
+        auto ev_mutable_mat_view =
+            ndview<Float, 2, ndorder::c>::wrap(ev_mutable, ndshape<2>(nrhs, num_taken));
+        auto b_mutable_mat_view =
+            ndview<Float, 2, ndorder::c>::wrap(b.get_mutable_data(), ndshape<2>(nrhs, dim_A));
+        sycl::event gemm_right_event =
+            gemm(queue,
+                 b_mutable_mat_view,
+                 ndview<Float, 2, ndorder::f>::wrap(Q_mutable + eigenvectors_offset,
+                                                    ndshape<2>(dim_A, num_taken)),
+                 ev_mutable_mat_view,
+                 Float(1),
+                 Float(0),
+                 { inv_sqrt_eigenvectors_event, event_b });
+        return gemm(queue,
+                    ev_mutable_mat_view,
+                    ndview<Float, 2, ndorder::c>::wrap(Q_mutable + eigenvectors_offset,
+                                                       ndshape<2>(num_taken, dim_A)),
+                    b_mutable_mat_view,
+                    Float(1),
+                    Float(0),
+                    { gemm_right_event });
+    }
+}
+
+/// Returns the minimum value among entries in the diagonal of a square matrix
+template <typename Float>
+Float diagonal_minimum(sycl::queue& queue,
+                       const Float* Matrix,
+                       const std::int64_t dim_matrix,
+                       sycl::event& event_Matrix) {
+    constexpr auto alloc = sycl::usm::alloc::device;
+    auto diag_min_holder = array<Float>::empty(queue, 1, alloc);
+    sycl::event diag_min_event = queue.submit([&](sycl::handler& h) {
+        auto min_reduction = sycl::reduction(diag_min_holder.get_mutable_data(),
+                                             sycl::minimum<>(),
+                                             sycl::property::reduction::initialize_to_identity());
+        h.depends_on({ event_Matrix });
+        h.parallel_for(dim_matrix, min_reduction, [=](const auto& i, auto& min_obj) {
+            min_obj.combine(Matrix[i * (dim_matrix + 1)]);
+        });
+    });
+    return ndview<Float, 1>::wrap(diag_min_holder).at_device(queue, 0, { diag_min_event });
+}
+
+template <mkl::uplo uplo, typename Float, ndorder xlayout, ndorder ylayout>
+sycl::event solve_with_fallback(
+    sycl::queue& queue,
+    const ndview<Float, 2, xlayout>& xtx,
+    const ndview<Float, 2, ylayout>& xty,
+    ndview<Float, 2, ndorder::c>& nxtx,
+    ndview<Float, 2, ndorder::c>& nxty, /// solution will be written here
+    const event_vector& dependencies) {
+    const std::int64_t dim_xtx = xtx.get_dimension(0);
+    const std::int64_t nrhs = nxty.get_dimension(0);
+    /// Note: this templated version of 'copy' reuses a layout that should have been
+    /// specified in a previous copy before the fallback.
+    sycl::event xtx_event_new = copy(queue, nxtx, xtx, dependencies);
+    sycl::event xty_event_new = copy(queue, nxty, xty, dependencies);
+
+    return solve_spectral_decomposition<uplo, Float>(queue,
+                                                     nxtx,
+                                                     xtx_event_new,
+                                                     nxty,
+                                                     xty_event_new,
+                                                     dim_xtx,
+                                                     nrhs);
+}
+
 template <mkl::uplo uplo, bool beta, typename Float, ndorder xlayout, ndorder ylayout>
 sycl::event solve_system(sycl::queue& queue,
                          const ndview<Float, 2, xlayout>& xtx,
@@ -70,12 +244,38 @@ sycl::event solve_system(sycl::queue& queue,
 
     auto [nxty, xty_event] = copy<ndorder::c, Float, ylayout, alloc>(queue, xty, dependencies);
     auto [nxtx, xtx_event] = copy<ndorder::c, Float, xlayout, alloc>(queue, xtx, dependencies);
+    const std::int64_t dim_xtx = xtx.get_dimension(0);
+
+    sycl::event solution_event;
 
     opt_array<Float> dummy{};
-    auto potrf_event = potrf_factorization<uplo>(queue, nxtx, dummy, { xtx_event });
-    auto potrs_event = potrs_solution<uplo>(queue, nxtx, nxty, dummy, { potrf_event, xty_event });
+    /// Note: this is wrapped in a try-catch block in order to catch a potential exception
+    /// thrown by oneMKL when the Cholesky factorization ('potrs') fails due to the matrix
+    /// not being positive-definite. In such case, it will then fall back to spectral
+    /// decomposition-based inversion, which is able to handle such problems.
+    try {
+        sycl::event potrf_event = potrf_factorization<uplo>(queue, nxtx, dummy, { xtx_event });
+        queue.wait_and_throw();
+        const Float diag_min = diagonal_minimum(queue, nxtx.get_data(), dim_xtx, potrf_event);
+        /// Note: this threshold was chosen as a guess for when Cholesky factorization might
+        /// succeed despite the matrix in theory not being positive-definite, or succeed but
+        /// having too large numerical inaccuracies. In such cases, there should be too small
+        /// singular values that the fallback will discard, but there's no guaranteed match
+        /// between singular values and entries in the Cholesky diagonal. This is just a guess.
+        const Float threshold_diagonal_min = 1e-6;
+        if (diag_min > threshold_diagonal_min) {
+            solution_event =
+                potrs_solution<uplo>(queue, nxtx, nxty, dummy, { potrf_event, xty_event });
+        }
+        else {
+            solution_event = solve_with_fallback<uplo>(queue, xtx, xty, nxtx, nxty, dependencies);
+        }
+    }
+    catch (mkl::lapack::computation_error& ex) {
+        solution_event = solve_with_fallback<uplo>(queue, xtx, xty, nxtx, nxty, dependencies);
+    }
 
-    return beta_copy_transform<beta>(queue, nxty, final_xty, { potrs_event });
+    return beta_copy_transform<beta>(queue, nxty, final_xty, { solution_event });
 }
 
 #define INSTANTIATE(U, B, F, XL, YL)                                 \
diff --git a/cpp/oneapi/dal/detail/error_messages.cpp b/cpp/oneapi/dal/detail/error_messages.cpp
index 20ce68e0ef2..85cbb969099 100644
--- a/cpp/oneapi/dal/detail/error_messages.cpp
+++ b/cpp/oneapi/dal/detail/error_messages.cpp
@@ -303,6 +303,8 @@ MSG(input_y_is_empty, "Input y is empty")
 /* Linear Regression */
 MSG(intercept_result_option_requires_intercept_flag,
     "Intercept result option requires intercept flag")
+MSG(too_small_singular_values,
+    "Linear system has too small singular values, cannot calculate a solution")
 
 /* Logistic Regression */
 MSG(class_count_neq_two,
diff --git a/cpp/oneapi/dal/detail/error_messages.hpp b/cpp/oneapi/dal/detail/error_messages.hpp
index dbb9c8ba6ec..926e89321b0 100644
--- a/cpp/oneapi/dal/detail/error_messages.hpp
+++ b/cpp/oneapi/dal/detail/error_messages.hpp
@@ -242,6 +242,7 @@ class ONEDAL_EXPORT error_messages {
 
     /* Linear Regression */
     MSG(intercept_result_option_requires_intercept_flag);
+    MSG(too_small_singular_values);
 
     /* Logistic Regression */
     MSG(class_count_neq_two);