diff --git a/scaluq/gate/update_ops_dense_matrix.cpp b/scaluq/gate/update_ops_dense_matrix.cpp index bc15364d..e0b02f87 100644 --- a/scaluq/gate/update_ops_dense_matrix.cpp +++ b/scaluq/gate/update_ops_dense_matrix.cpp @@ -20,6 +20,7 @@ void single_qubit_dense_matrix_gate(UINT target_qubit_index, state._raw[basis_0] = res0; state._raw[basis_1] = res1; }); + Kokkos::fence(); } void double_qubit_dense_matrix_gate(UINT target0, @@ -49,6 +50,7 @@ void double_qubit_dense_matrix_gate(UINT target0, state._raw[basis_2] = res2; state._raw[basis_3] = res3; }); + Kokkos::fence(); } } // namespace internal } // namespace scaluq diff --git a/scaluq/gate/update_ops_npair_qubit.cpp b/scaluq/gate/update_ops_npair_qubit.cpp index 23dd8b81..1d72f0e1 100644 --- a/scaluq/gate/update_ops_npair_qubit.cpp +++ b/scaluq/gate/update_ops_npair_qubit.cpp @@ -31,6 +31,7 @@ void fusedswap_gate(UINT target_qubit_index_0, Kokkos::Experimental::swap(state._raw[i], state._raw[j]); } }); + Kokkos::fence(); } } // namespace internal } // namespace scaluq diff --git a/scaluq/gate/update_ops_one_control_one_target.cpp b/scaluq/gate/update_ops_one_control_one_target.cpp index 505be5db..9d07f308 100644 --- a/scaluq/gate/update_ops_one_control_one_target.cpp +++ b/scaluq/gate/update_ops_one_control_one_target.cpp @@ -16,6 +16,7 @@ void cx_gate(UINT control_qubit_index, UINT target_qubit_index, StateVector& sta i |= 1ULL << control_qubit_index; Kokkos::Experimental::swap(state._raw[i], state._raw[i | (1ULL << target_qubit_index)]); }); + Kokkos::fence(); } void cz_gate(UINT control_qubit_index, UINT target_qubit_index, StateVector& state) { @@ -27,6 +28,7 @@ void cz_gate(UINT control_qubit_index, UINT target_qubit_index, StateVector& sta i |= 1ULL << target_qubit_index; state._raw[i] *= -1; }); + Kokkos::fence(); } } // namespace internal } // namespace scaluq diff --git a/scaluq/gate/update_ops_one_qubit.cpp b/scaluq/gate/update_ops_one_qubit.cpp index 25a5a978..32bc365c 100644 --- a/scaluq/gate/update_ops_one_qubit.cpp +++ b/scaluq/gate/update_ops_one_qubit.cpp @@ -14,6 +14,7 @@ void x_gate(UINT target_qubit_index, StateVector& state) { UINT i = internal::insert_zero_to_basis_index(it, target_qubit_index); Kokkos::Experimental::swap(state._raw[i], state._raw[i | (1ULL << target_qubit_index)]); }); + Kokkos::fence(); } void y_gate(UINT target_qubit_index, StateVector& state) { @@ -24,6 +25,7 @@ void y_gate(UINT target_qubit_index, StateVector& state) { state._raw[i | (1ULL << target_qubit_index)] *= Complex(0, -1); Kokkos::Experimental::swap(state._raw[i], state._raw[i | (1ULL << target_qubit_index)]); }); + Kokkos::fence(); } void z_gate(UINT target_qubit_index, StateVector& state) { @@ -32,6 +34,7 @@ void z_gate(UINT target_qubit_index, StateVector& state) { UINT i = internal::insert_zero_to_basis_index(it, target_qubit_index); state._raw[i | (1ULL << target_qubit_index)] *= Complex(-1, 0); }); + Kokkos::fence(); } void h_gate(UINT target_qubit_index, StateVector& state) { @@ -43,6 +46,7 @@ void h_gate(UINT target_qubit_index, StateVector& state) { state._raw[i] = (a + b) * INVERSE_SQRT2(); state._raw[i | (1ULL << target_qubit_index)] = (a - b) * INVERSE_SQRT2(); }); + Kokkos::fence(); } void single_qubit_phase_gate(UINT target_qubit_index, Complex phase, StateVector& state) { @@ -51,6 +55,7 @@ void single_qubit_phase_gate(UINT target_qubit_index, Complex phase, StateVector UINT i = internal::insert_zero_to_basis_index(it, target_qubit_index); state._raw[i | (1ULL << target_qubit_index)] *= phase; }); + Kokkos::fence(); } void s_gate(UINT target_qubit_index, StateVector& state) { @@ -113,6 +118,7 @@ void single_qubit_diagonal_matrix_gate(UINT target_qubit_index, Kokkos::parallel_for( state.dim(), KOKKOS_LAMBDA(UINT it) { state._raw[it] *= diag.val[(it >> target_qubit_index) & 1]; }); + Kokkos::fence(); } void rz_gate(UINT target_qubit_index, double angle, StateVector& state) { diff --git a/scaluq/gate/update_ops_pauli.cpp b/scaluq/gate/update_ops_pauli.cpp index 4331dbb2..21ef7b0b 100644 --- a/scaluq/gate/update_ops_pauli.cpp +++ b/scaluq/gate/update_ops_pauli.cpp @@ -31,6 +31,7 @@ void pauli_rotation_gate(const PauliOperator& pauli, double angle, StateVector& } state._raw[state_idx] *= coef; }); + Kokkos::fence(); return; } else { const UINT insert_idx = internal::BitVector(bit_flip_mask_vector).msb(); @@ -58,6 +59,7 @@ void pauli_rotation_gate(const PauliOperator& pauli, double angle, StateVector& state._raw[basis_0] *= coef; state._raw[basis_1] *= coef; }); + Kokkos::fence(); } } diff --git a/scaluq/gate/update_ops_two_qubit.cpp b/scaluq/gate/update_ops_two_qubit.cpp index bc142966..54dd2c68 100644 --- a/scaluq/gate/update_ops_two_qubit.cpp +++ b/scaluq/gate/update_ops_two_qubit.cpp @@ -15,6 +15,7 @@ void swap_gate(UINT target0, UINT target1, StateVector& state) { Kokkos::Experimental::swap(state._raw[basis | (1ULL << target0)], state._raw[basis | (1ULL << target1)]); }); + Kokkos::fence(); } } // namespace internal } // namespace scaluq diff --git a/scaluq/gate/update_ops_zero_qubit.cpp b/scaluq/gate/update_ops_zero_qubit.cpp index c2263210..fe13f51c 100644 --- a/scaluq/gate/update_ops_zero_qubit.cpp +++ b/scaluq/gate/update_ops_zero_qubit.cpp @@ -8,6 +8,7 @@ void global_phase_gate(double phase, StateVector& state) { Complex coef = Kokkos::polar(1., phase); Kokkos::parallel_for( state.dim(), KOKKOS_LAMBDA(UINT i) { state._raw[i] *= coef; }); + Kokkos::fence(); } } // namespace internal } // namespace scaluq diff --git a/scaluq/operator/operator.cpp b/scaluq/operator/operator.cpp index 0c66e323..fa5fc0cd 100644 --- a/scaluq/operator/operator.cpp +++ b/scaluq/operator/operator.cpp @@ -152,10 +152,10 @@ Complex Operator::get_expectation_value(const StateVector& state_vector) const { res); return coef * res; }(); - team.team_barrier(); Kokkos::single(Kokkos::PerTeam(team), [&] { term_res += tmp; }); }, res); + Kokkos::fence(); return res; } @@ -202,8 +202,8 @@ Complex Operator::get_transition_amplitude(const StateVector& state_vector_bra, UINT phase_flip_mask = pmasks[term_id]; Complex coef = coefs[term_id]; Complex tmp = [&] { + Complex res; if (bit_flip_mask == 0) { - Complex res; Kokkos::parallel_reduce( Kokkos::TeamThreadRange(team, dim), [=](const UINT& state_idx, Complex& sum) { @@ -213,32 +213,33 @@ Complex Operator::get_transition_amplitude(const StateVector& state_vector_bra, sum += tmp; }, res); - return coef * res; + } else { + UINT pivot = sizeof(UINT) * 8 - Kokkos::countl_zero(bit_flip_mask) - 1; + UINT global_phase_90rot_count = + Kokkos::popcount(bit_flip_mask & phase_flip_mask); + Complex global_phase = PHASE_90ROT().val[global_phase_90rot_count % 4]; + Kokkos::parallel_reduce( + Kokkos::TeamThreadRange(team, dim >> 1), + [=](const UINT& state_idx, Complex& sum) { + UINT basis_0 = internal::insert_zero_to_basis_index(state_idx, pivot); + UINT basis_1 = basis_0 ^ bit_flip_mask; + Complex tmp1 = Kokkos::conj(state_vector_bra._raw[basis_1]) * + state_vector_ket._raw[basis_0] * global_phase; + if (Kokkos::popcount(basis_0 & phase_flip_mask) & 1) tmp1 = -tmp1; + Complex tmp2 = Kokkos::conj(state_vector_bra._raw[basis_0]) * + state_vector_ket._raw[basis_1] * global_phase; + if (Kokkos::popcount(basis_1 & phase_flip_mask) & 1) tmp2 = -tmp2; + sum += tmp1 + tmp2; + }, + res); } - UINT pivot = sizeof(UINT) * 8 - Kokkos::countl_zero(bit_flip_mask) - 1; - UINT global_phase_90rot_count = Kokkos::popcount(bit_flip_mask & phase_flip_mask); - Complex global_phase = PHASE_90ROT().val[global_phase_90rot_count % 4]; - Complex res; - Kokkos::parallel_reduce( - Kokkos::TeamThreadRange(team, dim >> 1), - [=](const UINT& state_idx, Complex& sum) { - UINT basis_0 = internal::insert_zero_to_basis_index(state_idx, pivot); - UINT basis_1 = basis_0 ^ bit_flip_mask; - Complex tmp1 = Kokkos::conj(state_vector_bra._raw[basis_1]) * - state_vector_ket._raw[basis_0] * global_phase; - if (Kokkos::popcount(basis_0 & phase_flip_mask) & 1) tmp1 = -tmp1; - Complex tmp2 = Kokkos::conj(state_vector_bra._raw[basis_0]) * - state_vector_ket._raw[basis_1] * global_phase; - if (Kokkos::popcount(basis_1 & phase_flip_mask) & 1) tmp2 = -tmp2; - sum += tmp1 + tmp2; - }, - res); + team.team_barrier(); return coef * res; }(); - team.team_barrier(); Kokkos::single(Kokkos::PerTeam(team), [&] { term_res += tmp; }); }, res); + Kokkos::fence(); return res; } diff --git a/scaluq/operator/pauli_operator.cpp b/scaluq/operator/pauli_operator.cpp index 4397a2ff..a7cc0faa 100644 --- a/scaluq/operator/pauli_operator.cpp +++ b/scaluq/operator/pauli_operator.cpp @@ -125,6 +125,7 @@ void PauliOperator::apply_to_state(StateVector& state_vector) const { state_vector._raw[state_idx] *= coef; } }); + Kokkos::fence(); return; } UINT pivot = sizeof(UINT) * 8 - std::countl_zero(bit_flip_mask) - 1; @@ -141,6 +142,7 @@ void PauliOperator::apply_to_state(StateVector& state_vector) const { state_vector._raw[basis_0] = tmp2 * coef; state_vector._raw[basis_1] = tmp1 * coef; }); + Kokkos::fence(); } Complex PauliOperator::get_expectation_value(const StateVector& state_vector) const { @@ -206,6 +208,7 @@ Complex PauliOperator::get_transition_amplitude(const StateVector& state_vector_ sum += tmp; }, res); + Kokkos::fence(); return _coef * res; } UINT pivot = sizeof(UINT) * 8 - std::countl_zero(bit_flip_mask) - 1; @@ -226,6 +229,7 @@ Complex PauliOperator::get_transition_amplitude(const StateVector& state_vector_ sum += tmp1 + tmp2; }, res); + Kokkos::fence(); return _coef * res; } diff --git a/scaluq/state/state_vector.cpp b/scaluq/state/state_vector.cpp index 4caea3ed..db44672a 100644 --- a/scaluq/state/state_vector.cpp +++ b/scaluq/state/state_vector.cpp @@ -51,6 +51,7 @@ StateVector StateVector::Haar_random_state(UINT n_qubits, UINT seed) { state._raw[i] = Complex(rand_gen.normal(0.0, 1.0), rand_gen.normal(0.0, 1.0)); rand_pool.free_state(rand_gen); }); + Kokkos::fence(); state.normalize(); return state; } @@ -76,6 +77,7 @@ void StateVector::normalize() { const auto norm = std::sqrt(this->get_squared_norm()); Kokkos::parallel_for( this->_dim, KOKKOS_CLASS_LAMBDA(UINT it) { this->_raw[it] /= norm; }); + Kokkos::fence(); } double StateVector::get_zero_probability(UINT target_qubit_index) const { @@ -159,16 +161,19 @@ double StateVector::get_entropy() const { void StateVector::add_state_vector(const StateVector& state) { Kokkos::parallel_for( this->_dim, KOKKOS_CLASS_LAMBDA(UINT i) { this->_raw[i] += state._raw[i]; }); + Kokkos::fence(); } void StateVector::add_state_vector_with_coef(const Complex& coef, const StateVector& state) { Kokkos::parallel_for( this->_dim, KOKKOS_CLASS_LAMBDA(UINT i) { this->_raw[i] += coef * state._raw[i]; }); + Kokkos::fence(); } void StateVector::multiply_coef(const Complex& coef) { Kokkos::parallel_for( this->_dim, KOKKOS_CLASS_LAMBDA(UINT i) { this->_raw[i] *= coef; }); + Kokkos::fence(); } std::vector StateVector::sampling(UINT sampling_count, UINT seed) const { @@ -201,6 +206,7 @@ std::vector StateVector::sampling(UINT sampling_count, UINT seed) const { result[i] = lo; rand_pool.free_state(rand_gen); }); + Kokkos::fence(); return internal::convert_device_view_to_host_vector(result); }