Skip to content

Commit

Permalink
Merge pull request #124 from qulacs/MDRange
Browse files Browse the repository at this point in the history
paralle_for unrolling
  • Loading branch information
gandalfr-KY authored Jun 26, 2024
2 parents 2fcf46b + 498c514 commit f504ed9
Showing 1 changed file with 49 additions and 77 deletions.
126 changes: 49 additions & 77 deletions scaluq/operator/operator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,48 +111,35 @@ Complex Operator::get_expectation_value(const StateVector& state_vector) const {
UINT dim = state_vector.dim();
Complex res;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<>(nterms, Kokkos::AUTO()),
KOKKOS_LAMBDA(
const Kokkos::TeamPolicy<Kokkos::DefaultExecutionSpace>::TeamPolicy::member_type& team,
Complex& term_res) {
UINT term_id = team.league_rank();
Kokkos::MDRangePolicy<Kokkos::Rank<2>>({0, 0}, {nterms, dim >> 1}),
KOKKOS_LAMBDA(UINT term_id, UINT state_idx, Complex & res_lcl) {
UINT bit_flip_mask = bmasks[term_id];
UINT phase_flip_mask = pmasks[term_id];
Complex coef = coefs[term_id];
Complex tmp = [&] {
if (bit_flip_mask == 0) {
double res;
Kokkos::parallel_reduce(
Kokkos::TeamThreadRange(team, dim),
[=](const UINT& state_idx, double& sum) {
double tmp = (Kokkos::conj(state_vector._raw[state_idx]) *
state_vector._raw[state_idx])
.real();
if (Kokkos::popcount(state_idx & phase_flip_mask) & 1) tmp = -tmp;
sum += tmp;
},
res);
return coef * res;
}
if (bit_flip_mask == 0) {
UINT state_idx1 = state_idx << 1;
double tmp1 =
(Kokkos::conj(state_vector._raw[state_idx1]) * state_vector._raw[state_idx1])
.real();
if (Kokkos::popcount(state_idx1 & phase_flip_mask) & 1) tmp1 = -tmp1;
UINT state_idx2 = state_idx1 | 1;
double tmp2 =
(Kokkos::conj(state_vector._raw[state_idx2]) * state_vector._raw[state_idx2])
.real();
if (Kokkos::popcount(state_idx2 & phase_flip_mask) & 1) tmp2 = -tmp2;
res_lcl += coef * (tmp1 + tmp2);
} else {
UINT pivot = sizeof(UINT) * 8 - Kokkos::countl_zero(bit_flip_mask) - 1;
UINT global_phase_90rot_count = Kokkos::popcount(bit_flip_mask & phase_flip_mask);
Complex global_phase = PHASE_90ROT().val[global_phase_90rot_count % 4];
double res;
Kokkos::parallel_reduce(
Kokkos::TeamThreadRange(team, dim >> 1),
[=](const UINT& state_idx, double& sum) {
UINT basis_0 = internal::insert_zero_to_basis_index(state_idx, pivot);
UINT basis_1 = basis_0 ^ bit_flip_mask;
double tmp = Kokkos::real(state_vector._raw[basis_0] *
Kokkos::conj(state_vector._raw[basis_1]) *
global_phase * 2.);
if (Kokkos::popcount(basis_0 & phase_flip_mask) & 1) tmp = -tmp;
sum += tmp;
},
res);
return coef * res;
}();
Kokkos::single(Kokkos::PerTeam(team), [&] { term_res += tmp; });
UINT basis_0 = internal::insert_zero_to_basis_index(state_idx, pivot);
UINT basis_1 = basis_0 ^ bit_flip_mask;
double tmp =
Kokkos::real(state_vector._raw[basis_0] *
Kokkos::conj(state_vector._raw[basis_1]) * global_phase * 2.);
if (Kokkos::popcount(basis_0 & phase_flip_mask) & 1) tmp = -tmp;
res_lcl += coef * tmp;
}
},
res);
Kokkos::fence();
Expand Down Expand Up @@ -193,50 +180,35 @@ Complex Operator::get_transition_amplitude(const StateVector& state_vector_bra,
UINT dim = state_vector_bra.dim();
Complex res;
Kokkos::parallel_reduce(
Kokkos::TeamPolicy<>(nterms, Kokkos::AUTO()),
KOKKOS_LAMBDA(
const Kokkos::TeamPolicy<Kokkos::DefaultExecutionSpace>::TeamPolicy::member_type& team,
Complex& term_res) {
UINT term_id = team.league_rank();
Kokkos::MDRangePolicy<Kokkos::Rank<2>>({0, 0}, {nterms, dim >> 1}),
KOKKOS_LAMBDA(UINT term_id, UINT state_idx, Complex & res_lcl) {
UINT bit_flip_mask = bmasks[term_id];
UINT phase_flip_mask = pmasks[term_id];
Complex coef = coefs[term_id];
Complex tmp = [&] {
Complex res;
if (bit_flip_mask == 0) {
Kokkos::parallel_reduce(
Kokkos::TeamThreadRange(team, dim),
[=](const UINT& state_idx, Complex& sum) {
Complex tmp = (Kokkos::conj(state_vector_bra._raw[state_idx]) *
state_vector_ket._raw[state_idx]);
if (Kokkos::popcount(state_idx & phase_flip_mask) & 1) tmp = -tmp;
sum += tmp;
},
res);
} else {
UINT pivot = sizeof(UINT) * 8 - Kokkos::countl_zero(bit_flip_mask) - 1;
UINT global_phase_90rot_count =
Kokkos::popcount(bit_flip_mask & phase_flip_mask);
Complex global_phase = PHASE_90ROT().val[global_phase_90rot_count % 4];
Kokkos::parallel_reduce(
Kokkos::TeamThreadRange(team, dim >> 1),
[=](const UINT& state_idx, Complex& sum) {
UINT basis_0 = internal::insert_zero_to_basis_index(state_idx, pivot);
UINT basis_1 = basis_0 ^ bit_flip_mask;
Complex tmp1 = Kokkos::conj(state_vector_bra._raw[basis_1]) *
state_vector_ket._raw[basis_0] * global_phase;
if (Kokkos::popcount(basis_0 & phase_flip_mask) & 1) tmp1 = -tmp1;
Complex tmp2 = Kokkos::conj(state_vector_bra._raw[basis_0]) *
state_vector_ket._raw[basis_1] * global_phase;
if (Kokkos::popcount(basis_1 & phase_flip_mask) & 1) tmp2 = -tmp2;
sum += tmp1 + tmp2;
},
res);
}
team.team_barrier();
return coef * res;
}();
Kokkos::single(Kokkos::PerTeam(team), [&] { term_res += tmp; });
if (bit_flip_mask == 0) {
UINT state_idx1 = state_idx << 1;
Complex tmp1 = (Kokkos::conj(state_vector_bra._raw[state_idx1]) *
state_vector_ket._raw[state_idx1]);
if (Kokkos::popcount(state_idx1 & phase_flip_mask) & 1) tmp1 = -tmp1;
UINT state_idx2 = state_idx1 | 1;
Complex tmp2 = (Kokkos::conj(state_vector_bra._raw[state_idx2]) *
state_vector_ket._raw[state_idx2]);
if (Kokkos::popcount(state_idx2 & phase_flip_mask) & 1) tmp2 = -tmp2;
res_lcl += coef * (tmp1 + tmp2);
} else {
UINT pivot = sizeof(UINT) * 8 - Kokkos::countl_zero(bit_flip_mask) - 1;
UINT global_phase_90rot_count = Kokkos::popcount(bit_flip_mask & phase_flip_mask);
Complex global_phase = PHASE_90ROT().val[global_phase_90rot_count % 4];
UINT basis_0 = internal::insert_zero_to_basis_index(state_idx, pivot);
UINT basis_1 = basis_0 ^ bit_flip_mask;
Complex tmp1 = Kokkos::conj(state_vector_bra._raw[basis_1]) *
state_vector_ket._raw[basis_0] * global_phase;
if (Kokkos::popcount(basis_0 & phase_flip_mask) & 1) tmp1 = -tmp1;
Complex tmp2 = Kokkos::conj(state_vector_bra._raw[basis_0]) *
state_vector_ket._raw[basis_1] * global_phase;
if (Kokkos::popcount(basis_1 & phase_flip_mask) & 1) tmp2 = -tmp2;
res_lcl += coef * (tmp1 + tmp2);
}
},
res);
Kokkos::fence();
Expand Down

0 comments on commit f504ed9

Please sign in to comment.