From eb11ab29720ca720a63edfd55b90338325ae65c7 Mon Sep 17 00:00:00 2001 From: Atsushi Togo Date: Sat, 1 Feb 2025 15:33:25 +0900 Subject: [PATCH 1/3] No openmp for collision matrix --- c/collision_matrix.c | 12 ++++++------ phono3py/conductivity/direct_solution.py | 4 +++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/c/collision_matrix.c b/c/collision_matrix.c index 782f9455..f3ea2466 100644 --- a/c/collision_matrix.c +++ b/c/collision_matrix.c @@ -124,9 +124,9 @@ static void get_collision_matrix( gp2tp_map = create_gp2tp_map(triplets_map, num_gp); -#ifdef _OPENMP -#pragma omp parallel for private(j, k, l, m, n, ti, r_gp, collision, inv_sinh) -#endif + // #ifdef _OPENMP + // #pragma omp parallel for private(j, k, l, m, n, ti, r_gp, collision, + // inv_sinh) #endif for (i = 0; i < num_ir_gp; i++) { inv_sinh = (double *)malloc(sizeof(double) * num_band); for (j = 0; j < num_rot; j++) { @@ -194,9 +194,9 @@ static void get_reducible_collision_matrix( gp2tp_map = create_gp2tp_map(triplets_map, num_gp); -#ifdef _OPENMP -#pragma omp parallel for private(j, k, l, ti, collision, inv_sinh) -#endif + // #ifdef _OPENMP + // #pragma omp parallel for private(j, k, l, ti, collision, inv_sinh) + // #endif for (i = 0; i < num_gp; i++) { inv_sinh = (double *)malloc(sizeof(double) * num_band); ti = gp2tp_map[triplets_map[i]]; diff --git a/phono3py/conductivity/direct_solution.py b/phono3py/conductivity/direct_solution.py index 629fe857..7339b407 100644 --- a/phono3py/conductivity/direct_solution.py +++ b/phono3py/conductivity/direct_solution.py @@ -133,7 +133,9 @@ def __init__( if self._is_reducible_collision_matrix: self._collision = CollisionMatrix( - self._pp, is_reducible_collision_matrix=True, log_level=self._log_level + self._pp, + is_reducible_collision_matrix=True, + log_level=self._log_level, ) else: self._rot_grid_points = self._get_rot_grid_points() From e3421cf4661f06627be66a27ee5d3560c5e4d160 Mon Sep 17 00:00:00 2001 From: Atsushi Togo Date: Sat, 1 Feb 2025 15:37:45 +0900 Subject: [PATCH 2/3] With private variable in openmp for collision matrix --- c/collision_matrix.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/c/collision_matrix.c b/c/collision_matrix.c index f3ea2466..ce8db94b 100644 --- a/c/collision_matrix.c +++ b/c/collision_matrix.c @@ -124,9 +124,10 @@ static void get_collision_matrix( gp2tp_map = create_gp2tp_map(triplets_map, num_gp); - // #ifdef _OPENMP - // #pragma omp parallel for private(j, k, l, m, n, ti, r_gp, collision, - // inv_sinh) #endif +#ifdef _OPENMP +#pragma omp parallel for private(j, k, l, m, n, ti, r_gp, collision, inv_sinh, \ + swapped) +#endif for (i = 0; i < num_ir_gp; i++) { inv_sinh = (double *)malloc(sizeof(double) * num_band); for (j = 0; j < num_rot; j++) { @@ -194,9 +195,9 @@ static void get_reducible_collision_matrix( gp2tp_map = create_gp2tp_map(triplets_map, num_gp); - // #ifdef _OPENMP - // #pragma omp parallel for private(j, k, l, ti, collision, inv_sinh) - // #endif +#ifdef _OPENMP +#pragma omp parallel for private(j, k, l, ti, collision, inv_sinh, swapped) +#endif for (i = 0; i < num_gp; i++) { inv_sinh = (double *)malloc(sizeof(double) * num_band); ti = gp2tp_map[triplets_map[i]]; From c6f5cab9cbdc16f3707a036f8bae3b57fcc24121 Mon Sep 17 00:00:00 2001 From: Atsushi Togo Date: Sat, 1 Feb 2025 15:51:50 +0900 Subject: [PATCH 3/3] Separate collision matrix function for each gp --- c/collision_matrix.c | 205 ++++++++++++++++++++++++++----------------- 1 file changed, 125 insertions(+), 80 deletions(-) diff --git a/c/collision_matrix.c b/c/collision_matrix.c index ce8db94b..fd2a9e7a 100644 --- a/c/collision_matrix.c +++ b/c/collision_matrix.c @@ -51,6 +51,15 @@ static void get_collision_matrix( const double *rotations_cartesian, const double *g, const double temperature, const double unit_conversion_factor, const double cutoff_frequency); +static void get_collision_matrix_at_gp( + double *collision_matrix, const double *fc3_normal_squared, + const int64_t num_band0, const int64_t num_band, const double *frequencies, + const int64_t (*triplets)[3], const int64_t *triplets_map, + const int64_t *map_q, const int64_t *rot_grid_points, + const int64_t num_ir_gp, const int64_t num_rot, + const double *rotations_cartesian, const double *g, + const double temperature, const double unit_conversion_factor, + const double cutoff_frequency, const int64_t *gp2tp_map, const int64_t i); static void get_reducible_collision_matrix( double *collision_matrix, const double *fc3_normal_squared, const int64_t num_band0, const int64_t num_band, const double *frequencies, @@ -58,6 +67,13 @@ static void get_reducible_collision_matrix( const int64_t num_gp, const int64_t *map_q, const double *g, const double temperature, const double unit_conversion_factor, const double cutoff_frequency); +static void get_reducible_collision_matrix_at_gp( + double *collision_matrix, const double *fc3_normal_squared, + const int64_t num_band0, const int64_t num_band, const double *frequencies, + const int64_t (*triplets)[3], const int64_t *triplets_map, + const int64_t num_gp, const int64_t *map_q, const double *g, + const double temperature, const double unit_conversion_factor, + const double cutoff_frequency, const int64_t *gp2tp_map, const int64_t i); static int64_t get_inv_sinh(double *inv_sinh, const int64_t gp, const double temperature, const double *frequencies, const int64_t triplet[3], @@ -117,131 +133,160 @@ static void get_collision_matrix( const double *rotations_cartesian, const double *g, const double temperature, const double unit_conversion_factor, const double cutoff_frequency) { - int64_t i, j, k, l, m, n, ti, r_gp, swapped; + int64_t i; int64_t *gp2tp_map; - double collision; - double *inv_sinh; gp2tp_map = create_gp2tp_map(triplets_map, num_gp); #ifdef _OPENMP -#pragma omp parallel for private(j, k, l, m, n, ti, r_gp, collision, inv_sinh, \ - swapped) +#pragma omp parallel for #endif for (i = 0; i < num_ir_gp; i++) { - inv_sinh = (double *)malloc(sizeof(double) * num_band); - for (j = 0; j < num_rot; j++) { - r_gp = rot_grid_points[i * num_rot + j]; - ti = gp2tp_map[triplets_map[r_gp]]; - swapped = get_inv_sinh(inv_sinh, r_gp, temperature, frequencies, - triplets[ti], triplets_map, map_q, num_band, - cutoff_frequency); - - for (k = 0; k < num_band0; k++) { - for (l = 0; l < num_band; l++) { - collision = 0; - for (m = 0; m < num_band; m++) { - if (swapped) { - collision += - fc3_normal_squared[ti * num_band0 * num_band * - num_band + - k * num_band * num_band + - m * num_band + l] * - g[ti * num_band0 * num_band * num_band + - k * num_band * num_band + m * num_band + l] * - inv_sinh[m] * unit_conversion_factor; - } else { - collision += - fc3_normal_squared[ti * num_band0 * num_band * - num_band + - k * num_band * num_band + - l * num_band + m] * - g[ti * num_band0 * num_band * num_band + - k * num_band * num_band + l * num_band + m] * - inv_sinh[m] * unit_conversion_factor; - } - } - for (m = 0; m < 3; m++) { - for (n = 0; n < 3; n++) { - collision_matrix[k * 3 * num_ir_gp * num_band * 3 + - m * num_ir_gp * num_band * 3 + - i * num_band * 3 + l * 3 + n] += - collision * - rotations_cartesian[j * 9 + m * 3 + n]; - } - } - } - } - } - free(inv_sinh); - inv_sinh = NULL; + get_collision_matrix_at_gp( + collision_matrix, fc3_normal_squared, num_band0, num_band, + frequencies, triplets, triplets_map, map_q, rot_grid_points, + num_ir_gp, num_rot, rotations_cartesian, g, temperature, + unit_conversion_factor, cutoff_frequency, gp2tp_map, i); } free(gp2tp_map); gp2tp_map = NULL; } -static void get_reducible_collision_matrix( +static void get_collision_matrix_at_gp( double *collision_matrix, const double *fc3_normal_squared, const int64_t num_band0, const int64_t num_band, const double *frequencies, const int64_t (*triplets)[3], const int64_t *triplets_map, - const int64_t num_gp, const int64_t *map_q, const double *g, + const int64_t *map_q, const int64_t *rot_grid_points, + const int64_t num_ir_gp, const int64_t num_rot, + const double *rotations_cartesian, const double *g, const double temperature, const double unit_conversion_factor, - const double cutoff_frequency) { - int64_t i, j, k, l, ti, swapped; - int64_t *gp2tp_map; + const double cutoff_frequency, const int64_t *gp2tp_map, const int64_t i) { + int64_t j, k, l, m, n, ti, r_gp, swapped; + double collision; double *inv_sinh; - gp2tp_map = create_gp2tp_map(triplets_map, num_gp); - -#ifdef _OPENMP -#pragma omp parallel for private(j, k, l, ti, collision, inv_sinh, swapped) -#endif - for (i = 0; i < num_gp; i++) { - inv_sinh = (double *)malloc(sizeof(double) * num_band); - ti = gp2tp_map[triplets_map[i]]; + inv_sinh = (double *)malloc(sizeof(double) * num_band); + for (j = 0; j < num_rot; j++) { + r_gp = rot_grid_points[i * num_rot + j]; + ti = gp2tp_map[triplets_map[r_gp]]; swapped = - get_inv_sinh(inv_sinh, i, temperature, frequencies, triplets[ti], + get_inv_sinh(inv_sinh, r_gp, temperature, frequencies, triplets[ti], triplets_map, map_q, num_band, cutoff_frequency); - for (j = 0; j < num_band0; j++) { - for (k = 0; k < num_band; k++) { + for (k = 0; k < num_band0; k++) { + for (l = 0; l < num_band; l++) { collision = 0; - for (l = 0; l < num_band; l++) { + for (m = 0; m < num_band; m++) { if (swapped) { collision += fc3_normal_squared[ti * num_band0 * num_band * num_band + - j * num_band * num_band + - l * num_band + k] * + k * num_band * num_band + + m * num_band + l] * g[ti * num_band0 * num_band * num_band + - j * num_band * num_band + l * num_band + k] * - inv_sinh[l] * unit_conversion_factor; + k * num_band * num_band + m * num_band + l] * + inv_sinh[m] * unit_conversion_factor; } else { collision += fc3_normal_squared[ti * num_band0 * num_band * num_band + - j * num_band * num_band + - k * num_band + l] * + k * num_band * num_band + + l * num_band + m] * g[ti * num_band0 * num_band * num_band + - j * num_band * num_band + k * num_band + l] * - inv_sinh[l] * unit_conversion_factor; + k * num_band * num_band + l * num_band + m] * + inv_sinh[m] * unit_conversion_factor; + } + } + for (m = 0; m < 3; m++) { + for (n = 0; n < 3; n++) { + collision_matrix[k * 3 * num_ir_gp * num_band * 3 + + m * num_ir_gp * num_band * 3 + + i * num_band * 3 + l * 3 + n] += + collision * rotations_cartesian[j * 9 + m * 3 + n]; } } - collision_matrix[j * num_gp * num_band + i * num_band + k] += - collision; } } + } + free(inv_sinh); + inv_sinh = NULL; +} + +static void get_reducible_collision_matrix( + double *collision_matrix, const double *fc3_normal_squared, + const int64_t num_band0, const int64_t num_band, const double *frequencies, + const int64_t (*triplets)[3], const int64_t *triplets_map, + const int64_t num_gp, const int64_t *map_q, const double *g, + const double temperature, const double unit_conversion_factor, + const double cutoff_frequency) { + int64_t i; + int64_t *gp2tp_map; + + gp2tp_map = create_gp2tp_map(triplets_map, num_gp); - free(inv_sinh); - inv_sinh = NULL; +#ifdef _OPENMP +#pragma omp parallel for +#endif + for (i = 0; i < num_gp; i++) { + get_reducible_collision_matrix_at_gp( + collision_matrix, fc3_normal_squared, num_band0, num_band, + frequencies, triplets, triplets_map, num_gp, map_q, g, temperature, + unit_conversion_factor, cutoff_frequency, gp2tp_map, i); } free(gp2tp_map); gp2tp_map = NULL; } +static void get_reducible_collision_matrix_at_gp( + double *collision_matrix, const double *fc3_normal_squared, + const int64_t num_band0, const int64_t num_band, const double *frequencies, + const int64_t (*triplets)[3], const int64_t *triplets_map, + const int64_t num_gp, const int64_t *map_q, const double *g, + const double temperature, const double unit_conversion_factor, + const double cutoff_frequency, const int64_t *gp2tp_map, const int64_t i) { + int64_t j, k, l, ti, swapped; + double collision; + double *inv_sinh; + + inv_sinh = (double *)malloc(sizeof(double) * num_band); + ti = gp2tp_map[triplets_map[i]]; + swapped = get_inv_sinh(inv_sinh, i, temperature, frequencies, triplets[ti], + triplets_map, map_q, num_band, cutoff_frequency); + + for (j = 0; j < num_band0; j++) { + for (k = 0; k < num_band; k++) { + collision = 0; + for (l = 0; l < num_band; l++) { + if (swapped) { + collision += fc3_normal_squared[ti * num_band0 * num_band * + num_band + + j * num_band * num_band + + l * num_band + k] * + g[ti * num_band0 * num_band * num_band + + j * num_band * num_band + l * num_band + k] * + inv_sinh[l] * unit_conversion_factor; + } else { + collision += fc3_normal_squared[ti * num_band0 * num_band * + num_band + + j * num_band * num_band + + k * num_band + l] * + g[ti * num_band0 * num_band * num_band + + j * num_band * num_band + k * num_band + l] * + inv_sinh[l] * unit_conversion_factor; + } + } + collision_matrix[j * num_gp * num_band + i * num_band + k] += + collision; + } + } + + free(inv_sinh); + inv_sinh = NULL; +} + static int64_t get_inv_sinh(double *inv_sinh, const int64_t gp, const double temperature, const double *frequencies, const int64_t triplet[3],