/*Function that performs the canonical reduction of the stabilizer tableau (Algorithm 1 in the paper)
Constraints:
- We have to make sure that N >= M
- num_qubits % N == 0
Inputs:
- A valid stabilizer tableau, i.e. all rows are linearly independent and each row contains at least one 1
- A vector of phases corresponding to the rows of the tableau
Outputs:
- A CSV file containing the canonical decomposition of the tableau
*/
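/* Build note (assumption, not part of the original sources): this file needs a BSPlib implementation that
provides the classic "bsp.h" interface (bsp_begin, bsp_put, bsp_sync, ...), e.g. BSPonMPI or MulticoreBSP for C;
compile against that library and link the math library (-lm). The exact command depends on the installation. */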
#include "stdio.h"
#include "stdlib.h"
#include "bsp.h"
#include "math.h"
#include "string.h"
#include "stdbool.h"
#define MOD(a, b) ((((a) % (b)) + (b)) % (b)) // Non-negative remainder, arguments parenthesised for macro safety
const int P = 4; // Number of processors
const int N = 2; // Processor rows
const int M = 2; // Processor columns
const int num_qubits = 8; // Sometimes referred to as n
#define our_nan (-(2 * num_qubits + 1)) // "No value" sentinel (-17 here); a macro because a const int is not a constant expression for a file-scope initializer in C
int mat[8][2 * 8]; // Size of the tableau
int vec[8]; // Size of the phase vector
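// With the parameters above, mat is the 8 x 16 binary tableau (num_qubits rows, 2*num_qubits columns; following
// the z/x split used in the symplectic-product comments further down, the first num_qubits columns form the z block
// and the last num_qubits the x block), vec holds the 8 phase bits, and our_nan = -17 marks "no value".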
// Function that reads the starting tableau and loads it directly into our 2-d array "mat"
void read_csv(){
char buffer[100000];
char *record, *line;
int i = 0, j = 0;
FILE *fstream = fopen("~/ParallelStabilizerInnerProduct/Inputs/starting_tableau.csv", "r");
if (fstream == NULL){
printf("\n file opening failed ");
return; // Do not read from a NULL stream
}
while ((line = fgets(buffer, 100000, fstream)) != NULL){
j = 0; // Restart at column 0 for every tableau row
record = strtok(line, ",");
while (record != NULL){
mat[i][j++] = atoi(record);
record = strtok(NULL, ",");
}
++i;
}
fclose(fstream);
}
// Function that loads the vector of phases directly into "vec"
void read_vec(){
char buffer[1024];
char *record, *line;
int i = 0;
FILE *fstream = fopen("~/ParallelStabilizerInnerProduct/Inputs/starting_phases.csv", "r");
if (fstream == NULL){
printf("\n file opening failed ");
return; // Do not read from a NULL stream
}
while ((line = fgets(buffer, 1024, fstream)) != NULL){
record = strtok(line, ",");
while (record != NULL){
vec[i++] = atoi(record);
record = strtok(NULL, ",");
}
}
fclose(fstream);
}
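/* Expected input files (inferred from the two readers above; the paths are the literal strings hard-coded there):
- starting_tableau.csv: num_qubits lines, each with 2*num_qubits comma-separated 0/1 entries
- starting_phases.csv: a single line with num_qubits comma-separated phase bits
Note that fopen() does not expand "~", so these literal paths must actually exist or be adjusted. */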
// Function that writes the tableau and resulting phases into a CSV file
void write_output(){
FILE *alg1_output_mat;
FILE *alg1_output_vec;
alg1_output_mat = fopen("~/ParallelStabilizerInnerProduct/Outputs/alg1_output_mat.txt", "w");
if (alg1_output_mat != NULL){
for (int i = 0; i < num_qubits; i++){
for (int j = 0; j < 2 * num_qubits; j++){
if (i == num_qubits - 1 && j == 2 * num_qubits - 1){
fprintf(alg1_output_mat, "%d \n", mat[i][j]);
}
else{
fprintf(alg1_output_mat, "%d,", mat[i][j]);
}
}
}
fclose(alg1_output_mat);
}
else{
printf("The file could not be opened");
}
alg1_output_vec = fopen("~/ParallelStabilizerInnerProduct/Outputs/alg1_output_vec.txt", "w");
if (alg1_output_vec != NULL){
for (int l = 0; l < num_qubits; l++){
if (l != num_qubits - 1){
fprintf(alg1_output_vec, "%d,", vec[l]);
}
else{
fprintf(alg1_output_vec, "%d", vec[l]);
}
}
fclose(alg1_output_vec);
}
}
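/* Output format produced above: alg1_output_mat.txt holds all num_qubits * 2*num_qubits entries as one long
comma-separated sequence (a newline is only written after the very last entry), and alg1_output_vec.txt holds
the num_qubits phase bits on a single comma-separated line. */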
// Parallel function
void parallelalg1(){
// Begin using "P" processors
bsp_begin(P);
// Name the processors using 2d naming
int pid = bsp_pid(); // P0 = 00, P1 = 10, P2 = 01, P3 = 11
int row_name = pid % N;
int col_name = (int)floor(pid / N);
const int column_dim = 2 * num_qubits / M;
const int row_dim = num_qubits / N;
const int tot_values = column_dim * row_dim;
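// Example with P = 4, N = M = 2: pid 0 -> (row 0, col 0), pid 1 -> (1, 0), pid 2 -> (0, 1), pid 3 -> (1, 1);
// each processor then owns row_dim = 4 of the 8 tableau rows and column_dim = 8 of the 16 tableau columns.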
// Initialize "my_matrix"
int *my_matrix[row_dim];
for (int i = 0; i < row_dim; i++){
my_matrix[i] = malloc(column_dim * sizeof(int));
}
// Distribute the data of the global "mat" to the local matrices "my_matrix" according to the 2d cyclic Cartesian distribution
for (int i = 0; i < row_dim; i++){
for (int j = 0; j < column_dim; j++){
my_matrix[i][j] = mat[row_name + i * N][col_name + j * M];
}
}
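// With this cyclic distribution, global entry mat[r][c] lives on the processor with (row_name, col_name) = (r % N, c % M)
// at local position my_matrix[r / N][c / M]; e.g. with N = M = 2, mat[5][10] sits on pid 1 as my_matrix[2][5].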
// Initialize "my_vec" and fill it with entries from "vec"
// Initialize "total_as" and fill with zeros
int *my_vec = NULL;
int *total_as = NULL;
if (col_name == 0){
my_vec = malloc(row_dim * sizeof(int));
total_as = malloc(row_dim * sizeof(int));
for (int i = 0; i < row_dim; i++){
my_vec[i] = vec[row_name + i * N];
total_as[i] = 0;
}
}
// Initialize registers that enable data communication between the processors
int *entries_received = malloc(column_dim * sizeof(int));
bsp_push_reg(entries_received, column_dim * sizeof(int)); // Array for received row entries. Used in swapping
int phase_received = -1;
bsp_push_reg(&phase_received, sizeof(int)); // Register for the single received phase entry
int *winner_row_array = malloc(P * sizeof(int));
for (int i = 0; i < P; i++){
winner_row_array[i] = -1;
}
bsp_push_reg(winner_row_array, P * sizeof(int)); // Array to determine which processor wins
int winner_row = -1;
bsp_push_reg(&winner_row, sizeof(int)); // Which row wins
bool empty = true;
bsp_push_reg(&empty, sizeof(bool)); // Flag: true while no usable 1 has been found in column col
int *flag_array = malloc(row_dim * sizeof(int));
for (int i = 0; i < row_dim; i++){
flag_array[i] = -1;
}
bsp_push_reg(flag_array, row_dim * sizeof(int)); // Array for received column entries
int *num_to_add = malloc(column_dim * sizeof(int));
for (int i = 0; i < column_dim; i++){
num_to_add[i] = -1;
}
bsp_push_reg(num_to_add, column_dim * sizeof(int)); // Array for received row entries. Used in row reduction
int *partial_as = malloc(row_dim * M * sizeof(int));
for (int i = 0; i < row_dim * M; i++){
partial_as[i] = -1;
}
bsp_push_reg(partial_as, row_dim * M * sizeof(int)); // Helper array for phase calculations
int *partial_bs = malloc(row_dim * M * sizeof(int));
for (int i = 0; i < row_dim * M; i++){
partial_bs[i] = -1;
}
bsp_push_reg(partial_bs, row_dim * M * sizeof(int)); // Helper array for phase calculations
bsp_sync();
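// The bsp_sync() above is needed because bsp_push_reg registrations only become usable after the next
// synchronisation; from the following superstep on, other processors may bsp_put into these buffers.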
// Helper local vars
int *a_loc = malloc(row_dim * sizeof(int));
int *b_loc = malloc(row_dim * sizeof(int));
int k = 0;
int diag_pid = 0;
int row_w_one = 0;
int temp_phase = 0;
// BEGINNING OF THE OUTERMOST LOOP. REDUCTION FOR THE RIGHT SIDE OF THE TABLEAU
// =====================================================================================================================================================
// =====================================================================================================================================================
// If every column contains a usable 1, then col == k at every step; when a column has none, k stays behind col
for (int col = num_qubits; col < 2 * num_qubits; col++){
empty = true;
diag_pid = k % N + (col % M) * N; // pid of the processor that handles the diagonal element (right side of the tableau)
// All processors search for a 1 in column col. If they do have a 1, they communicate the number of the row to "diag_pid"
if (col_name == col % M){
for (int i = 0; i < row_dim; i++){
if (my_matrix[i][(int)floor(col / M)] == 1 && (i * N + row_name) >= k){
empty = false;
row_w_one = i * N + row_name; // inverse mapping referring to the global mat
bsp_put(diag_pid, &row_w_one, winner_row_array, pid * sizeof(int), sizeof(int));
for (int ii = 0; ii < P; ii++){ // Update the value of "empty" for all processors
bsp_put(ii, &empty, &empty, 0, sizeof(bool));
}
break;
}
}
}
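// Only rows with global index >= k qualify: rows 0..k-1 already hold earlier pivots and must not be disturbed.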
bsp_sync();
if (empty == false){
// The processor handling the diagonal element decides which processor it will swap rows with (the winner) and announces the winner row to all others
if (pid == diag_pid){
if (winner_row_array[diag_pid] != -1){ // If "diag_pid" itself holds a row with a 1, it becomes the winner. This reduces communication costs
for (int i = 0; i < P; i++){
bsp_put(i, &winner_row_array[pid], &winner_row, 0, sizeof(int)); // Announce the winner row to all other processors
}
}
else{
for (int i = 0; i < P; i++){
if (winner_row_array[i] != -1){
for (int ii = 0; ii < P; ii++){
bsp_put(ii, &winner_row_array[i], &winner_row, 0, sizeof(int)); // Announce the winner row to all other processors
}
break;
}
}
}
}
bsp_sync();
// Swaps begin here!
// ============================================================================================================================================
// Processors that control the row where the diagonal element currently is send that row to the processors responsible for the winner row
if (row_name == k % N && row_name != winner_row % N){
bsp_put(winner_row % N + (col_name % M) * N, my_matrix[(int)floor(k / N)], entries_received, 0, column_dim * sizeof(int)); // Send row elements
if (col_name == 0){
bsp_put(winner_row % N + (col_name % M) * N, &my_vec[(int)floor(k / N)], &phase_received, 0, sizeof(int)); // Send phase elements
}
}
// Processors that handle the winner row send to those handling row k
if (row_name == winner_row % N && row_name != k % N){
bsp_put(k % N + (col_name % M) * N, my_matrix[(int)floor(winner_row / N)], entries_received, 0, column_dim * sizeof(int)); // Send row elements
if (col_name == 0){
bsp_put(k % N + (col_name % M) * N, &my_vec[(int)floor(winner_row / N)], &phase_received, 0, sizeof(int)); // Send phase elements
}
}
bsp_sync();
// Processors overwrite their values of "my_matrix" with the elements just received
// ------------------------------------------------------------------
// This is the case when the processors handling row k also handle the winner row
if (row_name == k % N && row_name == winner_row % N){
// Swap rows
int *temp_array = NULL;
temp_array = my_matrix[(int)floor(k / N)];
my_matrix[(int)floor(k / N)] = my_matrix[(int)floor(winner_row / N)];
my_matrix[(int)floor(winner_row / N)] = temp_array;
temp_array = NULL;
// Swap phases
if (col_name == 0){
temp_phase = my_vec[(int)floor(k / N)];
my_vec[(int)floor(k / N)] = my_vec[(int)floor(winner_row / N)];
my_vec[(int)floor(winner_row / N)] = temp_phase;
temp_phase = -1;
}
}
// Case when the processors handling row k do not handle the winner row
else if (row_name == k % N){ // Processors that handle row k update its local matrix
for (int i = 0; i < column_dim; i++){
my_matrix[(int)floor(k / N)][i] = entries_received[i];
}
if (col_name == 0){
my_vec[(int)floor(k / N)] = phase_received;
}
}
else if (row_name == winner_row % N){ // Processors that handle winner row update its local matrix
for (int i = 0; i < column_dim; i++){
my_matrix[(int)floor(winner_row / N)][i] = entries_received[i];
}
if (col_name == 0){
my_vec[(int)floor(winner_row / N)] = phase_received;
}
}
bsp_sync();
// Row reduction steps! Get rid of ones in the same column as "col"
// =========================================================================================================================================
// Processors that control row "k" send their part of the row to every processor in their processor column
if (row_name == k % N){
for (int l = 0; l < N; l++){
bsp_put(l + (col_name % M) * N, my_matrix[(int)floor(k / N)], num_to_add, 0, column_dim * sizeof(int));
if (col_name == 0){
// The processor holding row k with col_name 0 sends its phase to all other processors with col_name 0. We reuse the "phase_received" memory space
bsp_put(l, &my_vec[(int)floor(k / N)], &phase_received, 0, sizeof(int));
}
}
}
// Processors that control column "col" send their column entries to their processor row. We need two cases!
// Case 1: the processor handling both column "col" and row "k" has to be careful: we do not want its pivot 1 to be cleared
if (col_name == col % M && row_name == k % N){
my_matrix[(int)floor(k / N)][(int)floor(col / M)] = -1;
for (int i = 0; i < row_dim; i++){
for (int l = 0; l < M; l++){
bsp_put(row_name + (l % M) * N, &my_matrix[i][(int)floor(col / M)], flag_array, i * sizeof(int), sizeof(int));
}
}
my_matrix[(int)floor(k / N)][(int)floor(col / M)] = 1;
}
// Case 2: everything proceeds normally
else if (col_name == col % M){
for (int i = 0; i < row_dim; i++){
for (int l = 0; l < M; l++){
bsp_put(row_name + (l % M) * N, &my_matrix[i][(int)floor(col / M)], flag_array, i * sizeof(int), sizeof(int));
}
}
}
bsp_sync();
// Compute the first part of the symplectic inner product in parallel. All local!
// Let z, x = my_matrix[i][:column_dim/2], my_matrix[i][column_dim/2:] and z', x' = num_to_add[:column_dim/2], num_to_add[column_dim/2:]
// Then a_loc[i] = <z, x'> - <x, z'> and b_loc[i] = sum_j [ ((z_j + z'_j)(x_j + x'_j)) mod 2 - (z_j + z'_j)(x_j + x'_j) ]
for (int l = 0; l < row_dim; l++){
a_loc[l] = our_nan;
b_loc[l] = our_nan;
}
for (int i = 0; i < row_dim; i++){
if (flag_array[i] == 1){
a_loc[i] = 0;
b_loc[i] = 0;
for (int j = 0; j < (int)(column_dim / 2); j++){
a_loc[i] += (my_matrix[i][j] * num_to_add[j + (column_dim / 2)]) - (my_matrix[i][j + (column_dim / 2)] * num_to_add[j]);
int v = MOD((my_matrix[i][j] + num_to_add[j]) * (my_matrix[i][j + column_dim / 2] + num_to_add[j + column_dim / 2]), 2);
b_loc[i] += v - ((my_matrix[i][j] + num_to_add[j]) * (my_matrix[i][j + column_dim / 2] + num_to_add[j + column_dim / 2]));
}
}
}
// We put the local "a_loc" and "b_loc" on the processor with col_name = 0 that handles the same rows
bsp_put(row_name, a_loc, partial_as, col_name * row_dim * sizeof(int), row_dim * sizeof(int));
bsp_put(row_name, b_loc, partial_bs, col_name * row_dim * sizeof(int), row_dim * sizeof(int));
bsp_sync();
// The processors with column name 0 use the partial a's and b's to compute the total for every row they participate in
if (col_name == 0){
for (int i = 0; i < row_dim; i++){
if (partial_as[i] != our_nan){
for (int ii = i; ii < row_dim * M; ii += row_dim){
total_as[i] += partial_as[ii] + partial_bs[ii];
}
total_as[i] = MOD(total_as[i], 4);
my_vec[i] = MOD(total_as[i] / 2 + my_vec[i] + phase_received, 2);
}
}
}
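// For each of its rows i, the col_name == 0 processor sums the partial a and b contributions from all M processor
// columns, reduces the total mod 4, and adds total_as[i] / 2 (the sign picked up when row k is multiplied into row i)
// plus the phase of row k to the row's phase bit mod 2. For a valid tableau of commuting rows this total is even,
// so the integer division by 2 loses nothing.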
// Each processor adds row k (mod 2) to every one of its rows that was flagged as having a 1 in column col
for (int i = 0; i < num_qubits; i++){
if (row_name == i % N){
if (flag_array[(int)floor(i / N)] == 1){
for (int l = 0; l < column_dim; l++){
my_matrix[(int)floor(i / N)][l] = (my_matrix[(int)floor(i / N)][l] + num_to_add[l]) % 2;
}
}
}
}
// Reset vars before next iteration
for (int i = 0; i < P; i++){
winner_row_array[i] = -1;
}
for (int i = 0; i < row_dim; i++){
flag_array[i] = -1;
if (col_name == 0){
total_as[i] = 0;
}
}
for (int i = 0; i < column_dim; i++){
num_to_add[i] = -1;
entries_received[i] = -1;
}
for (int i = 0; i < row_dim * M; i++){
partial_as[i] = our_nan;
partial_bs[i] = our_nan;
}
phase_received = -1;
k++;
bsp_sync();
}
}
// BEGINNING OF THE OUTERMOST LOOP. REDUCTION FOR THE LEFT SIDE OF THE TABLEAU
// =====================================================================================================================================================
// =====================================================================================================================================================
// This part follows the same structure as the right-hand case; the only difference is that "k" continues from where it left off in the previous section.
for (int col = 0; col < num_qubits; col++){
empty = true;
diag_pid = k % N + (col % M) * N;
if (col_name == col % M){
for (int i = 0; i < row_dim; i++){
if (my_matrix[i][(int)floor(col / M)] == 1 && (i * N + row_name) >= k){
empty = false;
row_w_one = i * N + row_name;
bsp_put(diag_pid, &row_w_one, winner_row_array, pid * sizeof(int), sizeof(int));
for (int ii = 0; ii < P; ii++){
bsp_put(ii, &empty, &empty, 0, sizeof(bool));
}
break;
}
}
}
bsp_sync();
if (empty == false){
if (pid == diag_pid){
if (winner_row_array[pid] != -1){
for (int i = 0; i < P; i++){
bsp_put(i, &winner_row_array[pid], &winner_row, 0, sizeof(int));
}
}
else{
for (int i = 0; i < P; i++){
if (winner_row_array[i] != -1){
for (int ii = 0; ii < P; ii++){
bsp_put(ii, &winner_row_array[i], &winner_row, 0, sizeof(int));
}
break;
}
}
}
}
bsp_sync();
// Swaps begin here!
// ============================================================================================================================================
// Processors that control the row where the diagonal element currently is send that row to the processors responsible for the winner row.
if (row_name == k % N && row_name != winner_row % N){
bsp_put(winner_row % N + (col_name % M) * N, my_matrix[(int)floor(k / N)], entries_received, 0, column_dim * sizeof(int));
if (col_name == 0){
bsp_put(winner_row % N + (col_name % M) * N, &my_vec[(int)floor(k / N)], &phase_received, 0, sizeof(int));
}
}
//Processors that handle the winner row send to those handling row k.
if (row_name == winner_row % N && row_name != k % N){
bsp_put(k % N + (col_name % M) * N, my_matrix[(int)floor(winner_row / N)], entries_received, 0, column_dim * sizeof(int));
if (col_name == 0){
bsp_put(k % N + (col_name % M) * N, &my_vec[(int)floor(winner_row / N)], &phase_received, 0, sizeof(int));
}
}
bsp_sync();
//We overwrite our values in "my_matrix" with those received.
// This is the case where the processor handling row k also handles the winner row.
if (row_name == k % N && row_name == winner_row % N){
int *temp_array = NULL;
temp_array = my_matrix[(int)floor(k / N)];
my_matrix[(int)floor(k / N)] = my_matrix[(int)floor(winner_row / N)];
my_matrix[(int)floor(winner_row / N)] = temp_array;
temp_array = NULL;
if (col_name == 0){
temp_phase = my_vec[(int)floor(k / N)];
my_vec[(int)floor(k / N)] = my_vec[(int)floor(winner_row / N)];
my_vec[(int)floor(winner_row / N)] = temp_phase;
temp_phase = -1;
}
}
else if (row_name == k % N){ //Processor that handles row k updates its matrix.
for (int i = 0; i < column_dim; i++){
my_matrix[(int)floor(k / N)][i] = entries_received[i];
}
if (col_name == 0){
my_vec[(int)floor(k / N)] = phase_received;
}
}
else if (row_name == winner_row % N){ //Processor that handles winner row updates its matrix.
for (int i = 0; i < column_dim; i++){
my_matrix[(int)floor(winner_row / N)][i] = entries_received[i];
}
if (col_name == 0){
my_vec[(int)floor(winner_row / N)] = phase_received;
}
}
// Row reduction steps! Get rid of ones in the same column as "col"
// =========================================================================================================================================
if (row_name == k % N){
for (int l = 0; l < N; l++){
bsp_put(l + (col_name % M) * N, my_matrix[(int)floor(k / N)], num_to_add, 0, (int)(column_dim / 2) * sizeof(int));
if (col_name == 0){
bsp_put(l, &my_vec[(int)floor(k / N)], &phase_received, 0, sizeof(int));
}
}
}
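// Only the z half of row k is sent in this pass: after the right-hand reduction every row with index >= k has an
// all-zero x block, so the x' terms of the inner-product formulas vanish and the updates below only touch the z columns.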
if (col_name == col % M && row_name == k % N){
my_matrix[(int)floor(k / N)][(int)floor(col / M)] = -1;
for (int i = 0; i < row_dim; i++){
for (int l = 0; l < M; l++){
bsp_put(row_name + (l % M) * N, &my_matrix[i][(int)floor(col / M)], flag_array, i * sizeof(int), sizeof(int));
}
}
my_matrix[(int)floor(k / N)][(int)floor(col / M)] = 1;
}
else if (col_name == col % M){
for (int i = 0; i < row_dim; i++){
for (int l = 0; l < M; l++){
bsp_put(row_name + (l % M) * N, &my_matrix[i][(int)floor(col / M)], flag_array, i * sizeof(int), sizeof(int));
}
}
}
bsp_sync();
for (int l = 0; l < row_dim; l++){
a_loc[l] = our_nan;
b_loc[l] = our_nan;
}
for (int i = 0; i < row_dim; i++){
if (flag_array[i] == 1){
a_loc[i] = 0;
b_loc[i] = 0;
for (int j = 0; j < (int)column_dim / 2; j++){
a_loc[i] += -1 * (my_matrix[i][j + (column_dim / 2)] * num_to_add[j]);
int v = MOD((my_matrix[i][j] + num_to_add[j]) * my_matrix[i][j + column_dim / 2], 2);
b_loc[i] += v - ((my_matrix[i][j] + num_to_add[j]) * my_matrix[i][j + column_dim / 2]);
}
}
}
bsp_put(row_name, a_loc, partial_as, col_name * row_dim * sizeof(int), row_dim * sizeof(int));
bsp_put(row_name, b_loc, partial_bs, col_name * row_dim * sizeof(int), row_dim * sizeof(int));
bsp_sync();
if (col_name == 0){
for (int i = 0; i < row_dim; i++){
if (partial_as[i] != our_nan){
for (int ii = i; ii < row_dim * M; ii += row_dim){
total_as[i] += partial_as[ii] + partial_bs[ii];
}
total_as[i] = MOD(total_as[i], 4);
my_vec[i] = MOD(total_as[i] / 2 + my_vec[i] + phase_received, 2);
}
}
}
for (int i = 0; i < num_qubits; i++){
if (row_name == i % N){
if (flag_array[(int)floor(i / N)] == 1){
for (int l = 0; l < column_dim / 2; l++){
my_matrix[(int)floor(i / N)][l] = (my_matrix[(int)floor(i / N)][l] + num_to_add[l]) % 2;
}
}
}
}
for (int i = 0; i < P; i++){
winner_row_array[i] = -1;
}
for (int i = 0; i < row_dim; i++){
flag_array[i] = -1;
if (col_name == 0){
total_as[i] = 0;
}
}
for (int i = 0; i < column_dim; i++){
num_to_add[i] = -1;
entries_received[i] = -1;
}
for (int i = 0; i < row_dim * M; i++){
partial_as[i] = our_nan;
partial_bs[i] = our_nan;
}
phase_received = -1;
k++;
bsp_sync();
}
}
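// At this point the distributed blocks in "my_matrix" hold the canonically reduced tableau and "my_vec" (on the
// col_name == 0 processors) holds the matching phases; what remains is to collect them back into the globals.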
// Delete the registers used for communication between processors
bsp_pop_reg(partial_as);
bsp_pop_reg(partial_bs);
bsp_pop_reg(&phase_received);
bsp_pop_reg(entries_received);
bsp_pop_reg(&empty);
bsp_pop_reg(winner_row_array);
bsp_pop_reg(&winner_row);
bsp_pop_reg(flag_array);
bsp_pop_reg(num_to_add);
// From their local matrices, all processors together reconstruct the global matrix and the global vector of phases
for (int i = 0; i < row_dim; i++){
for (int j = 0; j < column_dim; j++){
mat[i * N + row_name][j * M + col_name] = my_matrix[i][j];
}
if (col_name == 0){
vec[i * N + row_name] = my_vec[i];
}
}
bsp_end();
// Print the final matrix
for (int i = 0; i < num_qubits; i++){
for (int j = 0; j < 2 * num_qubits; j++){
printf("%d,", mat[i][j]);
}
printf("\n");
}
// Write the final matrix into a file
write_output();
}
int main(int argc, char **argv){
// Declare that "parallelalg1" is our parallel function
bsp_init(parallelalg1, argc, argv);
// Read the starting stabilizer tableau and the vector of phases
read_csv();
read_vec();
// Call the parallel function
parallelalg1();
exit(EXIT_SUCCESS);
}