-
Notifications
You must be signed in to change notification settings - Fork 0
/
03-concurrent.c
182 lines (141 loc) · 5.12 KB
/
03-concurrent.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#include <errno.h>
#include <limits.h>
#include <pthread.h>
#include <semaphore.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
//#include <assert.h>
#include <multiverse.h>
#include "mmview.h"
// In this third example, we use multiverse and ATLAS's mmviews to
// mitigate cache-line bouncing while profiling.
/*
 * Measure elapsed wall-clock time with Linux's CLOCK_REALTIME.
 *
 * Returns the number of seconds that passed since the calling thread's
 * previous call to timedelta(). The reference timestamp is kept per
 * thread, so threads that time their work concurrently cannot overwrite
 * each other's start time.
 *
 * A thread's first call is measured against a zero-initialized timestamp
 * and therefore returns the time since the epoch; use that first call
 * only to arm the timer and ignore its result.
 */
double timedelta(void) {
    // Thread-local: a plain static would be shared (and raced on) by all
    // threads that measure their own intervals.
    static _Thread_local struct timespec last_now;
    struct timespec now;

    clock_gettime(CLOCK_REALTIME, &now);

    // Seconds and nanoseconds are subtracted separately; the nanosecond
    // difference may be negative, which the sum corrects.
    double delta = (double)(now.tv_sec - last_now.tv_sec);
    delta += (double)(now.tv_nsec - last_now.tv_nsec) / 1e9;

    last_now = now;
    return delta;
}
// We use a single multiverse variable to switch between profiling and
// non-profiling mode. In this example, we use explicitly inserted
// profiling points. However, regular gprof call counting that uses mcount
// has the same cache-line bouncing problems.
__attribute__((multiverse)) volatile int DO_PROFILE = 0;
// Atomic variant of the profiling point, kept for reference:
//#define PROFILE(var) do { if (DO_PROFILE) { __atomic_add_fetch(&var, 1, __ATOMIC_RELAXED); } } while(0)
// PROFILE(var) increments `var` only while DO_PROFILE is non-zero. The
// increment is a plain, non-atomic `++` on a variable shared by all threads.
#define PROFILE(var) do { if (DO_PROFILE) { var++; } } while(0)
// Incremented at every entry into fib().
unsigned long profile_calls;
// Incremented every time fib() takes its base-case branch (n <= 1).
unsigned long profile_branches;
////////////////////////////////////////////////////////////////
// The workload: A recursive fibonacci implementation, where we want
// to profile calls and branches.
//
// profile_branches/profile_calls is approximately 0.5 (about every
// second call ends in the base case)
__attribute__((multiverse))
unsigned long fib(int n) {
    // Every invocation counts as one call.
    PROFILE(profile_calls);

    // Recursive case: sum of the two preceding Fibonacci numbers.
    if (n > 1) {
        unsigned long sum = fib(n - 1) + fib(n - 2);
        return sum;
    }

    // Base case (n <= 1): count the taken branch and hand back n itself.
    PROFILE(profile_branches);
    return n;
}
// For benchmarking, we sum up the total run-time and the number of timed
// fib() runs. In order to record all runs, we protect these variables
// with a mutex.
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
// Sum of all per-run durations, in milliseconds.
double duration_total = 0.0;
// Number of runs that contributed to duration_total.
unsigned duration_count = 0;
// We will create two views. Within the profile view, profiling is
// enabled with multiverse. With the semaphore, we control the maximal
// number of threads in that mmview.
sem_t profile_view_sem;
mmview_t default_view;
mmview_t profile_view;
// Argument passed to fib() for every timed run.
#define FIB_CALC 30
// Two-level stringification, so that FIB_CALC is expanded to its value
// before being turned into a string literal for the output messages.
#define __STR__(X) #X
#define _STR_(X) __STR__(X)
#define FIB_CALC_STR _STR_(FIB_CALC)
/*
 * Thread body: performs 100 timed fib(FIB_CALC) runs.
 *
 * Before each run the thread tries, without blocking, to take a slot of
 * profile_view_sem. With a slot it runs in profile_view, otherwise in
 * default_view; it migrates only when the wanted view differs from the
 * one it is currently in. The duration of the run (in milliseconds) is
 * added to the shared totals under `lock`, and a slot that was taken is
 * given back afterwards.
 *
 * arg: the thread's index, passed by value inside the pointer.
 * Returns NULL.
 */
void *worker(void *arg) {
    long thread_num = (long)arg;
    (void) thread_num;  // only referenced by the optional printf below

    mmview_t active_view = mmview_current();

    for (unsigned run = 0; run < 100; run++) {
        // N threads live in the profiling view; everybody else falls
        // back to the default view.
        mmview_t wanted_view = default_view;
        if (sem_trywait(&profile_view_sem) == 0) {
            wanted_view = profile_view;
        }
        if (active_view != wanted_view) {
            mmview_migrate(wanted_view);
        }
        active_view = wanted_view;

        // Execute the workload; the first timedelta() call only arms
        // the timer.
        timedelta();
        unsigned long result = fib(FIB_CALC);
        (void) result;
        double duration = 1000.0 * timedelta();

        // UNCOMMENT to get per run timings
        //printf("thread%ld: %f ms (profile=%d, fib(" FIB_CALC_STR ") = %lu)\n",
        //       thread_num, duration, (active_view == profile_view), result);

        // Account for this run in the shared statistics.
        pthread_mutex_lock(&lock);
        duration_count += 1;
        duration_total += duration;
        pthread_mutex_unlock(&lock);

        // Give the profiling slot back, if this run held one.
        if (active_view == profile_view) {
            sem_post(&profile_view_sem);
        }
    }

    return NULL;
}
int main(int argc, char *argv[]) {
mmview_unshare_text();
multiverse_init();
multiverse_dump_info();
// Perpare the non-profiling view (view 0) with DO_PROFILE=0
default_view = mmview_current();
DO_PROFILE=0;
multiverse_commit();
// Prepare a separate view (view 1) with DO_PROFILE=1
profile_view = mmview_create();
mmview_migrate(profile_view);
DO_PROFILE = 1;
multiverse_commit();
// Switch back to the default view
mmview_migrate(default_view);
////////////////////////////////////////////////////////////////
// Parse the command line
if (argc < 2) {
printf("%s <THREADCOUNT> [PROFILE_COUNT]\n", argv[0]);
return -1;
}
unsigned nr_threads = atoi(argv[1]);
unsigned nr_profiling = nr_threads;
if (argc >= 3) {
nr_profiling = atoi(argv[2]);
}
////////////////////////////////////////////////////////////////
// Start Threads and wait for them
printf("Starting with %d threads. %d threads are profiling\n", nr_threads, nr_profiling);
sem_init(&profile_view_sem, false, nr_profiling);
pthread_t *threads = malloc(sizeof(pthread_t) * nr_threads);
for (unsigned long i = 0; i < nr_threads; i++) {
pthread_create(&threads[i], NULL, worker, (void*)i);
}
for (unsigned long i = 0; i < nr_threads; i++) {
pthread_join(threads[i], NULL);
}
printf("Avg. time for fib(" FIB_CALC_STR ") = %f ms; %f branches/call\n",
duration_total/duration_count,
profile_branches/(double)profile_calls);
return 0;
}