-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathKmeans_openMp.cpp
225 lines (157 loc) · 5.45 KB
/
Kmeans_openMp.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
/*
============================================================================
Name : Kmeans_openMp.cpp
Author : Federico Nocentini & Corso Vignoli
Version :
Copyright :
Description : OpenMP implementation of K-means clustering algorithm
============================================================================
*/
#include <iostream>
#include <cmath>
#include <fstream>
#include <chrono>
#include "Point.h"
#include "Cluster.h"
#include <omp.h>
#include <string>
#include <vector>
#include <c++/8/sstream>
using namespace std;
using namespace std::chrono;
int num_point = 10000;
int num_cluster = 5;
int max_iterations = 20;
vector<Point> init_point();
vector<Cluster> init_cluster(vector<Point> points);
void compute_distance(vector<Point> &points, vector<Cluster> &clusters);
double euclidean_dist(Point point, Cluster cluster);
bool update_clusters(vector<Cluster> &clusters);
void draw_chart_gnu(vector<Point> &points);
int main() {
int num_thread = 6;
omp_set_num_threads(num_thread);
printf("Number of points %d\n", num_point);
printf("Number of clusters %d\n", num_cluster);
printf("Number of processors: %d\n", omp_get_num_procs());
printf("Number of threads: %d\n", num_thread);
srand(int(time(NULL)));
vector<Point> points;
points = init_point();
vector<Cluster> clusters;
clusters = init_cluster(points);
double time_point1 = omp_get_wtime();
bool conv = true;
int iterations = 0;
printf("Starting iterate...\n");
//The algorithm stops when iterations > max_iteration or when the clusters didn't move
while(conv && iterations < max_iterations){
iterations ++;
compute_distance(points, clusters);
conv = update_clusters(clusters);
printf("Iteration %d done \n", iterations);
}
double time_point2 = omp_get_wtime();
double duration = time_point2 - time_point1;
printf("Number of iterations: %d, total time: %f seconds, time per iteration: %f seconds\n",
iterations, duration, duration/iterations);
try{
printf("Drawing the chart...\n");
draw_chart_gnu(points);
}catch(int e){
printf("Chart not available, gnuplot not found");
}
return 0;
}
//Initialize num_point Points
vector<Point> init_point(){
string fname = "/home/federico/CLionProjects/kmeans_cuda/datasets/2D_data_3.csv";
vector<vector<string>> content;
vector<string> row;
string line, word;
fstream file (fname, ios::in);
if(file.is_open())
{
while(getline(file, line))
{
row.clear();
stringstream str(line);
while(getline(str, word, ','))
row.push_back(word);
content.push_back(row);
}
}
vector<Point> points(num_point);
Point *ptr = &points[0];
cout<<"Datapoints:"<<"\n";
for(int i=0;i<content.size();i++)
{
cout<<content[i][0]<<","<<content[i][0]<<"\n";
Point* point = new Point(std::stod(content[i][0]), std::stod(content[i][1]));
ptr[i] = *point;
}
return points;
}
//Initialize num_cluster Clusters
vector<Cluster> init_cluster(vector<Point> points){
vector<Cluster> clusters(num_cluster);
Cluster* ptr = &clusters[0];
cout<<"Clusters:"<<"\n";
for(int i = 0; i < num_cluster; i++){
int n = rand() % (int) num_point;
Cluster *cluster = new Cluster(points[n].get_x_coord(), points[n].get_y_coord());
cout<<points[n].get_x_coord()<<", "<<points[n].get_y_coord()<<"\n";
ptr[i] = *cluster;
}
return clusters;
}
void compute_distance(vector<Point> &points, vector<Cluster> &clusters){
unsigned long points_size = points.size();
unsigned long clusters_size = clusters.size();
double min_distance;
int min_index;
#pragma omp parallel default(shared) private(min_distance, min_index) firstprivate(points_size, clusters_size)
{
#pragma omp for schedule(static)
for (int i = 0; i < points_size; i++) {
Point &point = points[i];
min_distance = euclidean_dist(point, clusters[0]);
min_index = 0;
for (int j = 1; j < clusters_size; j++) {
Cluster &cluster = clusters[j];
double distance = euclidean_dist(point, cluster);
if (distance < min_distance) {
min_distance = distance;
min_index = j;
}
}
point.set_cluster_id(min_index);
#pragma omp critical
clusters[min_index].add_point(point);
}
}
}
double euclidean_dist(Point point, Cluster cluster){
double distance = sqrt(pow(point.get_x_coord() - cluster.get_x_coord(),2) +
pow(point.get_y_coord() - cluster.get_y_coord(),2));
return distance;
}
bool update_clusters(vector<Cluster> &clusters){
bool conv = false;
for(int i = 0; i < clusters.size(); i++){
conv = clusters[i].update_coords();
clusters[i].free_point();
}
return conv;
}
//Draw point plot with gnuplot
void draw_chart_gnu(vector<Point> &points){
ofstream outfile("data.txt");
for(int i = 0; i < points.size(); i++){
Point point = points[i];
outfile << point.get_x_coord() << " " << point.get_y_coord() << " " << point.get_cluster_id() << std::endl;
}
outfile.close();
system("gnuplot -p -e \"plot 'data.txt' using 1:2:3 with points palette notitle\"");
remove("data.txt");
}