-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapriori.cpp
176 lines (127 loc) · 5.53 KB
/
apriori.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
/************************************************************************
Author - Aman Maldar
Simple code - parallel version of data association.
Static value of minSupport=1. This will show all the pairs generated.
File = 6entries.txt
Limitation - Generates only till set of 4 pairs as of now.
It needs multiple changes for the data structure as well. Need to reconfigure it.
Data: (6entries.txt)
2 3 4
1 2 3 4 5
4 5 6 7
0 2
1 2 3 4
2 3 5 7 8
*************************************************************************/
#include "apriori.hcu"
#include "functions.hcu"
double minSupp = 0.001; // 0.001;
void Execute(int argc){
parse_database(argc);
//return;
vector <int> A; //= globalDataset // convert itemId_TidMapping into long array
vector <int> B ; // = globalDatasetThreadIndex;
//int *globalDatasetCpu = (int *) malloc (sizeof(int)* totalItems);
int *A_cpu = (int *) malloc (sizeof(int)* totalItems);
int *B_cpu = (int *) malloc (sizeof(int)* (maxItemID+1)); //index array lenght same as number of items
int k =0; // global pointer for globalMap
//globalDatasetThreadIndex.push_back(k);
for(int i=0;i<=maxItemID;i++){
B.push_back(k);
B_cpu[i] = k;
vector <int> tmp11 = itemId_TidMapping[i]; // copy entire vector
//tmp11 = {1,2,3};
for(int j=1;j<tmp11.size();j++){ // last item should be inclusive, first element is excluded
A.push_back(tmp11[j]);
A_cpu[k] = tmp11[j];
//globalDatasetCpu[k] = globalDataset[k];
k++;
}
A.push_back(-1); // seperate mappings by -1
A_cpu[k] = -1;
k++;
// globalDatasetThreadIndex.push_back(k);
}
/* cout << " Printing itemId_TidMapping as array: " << endl;
for(int i =0;i<A.size();i++){
//A_cpu[i] = A[i];
cout << A[i] << " " ;
}cout << endl;*/
cout << " Printing itemId_TidMapping copy: " << totalItems << endl;
for(int i =0;i<totalItems;i++){
cout << A_cpu[i] << " " ;
}cout << endl;
/* cout << " Printing starting indexes " << endl;
for(int i =0;i<B.size();i++){
cout << B[i] << " " ;
}cout << endl;*/
cout << " Printing starting indexes " << endl;
for(int i =0;i<B.size();i++){
cout << B_cpu[i] << " " ;
}cout << endl;
return;
//int numberOfBlocks = 1;
//int threadsInBlock = 100;
L1.push_back(0); // initialized first index with 0 as we are not using it.
//minSupport = round(minSupp * TID_Transactions);
minSupport = 1;
// Following code generates single items which have support greater than min_sup
// compare the occurrence of the object against minSupport
cout << "\n Support:" << minSupport << endl << "\n";
//Generate L1 - filtered single items ? I think this should be C1, not L1.
for (int i=0; i<= maxItemID; i++)
{
if(itemIDcount[i] >= minSupport){
L1.push_back(i); //push TID into frequentItem
one_freq_itemset++;
//cout << "1 Frequent Item is: (" << i << ") Freq is: " << itemIDcount[i] << endl;
}
}
// cout << "one_freq_itemset: " << one_freq_itemset << endl << "\n";
//******************************************************************************************************************
//Generate L2 . Make a pair of frequent items in L1
for (int i=0;i <= L1.size() -1 -1; i++) //-1 is done for eliminating first entry
{
for (int j=i+1;j <= L1.size() -1; j++){
twoStruct.a = L1[i];
twoStruct.b = L1[j];
L2.push_back(twoStruct);
//cout << "2 Items are: (" <<L1[i]<< "," << L1[j] << ") " << endl;
}
}
//******************************************************************************************************************
//cout << "two_freq_itemset: " << two_freq_itemset << endl << "\n";
//******************************************************************************************************************
//work till pair of 2
return;
} // end Execute
int main(int argc, char **argv){
auto start = chrono::high_resolution_clock::now();
Execute(argc);
auto end = chrono::high_resolution_clock::now();
chrono::duration<double> el = end - start;
cout<<"Execution time is: " << el.count() * 1000 << " mS " << endl;
return 0;
}
/*
__shared__ int smem[128];
__global__ void prefix_scan_kernel (int *b_d, int *a_d, int n, int depth) {
while (tid < n) {
smem[threadIdx.x] = a_d[tid]; // each thread copy data to shared memory
__syncthreads(); // wait for all threads
//if (tid%16384 == 0 ) { smem[tid] += res; __syncthreads(); } // result are written at the end*
offset = 1; //1->2->4->8
for (d =0; d < depth ; d++) {
if (threadIdx.x >= offset) {
smem[threadIdx.x] += smem[threadIdx.x-offset] ; //after writing to smem do synchronize
__syncthreads();
} // end if
offset *=2;
} // end for loop
b_d[tid] = smem[threadIdx.x]; // *write the result to array b_d[tid] location
__syncthreads(); // wait for all threads to write results
//if ((tid + 1) % 16384 == 0) { inc++; printf("\n incremented %d times\n", inc);}
tid += 16384; //there are no actual grid present, we just increment the tid to fetch next elemennts from input array.
} // end while (tid < n)
} // end kernel function
*/