-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdemo_over_hyd.m
163 lines (157 loc) · 7.97 KB
/
demo_over_hyd.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
clear;
% Demo of overlapping community detection with DHCD using the hybrid
% initialization (random & NNDSVD initialization).
%====================
% Input data and output locations
adj = load('data/Reddit26_adj.txt');            % adjacency matrix
att = load('data/Reddit26_node_attri.txt');     % node attribute matrix
gnd_path = 'data/Reddit26_gnd.txt';             % path of the ground-truth file
res_path_TA = 'Reddit26_res_TA(over_hyd).txt';  % where the overlapping result of DHCD T-A is saved
res_path_AT = 'Reddit26_res_AT(over_hyd).txt';  % where the overlapping result of DHCD A-T is saved
%==========
% Alternative dataset: comment out the five assignments above and enable these.
%adj = load('data/Reddit25_adj.txt');
%att = load('data/Reddit25_node_attri.txt');
%gnd_path = 'data/Reddit25_gnd.txt';
%res_path_TA = 'Reddit25_res_TA(over_hyd).txt';
%res_path_AT = 'Reddit25_res_AT(over_hyd).txt';
%====================
% Network dimensions and cluster counts
num_nodes = size(adj, 1);       % number of nodes (rows of the adjacency matrix)
num_atts = size(att, 2);        % number of node attributes (columns of the attribute matrix)
num_topo_clus = 3;              % number of topology clusters
num_att_clus = num_topo_clus;   % number of attribute clusters (kept equal to the topology clusters)
%====================
% Optimization settings
max_iter = 1e4;     % maximum number of optimization iterations
min_error = 1e-5;   % minimum relative error used to decide convergence
num_runs = 10;      % number of independent runs of DHCD per parameter value
lambd = 1e3;        % forwarded unchanged to DHCD_TA / DHCD_AT
                    % (presumably a penalty weight -- confirm against those functions)
%====================
% Remove self-connected edges: entry (i,i) of the num_nodes-by-num_nodes
% matrix sits at linear index (i-1)*(num_nodes+1)+1.
diag_idx = 1:(num_nodes + 1):(num_nodes^2);
adj(diag_idx) = 0;
%==========
% Switch both matrices to sparse storage
adj = sparse(adj);
att = sparse(att);
%====================
% Parameter values swept by the main loop (used as alpha and as beta)
params = [0.1:0.1:0.9, 1:10];
num_params = numel(params);     % number of parameter settings
%====================
%Parameter sweep: for every value in params, run the T-A channel and then the
%A-T channel several times each (random transition-matrix initialization),
%log every run, and finally re-evaluate the run with the smallest objective.
log_path_TA = 'DHCD_T-A(over_hyd).txt'; %Log of the T-A channel (opened in append mode)
log_path_AT = 'DHCD_A-T(over_hyd).txt'; %Log of the A-T channel (opened in append mode)
num_rep = max(num_runs, 1); %At least one run per channel is always performed
for l=1:num_params
    %==============================
    %T-A Channel, i.e., DHCD T-A
    alpha = params(l);
    fprintf('Alpha: %f\n', alpha);
    %====================
    %NOTE(review): the NNDSVD calls below do not depend on alpha; if NNDSVD is
    %deterministic for this flag value they could be computed once before the
    %sweep -- confirm against NNDSVD before moving them.
    [topo_mem_init, ~] = NNDSVD(adj, num_topo_clus, 0); %Initialize the topology cluster membership matrix, i.e., X
    [~, att_desc_init] = NNDSVD(att, num_att_clus, 0); %Initialize the attribute description matrix, i.e., Z
    att_desc_init = att_desc_init';
    %====================
    %Independently run the DHCD T-A algorithm multiple times
    for t=1:num_rep
        trans_TA_init = rand(num_topo_clus, num_att_clus); %Initialize the T-A transition matrix, i.e., U
        [cur_topo_mem_TA,cur_att_desc_TA,cur_trans_TA,cur_obj] = DHCD_TA(adj,att,topo_mem_init,att_desc_init,trans_TA_init,alpha,lambd,max_iter,min_error);
        fprintf('T-A Obj. %8.4f\n', cur_obj);
        %==========
        %Extract the disjoint community detection result:
        %for each row, the column index of the largest entry
        [~, dis_labels] = max(cur_topo_mem_TA*cur_trans_TA, [], 2);
        %Extract and save the overlapping community detection result
        save_over_res(dis_labels, cur_trans_TA, res_path_TA);
        %Evaluate the performance of current community detection result
        [Fsc, Jac] = over_eva(gnd_path, res_path_TA);
        fprintf('T-A Ch. F-score: %8.4f; Jaccard: %8.4f\n', [Fsc, Jac]);
        %Append the metrics of this run to the log; fail with a clear message
        %when the log cannot be opened (fopen returns -1 in that case)
        [fid, err_msg] = fopen(log_path_TA, 'at');
        if fid == -1
            error('demo_over_hyd:logOpenFailed', 'Cannot open log file "%s": %s', log_path_TA, err_msg);
        end
        fprintf(fid, 'T-A Ch. Alpha: %8.4f; Obj: %8.4f; F-Score: %8.4f; Jaccard: %8.4f\n', [alpha, cur_obj, Fsc, Jac]);
        fclose(fid);
        %==========
        %Update the best community detection result based on the value of objective function.
        %The first run is always accepted so that 'obj' never carries over from a previous section.
        if t == 1 || cur_obj<obj
            topo_mem_TA = cur_topo_mem_TA;
            att_desc_TA = cur_att_desc_TA;
            trans_TA = cur_trans_TA;
            obj = cur_obj;
        end
    end
    %======================
    %Evaluate the performance of the best community detection result
    %(the result file is rewritten with the best run before evaluation)
    [~, dis_labels] = max(topo_mem_TA*trans_TA, [], 2);
    save_over_res(dis_labels, trans_TA, res_path_TA);
    [Fsc, Jac] = over_eva(gnd_path, res_path_TA);
    fprintf('Min. Obj. T-A Ch. F-score: %8.4f; Jaccard: %8.4f\n', [Fsc, Jac]);
    fprintf('====================\n');
    [fid, err_msg] = fopen(log_path_TA, 'at');
    if fid == -1
        error('demo_over_hyd:logOpenFailed', 'Cannot open log file "%s": %s', log_path_TA, err_msg);
    end
    fprintf(fid, 'Obj. Min. T-A Ch. Alpha: %8.4f; Obj: %8.4f; F-Score: %8.4f; Jaccard: %8.4f\n', [alpha, obj, Fsc, Jac]);
    fprintf(fid, '====================\n');
    fclose(fid);
    %==============================
    %A-T Channel, i.e., DHCD A-T
    beta = alpha; %The A-T channel reuses the parameter value of the T-A channel
    fprintf('Beta: %f\n', beta);
    %====================
    %Initialize the attribute cluster membership matrix & attribute description matrix, i.e., Y & Z
    [att_mem_init, att_desc_init] = NNDSVD(att, num_att_clus, 0);
    att_desc_init = att_desc_init';
    %====================
    %Independently run the DHCD A-T algorithm multiple times
    for t=1:num_rep
        trans_AT_init = rand(num_att_clus, num_topo_clus); %Initialize the A-T transition matrix, i.e., V
        %NOTE(review): outputs are bound as (att_desc, att_mem, ...) while the
        %inputs are passed as (att_mem_init, att_desc_init, ...) -- verify the
        %output order against the signature of DHCD_AT.
        [cur_att_desc_AT,cur_att_mem_AT,cur_trans_AT,cur_obj] = DHCD_AT(adj,att,att_mem_init,att_desc_init,trans_AT_init,beta,lambd,max_iter,min_error);
        fprintf('A-T Obj. %8.4f\n', cur_obj);
        %==========
        %Extract the disjoint community detection result:
        %for each row, the column index of the largest entry
        [~, dis_labels] = max(cur_att_mem_AT*cur_trans_AT, [], 2);
        %Extract and save the overlapping community detection result
        save_over_res(dis_labels, cur_trans_AT, res_path_AT);
        %Evaluate the performance of current community detection result
        [Fsc, Jac] = over_eva(gnd_path, res_path_AT);
        fprintf('A-T Ch. F-score: %8.4f; Jaccard: %8.4f\n', [Fsc, Jac]);
        %Append the metrics of this run to the log; fail with a clear message
        %when the log cannot be opened (fopen returns -1 in that case)
        [fid, err_msg] = fopen(log_path_AT, 'at');
        if fid == -1
            error('demo_over_hyd:logOpenFailed', 'Cannot open log file "%s": %s', log_path_AT, err_msg);
        end
        fprintf(fid, 'A-T Ch. Beta: %8.4f; Obj: %8.4f; F-Score: %8.4f; Jaccard: %8.4f\n', [beta, cur_obj, Fsc, Jac]);
        fclose(fid);
        %==========
        %Update the best community detection result based on the value of objective function.
        %The first run is always accepted so that 'obj' of the T-A channel is not reused here.
        if t == 1 || cur_obj<obj
            att_mem_AT = cur_att_mem_AT;
            att_desc_AT = cur_att_desc_AT;
            trans_AT = cur_trans_AT;
            obj = cur_obj;
        end
    end
    %====================
    %Evaluate the performance of the best community detection result
    %(the result file is rewritten with the best run before evaluation)
    [~, dis_labels] = max(att_mem_AT*trans_AT, [], 2);
    save_over_res(dis_labels, trans_AT, res_path_AT);
    [Fsc, Jac] = over_eva(gnd_path, res_path_AT);
    fprintf('Min. Obj. A-T Ch. F-score: %8.4f; Jaccard: %8.4f\n', [Fsc, Jac]);
    fprintf('====================\n');
    [fid, err_msg] = fopen(log_path_AT, 'at');
    if fid == -1
        error('demo_over_hyd:logOpenFailed', 'Cannot open log file "%s": %s', log_path_AT, err_msg);
    end
    fprintf(fid, 'Obj. Min. A-T Ch. Beta: %8.4f; Obj: %8.4f; F-Score: %8.4f; Jaccard: %8.4f\n', [beta, obj, Fsc, Jac]);
    fprintf(fid, '====================\n');
    fclose(fid);
end