#######################################################################################################
# LinkPredictionResults.py
# Purpose: script runs 10 link prediction algorithms on training and testing network data in parallel
# version 1.2.0
# date: 01.28.2017
#######################################################################################################
# import module/script dependencies
import networkx as nx
import numpy as np
from collections import Counter
import operator
import LinkPrediction
import csv


def LabelDict(results, id, var):
    '''
    Function takes a JSON file of results (containing ice ids and labels) and two variables storing the id and label
    fields, and returns a dictionary where the keys are the ids and the values are the corresponding labels.
    :param results: JSON file of ice ids and labels
    :param id: variable storing ids
    :param var: variable storing labels
    :return: dictionary with ids as keys and the corresponding labels as values
    '''

    label_dict = {}

    for res in results['results']['bindings']:
        label_dict[str(res[str(id)]['value'].split('/')[-1]).encode('utf8')] = str(
            res[str(var)]['value'].encode('utf8'))

    return label_dict
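
# Example (illustrative only, not from the original script): LabelDict expects SPARQL-style JSON
# query results; the binding names 'drug' and 'label' and the URI below are made up.
# results = {'results': {'bindings': [
#     {'drug': {'value': 'http://purl.example.org/CHEBI_75998'},
#      'label': {'value': 'trametinib'}}]}}
# LabelDict(results, 'drug', 'label')  -->  {'CHEBI_75998': 'trametinib'}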


def EdgeChecker(scores, edges):
    '''
    Function takes a dictionary of edges (keys) and scores (values) and a list of edges. A new dictionary is built
    from the intersection of the edge list and the scored edges.
    :param scores: dictionary of edges (keys) and scores (values)
    :param edges: list of edge tuples
    :return: dictionary of edge tuples (keys) and scores (values)
    '''

    final_dict = {}

    for edge in set(edges).intersection(set(scores.keys())):
        final_dict[edge] = scores[edge]

    return final_dict
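
# Example (illustrative only, not from the original script): EdgeChecker keeps only the scored
# edges that also appear in the candidate edge list.
# scores = {('a', 'b'): 0.8, ('a', 'c'): 0.1}
# EdgeChecker(scores, [('a', 'b'), ('b', 'c')])  -->  {('a', 'b'): 0.8}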


def main():

    # read in graphs; the PART (mid-level) graph is read but not used further in this script
    owl_graph = nx.read_gml('Network_Data/Trametinib_query_OWL_network.gml').to_undirected()
    nets_graph = nx.read_gml('Network_Data/Trametinib_query_NETS_network.gml').to_undirected()
    mid_graph = nx.read_gml('Network_Data/Trametinib_query_PART_network.gml').to_undirected()

    # run link predictions for each graph
    nets_scores = LinkPrediction.katz(nets_graph, beta=0.001, max_power=5, weight=None, dtype=None)
    nets_nonexist = list(nx.non_edges(nets_graph))
    nets_preds = EdgeChecker(nets_scores, nets_nonexist)

    owl_nonexist = list(nx.non_edges(owl_graph))
    owl_scores = LinkPrediction.RPR(owl_graph, alpha=0.15, beta=0)
    owl_preds = EdgeChecker(owl_scores, owl_nonexist)

    # explore predictions
    print(len(nets_preds))  # 1652
    print(np.min(nets_preds.values()))
    print(np.mean(nets_preds.values()))
    print(np.max(nets_preds.values()))

    # get distribution of counts
    print(sorted(Counter(sorted(nets_preds.values())).items(), key=lambda i: i[0]))

    # sort scores; note the second assignment overrides the first, so keep only the ordering needed
    sorted_scores = sorted(owl_preds.items(), key=operator.itemgetter(1), reverse=True)  # biggest first
    sorted_scores = sorted(nets_preds.items(), key=operator.itemgetter(1), reverse=False)  # smallest first

    # investigate the top n items (here, the top 20 edges)
    edges = sorted_scores[0:20]
    print(edges)

    ## Write results for use with ranking methods
    # graphs; the last uncommented assignment is the graph actually scored below
    graph = nx.read_gml('Network_Data/Trametinib_query_OWL_network.gml').to_undirected()
    # graph = nx.read_gml('Network_Data/Trametinib_query_NETS_network.gml').to_undirected()
    graph = nx.read_gml('Network_Data/DDI_reactome_query_NETS_network.gml').to_undirected()

    methods = [LinkPrediction.DegreeProduct(graph, list(nx.non_edges(graph))),
               LinkPrediction.ShortestPath(graph, list(nx.non_edges(graph))),
               LinkPrediction.CommonNeighbors(graph, list(nx.non_edges(graph))),
               LinkPrediction.AdamicAdvar(graph, list(nx.non_edges(graph))),
               LinkPrediction.Jaccard(graph, list(nx.non_edges(graph))),
               LinkPrediction.LHN(graph, list(nx.non_edges(graph))),
               LinkPrediction.ResourceAllocation(graph, list(nx.non_edges(graph))),
               LinkPrediction.Sorensen(graph, list(nx.non_edges(graph))),
               LinkPrediction.katz(graph, beta=0.001, max_power=5, weight=None, dtype=None),
               LinkPrediction.RPR(graph, alpha=0.15, beta=0)]

    # method counter for labeling csv files
    count = 0

    for method in methods:
        updated_res = EdgeChecker(method, list(nx.non_edges(graph)))

        with open('Results/DDI_reactome/NETS_DDI ' + str(count) + '.csv', 'wb') as csvfile:
            writer = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)

            for key, values in updated_res.items():
                writer.writerow([key, values])

        count += 1


if __name__ == '__main__':
    main()
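
# Sketch (not part of the original script): the CSVs written in main() store each row as the
# repr of an edge tuple followed by its score, so they could be read back for ranking roughly
# like this (the file name below is just the first file written above):
#
#   import ast
#   with open('Results/DDI_reactome/NETS_DDI 0.csv', 'rb') as csvfile:
#       rows = [(ast.literal_eval(edge), float(score)) for edge, score in csv.reader(csvfile)]
#   ranked = sorted(rows, key=operator.itemgetter(1), reverse=True)  # biggest score first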