
Commit

Added network inference files
callahantiff committed Jan 12, 2018
1 parent 0d671eb commit f279f5b
Showing 7 changed files with 1,557 additions and 0 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -47,3 +47,5 @@
.DS_Store?
._*
*.idea/

Qiery Parser.py
68 changes: 68 additions & 0 deletions EvaluationMetrics.py
@@ -0,0 +1,68 @@
#########################################################################################
# EvaluationMetrics.py
# Purpose: script contains methods for evaluating link prediction algorithm performance
# version 1.0
# date: 01.28.2017
#########################################################################################


# import module/script dependencies
import random
import heapq



def AUC(nonexist_scores, missing_scores):
    '''
    Function calculates the probability that a randomly chosen missing link receives a higher score than a randomly
    chosen nonexistent link, using the procedure described by Lu & Zhou (2010)
    (doi: http://dx.doi.org/10.1016/j.physa.2010.11.027). The function takes the scored nonexistent and missing
    (test) edges and returns an AUC score. The method is currently set to perform 1,000 random comparisons.
    :param nonexist_scores: dict mapping nonexistent edges to prediction scores
    :param missing_scores: dict mapping missing (test) edges to prediction scores
    :return: a float representing the AUC score for that comparison
    '''

    # comparisons = len(nonexist_scores)*len(missing_scores) #HOW MANY??
    comparisons = 1000
    count = 0.0

    for i in xrange(comparisons):
        # randomly sample one nonexistent-edge score and one missing-edge score
        TN = random.sample(nonexist_scores.values(), 1)
        TP = random.sample(missing_scores.values(), 1)

        if TP > TN:
            count += 1.0
        if TP == TN:
            count += 0.5

    auc = count/comparisons

    return auc



def KPrecision(auc, scores, testing_edges):
    '''
    Function calculates the ratio of relevant items among the top n ranked predictions using the procedure described
    by Lu & Zhou (2010) (doi: http://dx.doi.org/10.1016/j.physa.2010.11.027). The function takes the full set of
    predictions and the missing (test) edges, and returns a top k-precision score computed over 20% of the scores.
    :param auc: float representing the AUC score - indicates whether the top or bottom of the ranked list is assessed
    :param scores: dict mapping predicted edges to prediction scores
    :param testing_edges: list of missing (test) edges
    :return: a float representing the precision for the top number of selected links
    '''

    # get 20% of edges (at least 1)
    links = max(1, int(len(scores)*0.20))

    if auc < 0.5:
        # pred_list = heapq.nsmallest(links, set([x[1] for x in scores.items()])) #returns n highest likelihood scores
        pred_list = heapq.nsmallest(links, scores, key=lambda k: scores[k])
    else:
        # pred_list = heapq.nlargest(links, set([x[1] for x in scores.items()])) #returns n highest likelihood scores
        pred_list = heapq.nlargest(links, scores, key=lambda k: scores[k])

    # an edge counts as relevant (0) if it appears in the test set in either direction, otherwise irrelevant (1)
    y_pred = [0 if (i[0], i[1]) in testing_edges else 0 if (i[1], i[0]) in testing_edges else 1 for i in pred_list]

    return float(y_pred.count(0))/links
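For reference, a minimal usage sketch of the two metrics above. The edge tuples and scores are hypothetical, and it assumes the module's Python 2 setting (AUC uses xrange and samples directly from dict values).

# hypothetical example data: edges are node-pair tuples, scores are predicted likelihoods
missing_scores = {('a', 'b'): 0.91, ('c', 'd'): 0.74, ('e', 'f'): 0.55}   # held-out test edges
nonexist_scores = {('a', 'd'): 0.20, ('b', 'f'): 0.35, ('c', 'e'): 0.62}  # edges known to be absent

# combine all scored edges and record the true missing (test) edges
all_scores = dict(missing_scores)
all_scores.update(nonexist_scores)
testing_edges = list(missing_scores)

auc = AUC(nonexist_scores, missing_scores)              # probability a test edge outscores a nonexistent edge
precision = KPrecision(auc, all_scores, testing_edges)  # fraction of top-ranked predictions that are test edges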

0 comments on commit f279f5b
