-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
0d671eb
commit f279f5b
Showing
7 changed files
with
1,557 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,3 +47,5 @@ | |
.DS_Store? | ||
._* | ||
*.idea/ | ||
|
||
Qiery Parser.py |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
######################################################################################### | ||
# EvaluationMetrics.py | ||
# Purpose: script contains methods for evaluating link prediction algorithm performance | ||
# version 1.0 | ||
# date: 01.28.2017 | ||
######################################################################################### | ||
|
||
|
||
# import module/script dependencies | ||
import random | ||
import heapq | ||
|
||
|
||
|
||
def AUC(nonexist_scores, missing_scores, comparisons=1000):
    '''
    Function estimates the probability that a randomly chosen missing link is given a higher score than a randomly
    chosen nonexistent link using procedures described by Lu & Zhou (2010)
    (doi:http://dx.doi.org/10.1016/j.physa.2010.11.027). The estimate is obtained by repeatedly drawing one score
    from each mapping: a draw where the missing link scores higher counts 1.0, a tie counts 0.5.
    :param nonexist_scores: dictionary whose values are the scores of nonexistent edges
    :param missing_scores: dictionary whose values are the scores of missing (test) edges
    :param comparisons: number of random comparisons to perform (default 1,000)
    :return: a float in [0.0, 1.0] which is the AUC score for that comparison
    :raises ValueError: if comparisons is less than 1 or either dictionary is empty
    '''

    if comparisons < 1:
        raise ValueError('comparisons must be a positive integer')

    # Materialise the scores once: random.sample needs a real sequence (a dict view is rejected by
    # Python 3), and rebuilding the list on every iteration is wasted work.
    nonexist_values = list(nonexist_scores.values())
    missing_values = list(missing_scores.values())

    if not nonexist_values or not missing_values:
        raise ValueError('AUC requires at least one nonexistent score and one missing score')

    count = 0.0

    for _ in range(comparisons):
        # Draw order (nonexistent first, then missing) matches the original so seeded runs stay comparable.
        negative = random.sample(nonexist_values, 1)[0]
        positive = random.sample(missing_values, 1)[0]

        if positive > negative:
            count += 1.0
        elif positive == negative:
            count += 0.5

    return count / comparisons
|
||
|
||
|
||
def KPrecision(auc, scores, testing_edges, fraction=0.20):
    '''
    Function calculates the ratio of relevant items among the top-ranked predictions using procedures described by
    Lu & Zhou (2010) (doi:http://dx.doi.org/10.1016/j.physa.2010.11.027). A fraction of the scored edges
    (20% by default, always at least one) is selected and the share of those found in testing_edges is returned.
    :param auc: AUC score - when below 0.5 the lowest-scored edges are assessed, otherwise the highest-scored
    :param scores: dictionary mapping each candidate edge (a pair, indexed as edge[0], edge[1]) to its score
    :param testing_edges: container of edges counted as relevant; each selected edge is looked up in both
                          orientations, (u, v) and (v, u)
    :param fraction: share of the scored edges to assess (default 0.20)
    :return: a float which is the precision for the selected number of links
    '''

    # assess the requested share of edges, but never fewer than one
    links = max(1, int(len(scores) * fraction))

    # an AUC below 0.5 means the ranking is inverted, so the lowest scores are taken instead of the highest
    select = heapq.nsmallest if auc < 0.5 else heapq.nlargest
    pred_list = select(links, scores, key=scores.get)

    # an edge is a hit when it appears in testing_edges in either orientation
    hits = sum(1 for edge in pred_list
               if (edge[0], edge[1]) in testing_edges or (edge[1], edge[0]) in testing_edges)

    return float(hits) / links
Oops, something went wrong.