Skip to content

Commit

Permalink
commit of project 2 to run long part on cloud
Browse files (browse the repository at this point in the history)
  • Loading branch information
Colin Patrick Gaffney committed Oct 24, 2018
1 parent d70f242 commit 88745c0
Show file tree
Hide file tree
Showing 9 changed files with 37 additions and 57 deletions.
Binary file modified hw1/deg_counts_plot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added hw1/hdn.graph
Binary file not shown.
Binary file modified hw1/inlinks_email.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified hw1/inlinks_epinions.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified hw1/outlinks_email.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified hw1/outlinks_epinions.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified hw1/q1_plots.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
27 changes: 11 additions & 16 deletions hw1/q2-starter.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -83,38 +83,33 @@ def q2_2():
#Your code here:

def reachable_analysis(graph, name):
reached_out = Set([])
reached_out_cum = []
reached_in = Set([])
reached_in_cum = []
for i in range(100):
node = graph.GetRndNId()
bfs_out = snap.GetBfsTree(graph, node, True, False)
bfs_in = snap.GetBfsTree(graph, node, False, True)
for edge in bfs_out.Edges():
reached_out.add(edge.GetSrcNId())
reached_out.add(edge.GetDstNId())
for edge in bfs_in.Edges():
reached_in.add(edge.GetSrcNId())
reached_in.add(edge.GetDstNId())
reached_out_cum.append(len(reached_out))
reached_in_cum.append(len(reached_in))
reached_out_cum.append(bfs_out.GetNodes())
reached_in_cum.append(bfs_in.GetNodes())

reached_out_cum = sorted(reached_out_cum)
reached_in_cum = sorted(reached_in_cum)

y = reached_out_cum
if name == 'epinions':
if True:#name == 'epinions':
plt.plot(y, label='Reachability using outlinks for %s' % name)
plt.yscale('log')
else:
plt.plot(y, label='Reachability using outlinks for %s' % name)
#else:
# plt.plot(y, label='Reachability using outlinks for %s' % name)
plt.savefig('outlinks_%s.png' % name)
plt.close()

y = reached_in_cum
if name == 'email':
if True:#name == 'email':
plt.plot(y, label='Reachability using inlinks for %s' % name)
plt.yscale('log')
else:
plt.plot(y, label='Reachability using inlinks for %s' % name)
#else:
# plt.plot(y, label='Reachability using inlinks for %s' % name)
plt.savefig('inlinks_%s.png' % name)
plt.close()

Expand Down
67 changes: 26 additions & 41 deletions hw1/q4.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -39,7 +39,7 @@ def build_hdn(graph):
print 'nodes = {}'.format(dgraph.GetNodes())
print 'edges = {}'.format(dgraph.GetEdges())
print 'density = {}'.format(2.0 * dgraph.GetEdges() / (dgraph.GetNodes() * (dgraph.GetNodes() - 1.0)))
print 'clust coef = {}'.format(snap.GetClustCf(dgraph, 100))
print 'clust coef = {}'.format(snap.GetClustCf(dgraph, 1000))
return dgraph

def load_graph():
Expand Down Expand Up @@ -89,10 +89,14 @@ def deg_counts_plot(deg_counts, color):
y = np.log10(y)
plt.plot(x, y, color=color, linestyle="", marker="o")

def get_neighbors(graph, nodeId):
def get_neighbors(graph, nodeId, othergraph=None):
n = set([])
for i in range(graph.GetNI(nodeId).GetDeg()):
n.add(graph.GetNI(nodeId).GetNbrNId(i))
nid = graph.GetNI(nodeId).GetNbrNId(i)
if othergraph is not None:
if not othergraph.IsNode(nid):
continue
n.add(nid)
return n

def max_clique(graph):
Expand Down Expand Up @@ -129,8 +133,6 @@ def top_five_sim(nid, metric):
nid_nb = get_neighbors(graph, nid)
nb = get_neighbors(graph, node.GetId())
if metric == 'CN':
print nid_nb
print nb
score = len(nid_nb.intersection(nb))
else:
score = len(nid_nb.intersection(nb)) / (1.0 * len(nid_nb.union(nb)))
Expand All @@ -154,14 +156,26 @@ def top_five_sim(nid, metric):

def contraction(graph, hdn):

supernodes = set([])

def contract_clique(node):
clique = get_neighbors(graph, node.GetId())
clique = get_neighbors(graph, node.GetId(), othergraph=hdn)
supernodeId = clique.pop()
assert(hdn.IsNode(supernodeId))
supernodes.add(supernodeId)
for nodeId in clique:
if nodeId in supernodes:
continue
for nbrId in get_neighbors(hdn, nodeId):
if not hdn.IsNode(nbrId):
continue
assert(hdn.IsNode(supernodeId))
assert(hdn.IsNode(nbrId))
hdn.AddEdge(supernodeId, nbrId)
assert(not supernodeId == nodeId)
assert(nodeId not in supernodes)
hdn.DelNode(nodeId)
graph.DelNode(nodeId)
#graph.DelNode(nodeId)

node = graph.BegNI()
while node < graph.EndNI():
Expand All @@ -174,48 +188,19 @@ def contract_clique(node):
print ''
print 'Contracted graph stats'
print 'clust cf = {}'.format(clust_cf)
print 'density = {}'.format(2 * contracted.GetEdges() / (contracted.GetNodes() * (contracted.GetNodes() - 1)))

def contraction(graph, hdn):

def contract_clique(node):
clique = get_neighbors(graph, node.GetId())
supernodeId = clique.pop()
for nodeId in clique:
for nbrId in get_neighbors(hdn, nodeId):
hdn.AddEdge(supernodeId, nbrId)
hdn.DelNode(nodeId)

for node in graph.Nodes():
if is_gene(node) and node.GetDeg() > 250:
contract_clique(node)

contracted = hdn
clust_cf = snap.GetClustCf(contracted, int(1000))
print ''
print 'Contracted graph stats'
print 'clust cf = {}'.format(clust_cf)
print 'density = {}'.format(2 * dgraph.GetEdges() / (dgraph.GetNodes() * (dgraph.GetNodes() - 1)))
print 'density = {}'.format(2.0 * contracted.GetEdges() / (contracted.GetNodes() * (contracted.GetNodes() - 1.0)))
print 'nodes = {}'.format(contracted.GetNodes())
print 'edges = {}'.format(contracted.GetEdges())

def main():
graph = load_graph()
print_graph_stats(graph)
make_plots(graph)
hdn = build_hdn(graph)
check_hdn_edges(hdn, graph)
max_clique(graph)
contraction(graph, hdn)
disease_sim(graph)
max_clique(graph)
contraction(graph, hdn)

def check_hdn_edges(hdn, graph):
for node in graph.Nodes():
if is_gene(node):
for i in get_neighbors(graph, node.GetId()):
for j in get_neighbors(graph, node.GetId()):
assert(hdn.IsEdge(i, j))

disease_sim(graph)

def print_graph_stats(graph):
print 'nodes = {}'.format(graph.GetNodes())

Expand Down

0 comments on commit 88745c0

Please sign in to comment.