-
Notifications
You must be signed in to change notification settings - Fork 0
/
manual_codes.py
87 lines (55 loc) · 2.28 KB
/
manual_codes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import matplotlib
import nltk
import pandas as pd
import networkx as nx
matplotlib.use('TkAgg')
from matplotlib import pyplot as plt
import itertools
# Load the semicolon-separated CSV. Malformed rows are dropped with a warning
# rather than aborting the load. ``on_bad_lines`` replaces ``error_bad_lines``,
# which pandas deprecated in 1.3 and removed in 2.0 (requires pandas >= 1.3).
df = pd.read_csv("./Abstract/paypal_dv.csv", on_bad_lines="warn", sep=';')
def get_edges(data: pd.DataFrame, column: str, sep: str = " | ", prefix_len=3) -> pd.DataFrame:
    """Build a weighted co-occurrence edge list from a delimited column.

    Each non-null cell of ``data[column]`` is split on ``sep`` into codes.
    Every pair of codes taken from the same cell (in cell order, via
    ``itertools.combinations``) contributes one occurrence of the edge
    ``(source, target)``, with both ends truncated to their first
    ``prefix_len`` characters.

    Args:
        data: DataFrame holding the column to analyse.
        column: Name of the column whose cells hold ``sep``-delimited codes.
        sep: Delimiter between codes inside a cell. Defaults to ``" | "``.
        prefix_len: Number of leading characters kept from each code;
            ``None`` keeps the whole code. Defaults to ``3``.

    Returns:
        DataFrame with columns ``source``, ``target`` and ``weight``: one row
        per distinct ``(source, target)`` pair, where ``weight`` is the number
        of times that pair was produced.

    Note:
        Rows are direction-sensitive: ``(A, B)`` and ``(B, A)`` are counted
        separately. Truncation can also produce rows whose source equals
        their target.
    """
    # Cast to str so a column that pandas parsed as numeric does not break
    # the split() call below.
    cells = data[column].dropna().astype(str)
    pairs = [
        (first[:prefix_len], second[:prefix_len])
        for cell in cells
        for first, second in itertools.combinations(cell.split(sep), 2)
    ]
    # Explicit column names keep the frame well-formed when no pair exists.
    edges = pd.DataFrame(pairs, columns=["source", "target"])
    edges["weight"] = 1
    return edges.groupby(by=["source", "target"], as_index=False)["weight"].sum()
# Build the weighted edge list and load it into an undirected graph.
df_edges = get_edges(data=df, column="DWPI Manual Codes")
g = nx.from_pandas_edgelist(
    df_edges,
    source="source",
    target="target",
    edge_attr=["weight"],
    create_using=nx.Graph,
)

# NOTE: from here on ``df`` refers to the edge list, not the raw CSV data.
df = df_edges
# "Clubs" are the distinct edge sources, "people" the distinct edge targets.
# A code can appear in both lists, in which case it is drawn by both passes.
clubs = list(df.source.unique())
people = list(df.target.unique())

plt.figure(figsize=(12, 12))

# Compute node positions once so every drawing call shares the same layout.
layout = nx.spring_layout(g, iterations=50)

# Clubs: light blue, with a marker size of 120 per connection (node degree).
club_size = [g.degree(club) * 120 for club in clubs]
nx.draw_networkx_nodes(
    g,
    layout,
    nodelist=clubs,
    node_size=club_size,  # one size per club, in the same order as ``clubs``
    node_color='lightblue',
)

# People: small grey markers of fixed size.
nx.draw_networkx_nodes(g, layout, nodelist=people, node_color='#cccccc', node_size=120)

# Highly connected people (degree above 10) are highlighted in orange.
popular_people = [person for person in people if g.degree(person) > 10]
nx.draw_networkx_nodes(g, layout, nodelist=popular_people, node_color='orange', node_size=80)

# Edges: thin and grey.
nx.draw_networkx_edges(g, layout, width=1, edge_color="#cccccc")

# Labels are drawn for clubs only; each club is labelled with its own name.
node_labels = dict(zip(clubs, clubs))
nx.draw_networkx_labels(g, layout, labels=node_labels)

# Hide the axes; they carry no meaning for a network drawing.
plt.axis('off')
plt.title("Class Patents")

# Open the figure window.
plt.show()