Skip to content

Commit

Permalink
Merge pull request #19 from ComplexData-MILA/elahe
Browse files Browse the repository at this point in the history
Updated new features
  • Loading branch information
shenyangHuang authored Jan 21, 2024
2 parents 197e1c2 + c027b86 commit 689d1bc
Show file tree
Hide file tree
Showing 20 changed files with 1,794 additions and 1,980 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#dataset
*.png
*.pdf
*.cpython-39.pyc
*.pyc
*.xz
Expand Down
4 changes: 2 additions & 2 deletions docs/tutorials/Features.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -423,7 +423,7 @@
"outputs": [],
"source": [
"node_engagement = get_avg_node_engagement(data)\n",
"plot_for_snapshots(node_engagement, filename=f\"{dataset.name}_avg_node_engagement_per_ts\", y_title=\"node engagement\")"
"plot_for_snapshots(node_engagement, y_title=\"node engagement\", filename=\"./\" + f\"{dataset.name}_avg_node_engagement_per_ts\")"
]
},
{
Expand All @@ -447,7 +447,7 @@
"outputs": [],
"source": [
"node_activity = get_avg_node_activity(data)\n",
"plot_for_snapshots(node_activity, filename=f\"{dataset.name}_avg_node_activity_per_ts\", y_title=\"node activity\")"
"plot_for_snapshots(node_activity, y_title=\"node activity\", filename=\"./\" + f\"{dataset.name}_avg_node_activity_per_ts\")"
]
}
],
Expand Down
4 changes: 2 additions & 2 deletions docs/tutorials/data_loader.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -256,11 +256,11 @@
}
],
"source": [
"from tgx.utils.graph_stat import get_avg_node_engagement\n",
"from tgx.utils.stat import get_avg_node_engagement\n",
"\n",
"node_engagement = get_avg_node_engagement(data)\n",
"filename = f\"{dataset.name}_ave_node_engagement_per_ts\"\n",
"tgx.utils.plot_for_snapshots(node_engagement, filename, \"node engagement\", plot_title=\"Average node engagement\")"
"tgx.utils.plot_for_snapshots(node_engagement, y_title=\"avg. node engagement\", filename= \"./\" + filename)"
]
},
{
Expand Down
63 changes: 32 additions & 31 deletions examples/data_viz.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,61 @@
import tgx
from tgx.utils.graph_utils import list2csv
from tgx.utils.plotting_utils import plot_for_snapshots

"""
1. load a dataset
2. load into a graph
3. discretize the graph
4. save the graph back to a csv
master example showing all visualizations in tgx
"""

#! load the datasets
# dataset = tgx.builtin.uci() #built in datasets
# data_name = "uci"
#* load built in datasets
dataset = tgx.builtin.uci() #built in datasets

data_name = "tgbn-token" #"tgbl-wiki" #"tgbl-review"
dataset = tgx.tgb_data(data_name) #tgb datasets
#* load the tgb datasets
# data_name = "tgbl-wiki" #"tgbl-review"
# dataset = tgx.tgb_data(data_name) #tgb datasets


time_scale = "daily" #"hourly"
ctdg = tgx.Graph(dataset)
dtdg = ctdg.discretize(time_scale=time_scale)
time_scale = "weekly" #"daily"
dtdg = ctdg.discretize(time_scale=time_scale)[0]


#! plotting the statistics, works
tgx.degree_over_time(dtdg, network_name=data_name)
tgx.nodes_over_time(dtdg, network_name=data_name)
tgx.edges_over_time(dtdg, network_name=data_name)
tgx.nodes_and_edges_over_time(dtdg, network_name=data_name)
#* plotting the statistics
tgx.degree_over_time(dtdg, network_name=dataset.name)
tgx.nodes_over_time(dtdg, network_name=dataset.name)
tgx.edges_over_time(dtdg, network_name=dataset.name)
tgx.nodes_and_edges_over_time(dtdg, network_name=dataset.name)

tgx.TET(dtdg,
network_name=data_name,
network_name=dataset.name,
figsize = (9, 5),
axis_title_font_size = 24,
ticks_font_size = 24)


tgx.TEA(dtdg,
network_name=data_name)
network_name=dataset.name)



#! compute statistics
#* compute statistics
test_ratio = 0.15
tgx.get_reoccurrence(dtdg, test_ratio=test_ratio)
tgx.get_surprise(dtdg, test_ratio=test_ratio)

#* these two are much faster on dtdgs
tgx.get_avg_node_activity(dtdg)
tgx.get_reoccurrence(ctdg, test_ratio=test_ratio)
tgx.get_surprise(ctdg, test_ratio=test_ratio)
tgx.get_novelty(dtdg)


# Number of Connected Components
tgx.connected_components_per_ts(dtdg, network_name=dataset.name)

# Size of Largest Connected Component
component_sizes = tgx.size_connected_components(dtdg)
largest_component_sizes = [max(inner_list) if inner_list else 0 for inner_list in component_sizes]
filename = f"{dataset.name}_largest_connected_component_size"
plot_for_snapshots(largest_component_sizes, y_title="Size of Largest Connected Component", filename="./"+filename)

# Average Node Engagement
engagements = tgx.get_avg_node_engagement(dtdg)
filename = f"{dataset.name}_average_node_engagement"
plot_for_snapshots(engagements, y_title="Average Engagement", filename="./"+filename)

# #! statistics to be updated and fixed
# #TODO
# tgx.degree_density()
# tgx.connected_components_per_ts()
# tgx.size_connected_components()
# tgx.get_avg_node_engagement()
# Degree Density
tgx.degree_density(dtdg, k=3, network_name=dataset.name)
20 changes: 0 additions & 20 deletions examples/test_package2.py

This file was deleted.

28 changes: 14 additions & 14 deletions tgx/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
from tgx.classes.graph import Graph

from tgx.data.builtin import builtin
from tgx.data.tgb import tgb_data

from tgx.io.read import read_csv
from tgx.io.write import write_csv

from tgx.viz.TEA import TEA
from tgx.viz.TET import TET

from tgx.utils.stat import *
from tgx.utils.graph_utils import *

from tgx.classes.graph import Graph

from tgx.data.builtin import builtin
from tgx.data.tgb import tgb_data

from tgx.io.read import read_csv
from tgx.io.write import write_csv

from tgx.viz.TEA import TEA
from tgx.viz.TET import TET

from tgx.utils.stat import *
from tgx.utils.graph_utils import *
168 changes: 84 additions & 84 deletions tgx/classes/.ipynb_checkpoints/graph-checkpoint.py
Original file line number Diff line number Diff line change
@@ -1,85 +1,85 @@
import networkx as nx
from typing import Optional


class Graph():
    """Temporal graph stored as a timestamp-keyed edgelist.

    The edgelist has the form ``{t: {(u, v): freq}}``: each timestamp ``t``
    maps to a dictionary whose keys are ``(u, v)`` node pairs and whose
    values are the frequency of that edge at ``t``.
    """

    def __init__(self,
                 edgelist: Optional[dict] = None,
                 discretized: Optional[bool] = True):
        """
        Create a Graph object with specific characteristics
        Args:
            edgelist: a dictionary of temporal edges in the form of {t: {(u, v): freq}}
            discretized: whether the given edgelist was discretized or not
        """
        self.edgelist = edgelist
        self.subsampled_graph = None
        # NOTE: the "discrite" spelling is kept on purpose; these attribute
        # names are part of the class's existing public surface.
        if discretized:
            self.discrite_graph = self._generate_graph()
            self.discrite_edgelist = edgelist
        else:
            self.continuous_edgelist = edgelist

    @staticmethod
    def _collect_nodes(edgelist: dict) -> list:
        """Return the distinct nodes of ``edgelist`` in first-seen order."""
        seen = {}  # a dict (not a set) so that insertion order is preserved
        for edge_data in edgelist.values():
            for u, v in edge_data:
                seen[u] = None
                seen[v] = None
        return list(seen)

    def number_of_nodes(self, edgelist: dict = None) -> int:
        """
        Calculate total number of nodes present in an edgelist
        Args:
            edgelist: edgelist to inspect; defaults to the graph's own edgelist
        Returns:
            the number of distinct nodes, or 0 when no edgelist is available
        """
        if edgelist is None:
            edgelist = self.edgelist
        if edgelist is None:
            # Previously returned [] here, contradicting the int return type.
            return 0
        return len(self._collect_nodes(edgelist))

    def nodes(self) -> list:
        """
        Return a list of nodes present in the graph's edgelist
        Returns:
            the distinct nodes in first-seen order (empty when the graph has
            no edgelist); the same list is also cached on ``self.node_list``
        """
        found = [] if self.edgelist is None else self._collect_nodes(self.edgelist)
        self.node_list = found
        # Hand back a copy so callers cannot mutate the cached list.
        return list(found)

    def _generate_graph(self,
                        edgelist: Optional[dict] = None
                        ) -> list:
        '''
        Generate a list of graph snapshots. Each snapshot is a
        Networkx graph object.
        Parameters:
            edgelist: a dictionary in the form of {t: {(u, v): freq}};
                      defaults to the graph's own edgelist
        Returns:
            G_times: a list of networkx graphs, one per timestamp that has at
                     least one edge, in edgelist iteration order
        '''
        if edgelist is None:
            edgelist = self.edgelist
        if edgelist is None:
            return []
        G_times = []
        for edge_data in edgelist.values():
            if not edge_data:
                # Timestamps without edges produce no snapshot.
                continue
            # A fresh graph per timestamp: no dependence on an initial
            # "current timestamp" of 0, which used to prepend an empty
            # snapshot whenever the first timestamp was not 0.
            G = nx.Graph()
            for (u, v), n in edge_data.items():
                G.add_edge(u, v, freq=n)
            G_times.append(G)
        return G_times

import networkx as nx
from typing import Optional


class Graph():
    """Temporal graph stored as a timestamp-keyed edgelist.

    The edgelist has the form ``{t: {(u, v): freq}}``: each timestamp ``t``
    maps to a dictionary whose keys are ``(u, v)`` node pairs and whose
    values are the frequency of that edge at ``t``.
    """

    def __init__(self,
                 edgelist: Optional[dict] = None,
                 discretized: Optional[bool] = True):
        """
        Create a Graph object with specific characteristics
        Args:
            edgelist: a dictionary of temporal edges in the form of {t: {(u, v): freq}}
            discretized: whether the given edgelist was discretized or not
        """
        self.edgelist = edgelist
        self.subsampled_graph = None
        # NOTE: the "discrite" spelling is kept on purpose; these attribute
        # names are part of the class's existing public surface.
        if discretized:
            self.discrite_graph = self._generate_graph()
            self.discrite_edgelist = edgelist
        else:
            self.continuous_edgelist = edgelist

    @staticmethod
    def _collect_nodes(edgelist: dict) -> list:
        """Return the distinct nodes of ``edgelist`` in first-seen order."""
        seen = {}  # a dict (not a set) so that insertion order is preserved
        for edge_data in edgelist.values():
            for u, v in edge_data:
                seen[u] = None
                seen[v] = None
        return list(seen)

    def number_of_nodes(self, edgelist: dict = None) -> int:
        """
        Calculate total number of nodes present in an edgelist
        Args:
            edgelist: edgelist to inspect; defaults to the graph's own edgelist
        Returns:
            the number of distinct nodes, or 0 when no edgelist is available
        """
        if edgelist is None:
            edgelist = self.edgelist
        if edgelist is None:
            # Previously returned [] here, contradicting the int return type.
            return 0
        return len(self._collect_nodes(edgelist))

    def nodes(self) -> list:
        """
        Return a list of nodes present in the graph's edgelist
        Returns:
            the distinct nodes in first-seen order (empty when the graph has
            no edgelist); the same list is also cached on ``self.node_list``
        """
        found = [] if self.edgelist is None else self._collect_nodes(self.edgelist)
        self.node_list = found
        # Hand back a copy so callers cannot mutate the cached list.
        return list(found)

    def _generate_graph(self,
                        edgelist: Optional[dict] = None
                        ) -> list:
        '''
        Generate a list of graph snapshots. Each snapshot is a
        Networkx graph object.
        Parameters:
            edgelist: a dictionary in the form of {t: {(u, v): freq}};
                      defaults to the graph's own edgelist
        Returns:
            G_times: a list of networkx graphs, one per timestamp that has at
                     least one edge, in edgelist iteration order
        '''
        if edgelist is None:
            edgelist = self.edgelist
        if edgelist is None:
            return []
        G_times = []
        for edge_data in edgelist.values():
            if not edge_data:
                # Timestamps without edges produce no snapshot.
                continue
            # A fresh graph per timestamp: no dependence on an initial
            # "current timestamp" of 0, which used to prepend an empty
            # snapshot whenever the first timestamp was not 0.
            G = nx.Graph()
            for (u, v), n in edge_data.items():
                G.add_edge(u, v, freq=n)
            G_times.append(G)
        return G_times


Binary file removed tgx/classes/__pycache__/graph.cpython-39.pyc
Binary file not shown.
Loading

0 comments on commit 689d1bc

Please sign in to comment.