Skip to content

Commit

Permalink
fixed: setgnn; feat: load_line_expansion_dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
yizhihenpidehou committed Apr 27, 2024
1 parent b1ed256 commit 11c812b
Showing 1 changed file with 25 additions and 21 deletions.
46 changes: 25 additions & 21 deletions easygraph/datasets/hypergraph/loadDeepSetDatasets.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,45 @@
import torch
import os.path as osp

import numpy as np
import scipy.sparse as sp
import torch

from torch_geometric.data import Data
from torch_sparse import coalesce


__all__ = ["load_line_expansion_dataset"]
def load_line_expansion_dataset(path=None, dataset="cocitation-cora", train_percent = 0.5):


def load_line_expansion_dataset(
path=None, dataset="cocitation-cora", train_percent=0.5
):
# load edges, features, and labels.
print('Loading {} dataset...'.format(dataset))
print("Loading {} dataset...".format(dataset))

file_name = f'{dataset}.content'
file_name = f"{dataset}.content"
p2idx_features_labels = osp.join(path, dataset, file_name)
idx_features_labels = np.genfromtxt(p2idx_features_labels,
dtype=np.dtype(str))
idx_features_labels = np.genfromtxt(p2idx_features_labels, dtype=np.dtype(str))
# features = np.array(idx_features_labels[:, 1:-1])
features = sp.csr_matrix(idx_features_labels[:, 1:-1], dtype=np.float32)
# labels = encode_onehot(idx_features_labels[:, -1])
labels = torch.LongTensor(idx_features_labels[:, -1].astype(float))

print('load features')
print("load features")

# build graph
idx = np.array(idx_features_labels[:, 0], dtype=np.int32)
idx_map = {j: i for i, j in enumerate(idx)}

file_name = f'{dataset}.edges'
file_name = f"{dataset}.edges"
p2edges_unordered = osp.join(path, dataset, file_name)
edges_unordered = np.genfromtxt(p2edges_unordered,
dtype=np.int32)
edges_unordered = np.genfromtxt(p2edges_unordered, dtype=np.int32)

edges = np.array(list(map(idx_map.get, edges_unordered.flatten())),
dtype=np.int32).reshape(edges_unordered.shape)

print('load edges')
edges = np.array(
list(map(idx_map.get, edges_unordered.flatten())), dtype=np.int32
).reshape(edges_unordered.shape)

print("load edges")

# From adjacency matrix to edge_list
edge_index = edges.T
Expand All @@ -52,16 +57,15 @@ def load_line_expansion_dataset(path=None, dataset="cocitation-cora", train_perc
data = Data(
x=torch.FloatTensor(np.array(features[:num_nodes].todense())),
edge_index=torch.LongTensor(edge_index),
y=labels[:num_nodes])

y=labels[:num_nodes],
)

# Use a user function to override the default function.
# The following call will also sort the edge_index and remove duplicates.
total_num_node_id_he_id = len(np.unique(edge_index))
data.edge_index, data.edge_attr = coalesce(data.edge_index,
None,
total_num_node_id_he_id,
total_num_node_id_he_id)
data.edge_index, data.edge_attr = coalesce(
data.edge_index, None, total_num_node_id_he_id, total_num_node_id_he_id
)
n_x = num_nodes
# n_x = n_expanded
num_class = len(np.unique(labels[:num_nodes].numpy()))
Expand All @@ -71,4 +75,4 @@ def load_line_expansion_dataset(path=None, dataset="cocitation-cora", train_perc
data.train_percent = train_percent
data.num_hyperedges = num_he

return data
return data

0 comments on commit 11c812b

Please sign in to comment.