-
Notifications
You must be signed in to change notification settings - Fork 52
/
dataset.py
30 lines (25 loc) · 1.2 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import datetime
import pandas as pd
class movielens_1m(object):
    """Load the MovieLens-1M user, movie and rating tables into DataFrames.

    The three tables are read eagerly at construction time and exposed as:

    * ``user_data``  -- columns ``user_id, gender, age, occupation_code, zip``
    * ``item_data``  -- columns ``movie_id, title, year, rate, released,
      genre, director, writer, actors, plot, poster``
    * ``score_data`` -- columns ``user_id, movie_id, rating, time``
    """

    # Directory used when the caller does not supply one (the original
    # hard-coded location, kept so existing callers behave the same).
    DEFAULT_PATH = "movielens/ml-1m"

    def __init__(self, path=None):
        """Read all three tables from ``path``.

        Args:
            path: Directory containing ``users.dat``, ``ratings.dat`` and
                ``movies_extrainfos.dat``. ``None`` selects ``DEFAULT_PATH``.
        """
        self.user_data, self.item_data, self.score_data = self.load(path)

    @staticmethod
    def _read_table(file_path, columns, **read_kwargs):
        """Read one headerless ``::``-separated file with the given column names."""
        # A multi-character separator is treated as a regex by pandas and is
        # only supported by the python parsing engine.
        return pd.read_csv(
            file_path, names=columns, sep="::", engine='python', **read_kwargs
        )

    def load(self, path=None):
        """Read the dataset files and return ``(users, items, scores)``.

        Args:
            path: Directory containing the ``.dat`` files. ``None`` selects
                ``DEFAULT_PATH``.

        Returns:
            A 3-tuple of DataFrames ``(profile_data, item_data, score_data)``.
            In ``score_data`` the raw ``timestamp`` column is replaced by a
            ``time`` column of datetimes.
        """
        if path is None:
            path = self.DEFAULT_PATH

        profile_data_path = "{}/users.dat".format(path)
        score_data_path = "{}/ratings.dat".format(path)
        item_data_path = "{}/movies_extrainfos.dat".format(path)

        profile_data = self._read_table(
            profile_data_path,
            ['user_id', 'gender', 'age', 'occupation_code', 'zip'],
        )
        item_data = self._read_table(
            item_data_path,
            ['movie_id', 'title', 'year', 'rate', 'released', 'genre',
             'director', 'writer', 'actors', 'plot', 'poster'],
            encoding="utf-8",
        )
        score_data = self._read_table(
            score_data_path,
            ['user_id', 'movie_id', 'rating', 'timestamp'],
        )

        # fromtimestamp() without a tz argument yields a naive datetime in the
        # machine's local timezone; kept as-is so existing results do not shift.
        score_data['time'] = score_data["timestamp"].map(
            datetime.datetime.fromtimestamp
        )
        score_data = score_data.drop(["timestamp"], axis=1)
        return profile_data, item_data, score_data