-
Notifications
You must be signed in to change notification settings - Fork 198
/
Copy pathmovie-recommendations-als-1m.py
41 lines (32 loc) · 1.38 KB
/
movie-recommendations-als-1m.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
import sys
from pyspark import SparkConf, SparkContext
from pyspark.mllib.recommendation import ALS, Rating
def loadMovieNames(path="ml-1m/movies.dat"):
    """Build a lookup table from movie ID to movie title.

    Each line of the file is split on '::'. The first field is parsed as the
    integer movie ID and the second field is taken as the title. Non-ASCII
    characters are dropped from titles so they can be concatenated and
    printed without encoding errors.

    Args:
        path: Location of the movies file. Defaults to the path that was
            previously hard-coded, so existing zero-argument calls behave
            the same way.

    Returns:
        dict mapping int movie ID -> ASCII-only title string.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError: if the first field of a line is not an integer.
    """
    # io.open accepts an explicit encoding on both Python 2 and Python 3,
    # unlike the Python 2 builtin open().
    import io

    movieNames = {}
    # ISO-8859-1 maps every possible byte to a character, so reading can never
    # raise a decode error; the ASCII filter below then discards anything
    # outside the ASCII range (the same result the old byte-string
    # .decode('ascii', 'ignore') produced on Python 2).
    with io.open(path, encoding="ISO-8859-1") as f:
        for line in f:
            fields = line.rstrip("\r\n").split("::")
            if len(fields) < 2:
                # Blank or malformed line: nothing usable to record.
                continue
            title = fields[1].encode("ascii", "ignore").decode("ascii")
            movieNames[int(fields[0])] = title
    return movieNames
# Resolve the target user ID before doing any Spark work. Training the ALS
# model is the expensive step, so a missing or malformed argument should be
# reported immediately rather than after the model has been built.
if len(sys.argv) < 2:
    sys.exit("Usage: " + sys.argv[0] + " <userID>")
try:
    userID = int(sys.argv[1])
except ValueError:
    sys.exit("userID must be an integer, got " + repr(sys.argv[1]))

# Run Spark locally, using every available core.
conf = SparkConf().setMaster("local[*]").setAppName("MovieRecommendationsALS")
sc = SparkContext(conf = conf)
# Directory Spark writes to when RDDs are checkpointed.
sc.setCheckpointDir('checkpoint')

print("\nLoading movie names...")
nameDict = loadMovieNames()

# Each ratings line is split on '::'; the first three fields are read as
# user ID, movie ID and rating. The RDD is cached because it is used both for
# training and for listing the user's own ratings below.
data = sc.textFile("file:///E:/SparkCourse/ml-1m/ratings.dat")
ratings = data.map(lambda l: l.split("::")).map(lambda l: Rating(int(l[0]), int(l[1]), float(l[2]))).cache()

# Build the recommendation model using Alternating Least Squares
print("\nTraining recommendation model...")
rank = 10
numIterations = 20
model = ALS.train(ratings, rank, numIterations)

# Show what the user has already rated, for context.
print("\nRatings for user ID " + str(userID) + ":")
userRatings = ratings.filter(lambda l: l[0] == userID)
for rating in userRatings.collect():
    print(nameDict[int(rating[1])] + ": " + str(rating[2]))

# Ask the trained model for this user's ten highest-scoring movies.
print("\nTop 10 recommendations:")
recommendations = model.recommendProducts(userID, 10)
for recommendation in recommendations:
    print(nameDict[int(recommendation[1])] + \
        " score " + str(recommendation[2]))