Commit a9b1aea (1 parent: b35d3b3). Showing 5 changed files with 601 additions and 0 deletions.
File 1 of 5 (+102 lines): the simulation environment, imported as environment by the main script in the second file.
from functools import reduce
from scipy.special import expit
import numpy as np


# This class uses the user and playlist features datasets to simulate user responses to a list of recommendations
class ContextualEnvironment():
    def __init__(self, user_features, playlist_features, user_segment, n_recos):
        self.user_features = user_features
        self.playlist_features = playlist_features
        self.user_segment = user_segment
        self.n_recos = n_recos
        self.th_segment_rewards = np.zeros(user_features.shape[0])
        self.th_rewards = np.zeros(user_features.shape[0])
        self.compute_optimal_theoretical_rewards()
        self.compute_segment_optimal_theoretical_rewards()
    # Computes expected reward for each user given their recommendations
    def compute_theoretical_rewards(self, batch_user_ids, batch_recos):
        batch_user_features = np.take(self.user_features, batch_user_ids, axis=0)
        batch_playlist_features = np.take(self.playlist_features, batch_recos, axis=0)
        n_users = len(batch_user_ids)
        th_reward = np.zeros(n_users)
        for i in range(n_users):
            probas = expit(batch_user_features[i].dot(batch_playlist_features[i].T))
            th_reward[i] = 1 - reduce(lambda x, y: x * y, 1 - probas)
        return th_reward
    # Computes list of n recommendations with highest expected reward for each user
    def compute_optimal_recos(self, batch_user_ids, n):
        batch_user_features = np.take(self.user_features, batch_user_ids, axis=0)
        n_users = len(batch_user_ids)
        probas = batch_user_features.dot(self.playlist_features.T)
        optim = np.argsort(-probas)[:, :n]
        return optim

    # Computes highest expected reward for each user
    def compute_optimal_theoretical_rewards(self):
        n_users = self.user_features.shape[0]
        u = 0
        step = 100000
        while u < n_users:
            users_ids = range(u, min(n_users, u + step))
            opt_recos = self.compute_optimal_recos(users_ids, self.n_recos)
            opt_rewards = self.compute_theoretical_rewards(users_ids, opt_recos)
            self.th_rewards[u:min(n_users, u + step)] = opt_rewards
            u += step
        return
    # Computes list of n recommendations with highest expected reward for each segment
    def compute_segment_optimal_recos(self, n):
        n_segments = len(np.unique(self.user_segment))
        segment_recos = np.zeros((n_segments, n), dtype=np.int64)
        for i in range(n_segments):
            mean_probas = np.mean(expit(np.take(self.user_features, np.where(self.user_segment == i)[0], axis=0).dot(self.playlist_features.T)), axis=0)
            segment_recos[i] = np.argsort(-mean_probas)[:n]
        return segment_recos
    # Computes highest expected reward for each segment
    def compute_segment_optimal_theoretical_rewards(self):
        n_users = self.user_features.shape[0]
        u = 0
        step = 100000
        segment_recos = self.compute_segment_optimal_recos(self.n_recos)
        while u < n_users:
            users_ids = range(u, min(n_users, u + step))
            user_segment = np.take(self.user_segment, users_ids)
            opt_recos = np.take(segment_recos, user_segment, axis=0)
            opt_rewards = self.compute_theoretical_rewards(users_ids, opt_recos)
            self.th_segment_rewards[u:min(n_users, u + step)] = opt_rewards
            u += step
        return
    # Given a list of users and their respective lists of recos (each of size self.n_recos),
    # computes the corresponding simulated rewards
    def simulate_batch_users_reward(self, batch_user_ids, batch_recos):

        # First, compute the probability of streaming each reco and draw rewards accordingly
        batch_user_features = np.take(self.user_features, batch_user_ids, axis=0)
        batch_playlist_features = np.take(self.playlist_features, batch_recos, axis=0)
        n_users = len(batch_user_ids)
        n = len(batch_recos[0])
        probas = np.zeros((n_users, n))
        for i in range(n_users):
            probas[i] = expit(batch_user_features[i].dot(batch_playlist_features[i].T))  # probability of streaming each reco
        rewards = np.zeros((n_users, n))
        rewards_uncascaded = np.random.binomial(1, probas)  # drawing rewards from probabilities
        positive_rewards = set()

        # Then, for each user, positive rewards after the first one are set to 0 (and the corresponding
        # playlists are treated as unseen) to imitate a cascading browsing behavior.
        # (Nonetheless, a user can be drawn several times within the batch of a single round; such a user
        # can therefore still collect several positive rewards, i.e. stream several playlists, in the same
        # round, consistently with the multiple-plays framework from the paper.)
        nz = rewards_uncascaded.nonzero()
        for i in range(len(nz[0])):
            if nz[0][i] not in positive_rewards:
                rewards[nz[0][i]][nz[1][i]] = 1
                positive_rewards.add(nz[0][i])
        return rewards
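A note on what this environment computes: for a user u with feature vector x_u and a slate of playlists with features y_1, ..., y_L, each display-to-stream probability is sigmoid(x_u . y_i), and the expected slate reward is 1 - prod_i (1 - sigmoid(x_u . y_i)), i.e. the probability that at least one playlist is streamed. Because the cascade in simulate_batch_users_reward keeps at most one positive reward per drawn user, the empirical stream rate should converge to exactly this quantity. Below is a minimal sanity-check sketch, assuming ContextualEnvironment above is in scope; all sizes and seeds are made up for illustration.

import numpy as np

np.random.seed(0)
n_users, n_playlists, dim, n_recos = 50, 30, 8, 5

# Tiny random problem; segments 0, 1, 2 assigned round-robin so none is empty
user_features = np.random.normal(0, 0.5, size=(n_users, dim))
playlist_features = np.random.normal(0, 0.5, size=(n_playlists, dim))
user_segment = np.arange(n_users) % 3

env = ContextualEnvironment(user_features, playlist_features, user_segment, n_recos)

# Give every user their optimal slate and average many simulated rounds:
# the empirical stream rate should approach the theoretical reward env.th_rewards
user_ids = np.arange(n_users)
recos = env.compute_optimal_recos(user_ids, n_recos)
empirical = np.mean([env.simulate_batch_users_reward(user_ids, recos).sum(axis=1)
                     for _ in range(500)], axis=0)
print(np.abs(empirical - env.th_rewards).max())  # small; shrinks with more simulated rounds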
File 2 of 5 (+131 lines): the main simulation script.
from environment import ContextualEnvironment
from policies import KLUCBSegmentPolicy, RandomPolicy, ExploreThenCommitSegmentPolicy, EpsilonGreedySegmentPolicy, TSSegmentPolicy, LinearTSPolicy
import argparse
import json
import logging
import numpy as np
import pandas as pd
import time


# List of implemented policies
def set_policies(policies_name, user_segment, user_features, n_playlists):
    # Please see Section 3.3 of the RecSys paper for a description of the policies
    POLICIES_SETTINGS = {
        'random': RandomPolicy(n_playlists),
        'etc-seg-explore': ExploreThenCommitSegmentPolicy(user_segment, n_playlists, min_n=100, cascade_model=True),
        'etc-seg-exploit': ExploreThenCommitSegmentPolicy(user_segment, n_playlists, min_n=20, cascade_model=True),
        'epsilon-greedy-explore': EpsilonGreedySegmentPolicy(user_segment, n_playlists, epsilon=0.1, cascade_model=True),
        'epsilon-greedy-exploit': EpsilonGreedySegmentPolicy(user_segment, n_playlists, epsilon=0.01, cascade_model=True),
        'kl-ucb-seg': KLUCBSegmentPolicy(user_segment, n_playlists, cascade_model=True),
        'ts-seg-naive': TSSegmentPolicy(user_segment, n_playlists, alpha_zero=1, beta_zero=1, cascade_model=True),
        'ts-seg-pessimistic': TSSegmentPolicy(user_segment, n_playlists, alpha_zero=1, beta_zero=99, cascade_model=True),
        'ts-lin-naive': LinearTSPolicy(user_features, n_playlists, bias=0.0, cascade_model=True),
        'ts-lin-pessimistic': LinearTSPolicy(user_features, n_playlists, bias=-5.0, cascade_model=True),
        # Versions of epsilon-greedy-explore and ts-seg-pessimistic WITHOUT the cascade model
        'epsilon-greedy-explore-no-cascade': EpsilonGreedySegmentPolicy(user_segment, n_playlists, epsilon=0.1, cascade_model=False),
        'ts-seg-pessimistic-no-cascade': TSSegmentPolicy(user_segment, n_playlists, alpha_zero=1, beta_zero=99, cascade_model=False)
    }

    return [POLICIES_SETTINGS[name] for name in policies_name]
if __name__ == "__main__":

    # Arguments

    parser = argparse.ArgumentParser()
    parser.add_argument("--users_path", type=str, default="data/user_features.csv", required=False,
                        help="Path to user features file")
    parser.add_argument("--playlists_path", type=str, default="data/playlist_features.csv", required=False,
                        help="Path to playlist features file")
    parser.add_argument("--output_path", type=str, default="results.json", required=False,
                        help="Path to the json file in which regret values will be saved")
    parser.add_argument("--policies", type=str, default="random,ts-seg-naive", required=False,
                        help="Bandit algorithms to evaluate, separated by commas")
    parser.add_argument("--n_recos", type=int, default=12, required=False,
                        help="Number of slots L in the carousel, i.e. the number of recommendations to provide")
    parser.add_argument("--l_init", type=int, default=3, required=False,
                        help="Number of slots L_init initially visible in the carousel")
    parser.add_argument("--n_users_per_round", type=int, default=20000, required=False,
                        help="Number of users randomly selected (with replacement) per round")
    parser.add_argument("--n_rounds", type=int, default=100, required=False,
                        help="Number of simulated rounds")
    parser.add_argument("--print_every", type=int, default=10, required=False,
                        help="Print cumulative regrets every 'print_every' rounds")

    args = parser.parse_args()

    logging.basicConfig(level=logging.INFO)
    logger = logging.getLogger(__name__)

    if args.l_init > args.n_recos:
        raise ValueError('l_init is larger than n_recos')
    # Data loading and preprocessing steps

    logger.info("LOADING DATA")
    logger.info("Loading playlist data")
    playlists_df = pd.read_csv(args.playlists_path)

    logger.info("Loading user data\n\n")
    users_df = pd.read_csv(args.users_path)

    n_users = len(users_df)
    n_playlists = len(playlists_df)
    n_recos = args.n_recos
    print_every = args.print_every

    user_features = np.array(users_df.drop(["segment"], axis=1))
    user_features = np.concatenate([user_features, np.ones((n_users, 1))], axis=1)  # append a constant bias column
    playlist_features = np.array(playlists_df)

    user_segment = np.array(users_df.segment)

    logger.info("SETTING UP SIMULATION ENVIRONMENT")
    logger.info("for %d users, %d playlists, %d recommendations per carousel\n\n" % (n_users, n_playlists, n_recos))

    cont_env = ContextualEnvironment(user_features, playlist_features, user_segment, n_recos)

    logger.info("SETTING UP POLICIES")
    logger.info("Policies to evaluate: %s\n\n" % (args.policies))

    policies_name = args.policies.split(",")
    policies = set_policies(policies_name, user_segment, user_features, n_playlists)
    n_policies = len(policies)
    n_users_per_round = args.n_users_per_round
    n_rounds = args.n_rounds
    overall_rewards = np.zeros((n_policies, n_rounds))
    overall_optimal_reward = np.zeros(n_rounds)
    # Simulations for top-n_recos carousel-based playlist recommendations

    logger.info("STARTING SIMULATIONS")
    logger.info("for %d rounds, with %d users per round (randomly drawn with replacement)\n\n" % (n_rounds, n_users_per_round))
    start_time = time.time()
    for i in range(n_rounds):
        # Select the batch of n_users_per_round users
        user_ids = np.random.choice(range(n_users), n_users_per_round)
        overall_optimal_reward[i] = np.take(cont_env.th_rewards, user_ids).sum()
        # Iterate over all policies
        for j in range(n_policies):
            # Compute n_recos recommendations
            recos = policies[j].recommend_to_users_batch(user_ids, args.n_recos, args.l_init)
            # Compute rewards
            rewards = cont_env.simulate_batch_users_reward(batch_user_ids=user_ids, batch_recos=recos)
            # Update policy based on rewards
            policies[j].update_policy(user_ids, recos, rewards, args.l_init)
            overall_rewards[j, i] = rewards.sum()
        # Print info
        if i == 0 or (i + 1) % print_every == 0 or i + 1 == n_rounds:
            logger.info("Round: %d/%d. Elapsed time: %f sec." % (i + 1, n_rounds, time.time() - start_time))
            logger.info("Cumulative regrets: \n%s \n" % "\n".join(["    %s : %s" % (policies_name[j], str(np.sum(overall_optimal_reward - overall_rewards[j]))) for j in range(n_policies)]))

    # Save results

    logger.info("Saving cumulative regrets in %s" % args.output_path)
    cumulative_regrets = {policies_name[j]: list(np.cumsum(overall_optimal_reward - overall_rewards[j])) for j in range(n_policies)}
    with open(args.output_path, 'w') as fp:
        json.dump(cumulative_regrets, fp)
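A typical invocation of this script, as a hedged example: the filename main.py is an assumption (file names are not rendered in this diff), and the two feature CSVs are expected at their default paths under data/:

python main.py --policies ts-seg-pessimistic,ts-lin-naive --n_rounds 50 --output_path results.json

The resulting results.json maps each policy name to its per-round cumulative regret, which the plotting script in the fourth file reads back.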
File 3 of 5 (+48 lines): an online Bayesian logistic regression helper (presumably backing the ts-lin policies, though the policies file is not shown).
from scipy.optimize import minimize
import numpy as np


# Disclaimer: this class is taken from:
# https://gdmarmerola.github.io/ts-for-contextual-bandits/


# Defining a class for Online Bayesian Logistic Regression
class OnlineLogisticRegression:

    # Initializing
    def __init__(self, lambda_, alpha, n_dim, bias, maxiter=15):

        # Hyperparameters: lambda_ sets the prior precision (L2 regularizer),
        # alpha scales the noise when sampling weights
        self.lambda_ = lambda_
        self.alpha = alpha
        self.maxiter = maxiter

        # Initializing the parameters of the model
        self.n_dim = n_dim
        # m: mean of the weight distribution; q: its inverse variance (precision)
        self.m = np.zeros(self.n_dim)
        self.m[-1] = bias
        self.q = np.ones(self.n_dim) * self.lambda_

        # Initializing weights
        self.w = np.random.normal(self.m, self.alpha * (self.q) ** (-1.0), size=self.n_dim)
    # Loss function
    def loss(self, w, *args):
        X, y = args
        # Note: the bias is excluded from the regularization term of the loss
        return 0.5 * (self.q[:-1] * (w[:-1] - self.m[:-1])).dot(w[:-1] - self.m[:-1]) + np.sum([np.log(1 + np.exp(-y[j] * w.dot(X[j]))) for j in range(y.shape[0])])

    # Gradient
    def grad(self, w, *args):
        X, y = args
        return np.concatenate((self.q[:-1] * (w[:-1] - self.m[:-1]), 0.0), axis=None) + (-1) * np.array([y[j] * X[j] / (1. + np.exp(y[j] * w.dot(X[j]))) for j in range(y.shape[0])]).sum(axis=0)

    # Fitting method
    def fit(self, X, y):

        # Step 1: find the w minimizing the regularized log-loss (MAP estimate)
        self.w = minimize(self.loss, self.w, args=(X, y), jac=self.grad, method="L-BFGS-B", options={'maxiter': self.maxiter}).x
        self.m = self.w

        # Step 2: Laplace-approximation update of q, i.e. q += sum_j p_j * (1 - p_j) * x_j^2
        # with p = sigmoid(X . m)
        P = (1 + np.exp(-X.dot(self.m))) ** (-1)
        self.q = self.q + (P * (1 - P)).dot(X ** 2)
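A minimal usage sketch for this class. Assumptions, since no caller is shown in this diff: labels are encoded in {-1, +1} (implied by the -y[j] * w.dot(X[j]) term in the loss), and the synthetic data and hyperparameters below are purely illustrative.

import numpy as np

rng = np.random.default_rng(42)
n, d = 200, 5
X = np.concatenate([rng.normal(size=(n, d - 1)), np.ones((n, 1))], axis=1)  # last column plays the role of the bias
w_true = rng.normal(size=d)
y = np.where(rng.random(n) < 1.0 / (1.0 + np.exp(-X.dot(w_true))), 1, -1)  # labels in {-1, +1}

olr = OnlineLogisticRegression(lambda_=1.0, alpha=1.0, n_dim=d, bias=0.0)
olr.fit(X, y)

# Thompson-style draw from the fitted Gaussian approximation of the posterior,
# using the same alpha * q ** (-1.0) scale as the class's own initialization
w_sample = np.random.normal(olr.m, olr.alpha * olr.q ** (-1.0), size=d)
print(np.round(olr.m, 2), np.round(w_true, 2))

Note that alpha * q ** (-1.0) mirrors the constructor; a textbook Laplace approximation would sample with standard deviation q ** (-0.5), so in practice alpha also absorbs that difference.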
File 4 of 5 (+26 lines): a script that plots the cumulative regret curves saved by the main script.
import argparse
import json
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

sns.set_style(style='darkgrid')


# Plots the evolution of the expected cumulative regret curves,
# for all tested policies and over all rounds
if __name__ == "__main__":

    parser = argparse.ArgumentParser()
    parser.add_argument("--data_path", type=str, default="results.json", required=False,
                        help="Path to the cumulative regrets json file")

    args = parser.parse_args()

    with open(args.data_path, 'r') as fp:
        cumulative_regrets = json.load(fp)

    for k, v in cumulative_regrets.items():
        sns.lineplot(data=np.array(v), label=k)
    plt.xlabel("Round")
    plt.ylabel("Cumulative Regret")
    plt.show()
File 5 of 5 (the remaining +294 lines) failed to render on the original page and is not shown. Judging from the imports in the main script, it is the policies module defining RandomPolicy, ExploreThenCommitSegmentPolicy, EpsilonGreedySegmentPolicy, KLUCBSegmentPolicy, TSSegmentPolicy and LinearTSPolicy.