# Reference: https://github.com/kaleko/CourseraML/tree/master/ex8


import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import matplotlib
import scipy.optimize as opt


from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


ex8_movies = sio.loadmat('/content/drive/My Drive/AndrewNg-ML/ex8_movies.mat')


Y = ex8_movies['Y']
R = ex8_movies['R']

nm, nu = Y.shape


print ('Average rating for movie 1 (Toy Story):')
print(np.mean([ Y[0][x] for x in range(Y.shape[1]) if R[0][x] ]))

# "Visualize the ratings matrix"
fig = plt.figure(figsize=(6,6*(1682./943.)))
dummy = plt.imshow(Y)
dummy = plt.colorbar()
dummy = plt.ylabel('Movies (%d)'%nm,fontsize=20)
dummy = plt.xlabel('Users (%d)'%nu,fontsize=20)

Average rating for movie 1 (Toy Story):
3.8783185840707963


# Read in the movie params matrices
ex8_movieParams = sio.loadmat('/content/drive/My Drive/AndrewNg-ML/ex8_movieParams.mat')
X = ex8_movieParams['X']
Theta = ex8_movieParams['Theta']
nu = int(ex8_movieParams['num_users'])
nm = int(ex8_movieParams['num_movies'])
nf = int(ex8_movieParams['num_features'])

# For now, reduce the data set size so that this runs faster
nu = 4; nm = 5; nf = 3
X = X[:nm,:nf]
Theta = Theta[:nu,:nf]
Y = Y[:nm,:nu]
R = R[:nm,:nu]

X

array([[ 1.0486855 , -0.40023196,  1.19411945],
       [ 0.78085123, -0.38562591,  0.52119779],
       [ 0.64150886, -0.54785385, -0.08379638],
       [ 0.45361782, -0.80021844,  0.68048129],
       [ 0.93753789,  0.1060899 ,  0.36195295]])


X.flatten()

array([ 1.0486855 , -0.40023196,  1.19411945,  0.78085123, -0.38562591,
        0.52119779,  0.64150886, -0.54785385, -0.08379638,  0.45361782,
       -0.80021844,  0.68048129,  0.93753789,  0.1060899 ,  0.36195295])


np.concatenate((X.flatten(),Theta.flatten()))

array([ 1.0486855 , -0.40023196,  1.19411945,  0.78085123, -0.38562591,
        0.52119779,  0.64150886, -0.54785385, -0.08379638,  0.45361782,
       -0.80021844,  0.68048129,  0.93753789,  0.1060899 ,  0.36195295,
        0.28544362, -1.68426509,  0.26293877,  0.50501321, -0.45464846,
        0.31746244, -0.43191656, -0.47880449,  0.84671111,  0.72859839,
       -0.27189391,  0.3268436 ])


# The "parameters" we are minimizing are both the elements of the
# X matrix (nm*nf) and of the Theta matrix (nu*nf)
# To use off-the-shelf minimizers we need to flatten these matrices
# into one long array
def flattenParams(myX, myTheta):
    """
    Hand this function an X matrix and a Theta matrix and it will flatten
    it into into one long (nm*nf + nu*nf,1) shaped numpy array
    """
    return np.concatenate((myX.flatten(),myTheta.flatten()))

# A utility function to re-shape the X and Theta will probably come in handy
def reshapeParams(flattened_XandTheta, mynm, mynu, mynf):
    assert flattened_XandTheta.shape[0] == int(nm*nf+nu*nf)
    
    reX = flattened_XandTheta[:int(mynm*mynf)].reshape((mynm,mynf))
    reTheta = flattened_XandTheta[int(mynm*mynf):].reshape((mynu,mynf))
    
    return reX, reTheta


def cofiCostFunc(myparams, myY, myR, mynu, mynm, mynf, mylambda = 0.):
    
    # Unfold the X and Theta matrices from the flattened params
    myX, myTheta = reshapeParams(myparams, mynm, mynu, mynf)
  
    # Note: 
    # X Shape is (nm x nf), Theta shape is (nu x nf), Y and R shape is (nm x nu)
    # Behold! Complete vectorization
    
    # First dot theta and X together such that you get a matrix the same shape as Y
    term1 = myX.dot(myTheta.T)
    
    # Then element-wise multiply that matrix by the R matrix
    # so only terms from movies which that user rated are counted in the cost
    term1 = np.multiply(term1,myR)
    
    # Then subtract the Y- matrix (which has 0 entries for non-rated
    # movies by each user, so no need to multiply that by myR... though, if
    # a user could rate a movie "0 stars" then myY would have to be element-
    # wise multiplied by myR as well) 
    # also square that whole term, sum all elements in the resulting matrix,
    # and multiply by 0.5 to get the cost
    cost = 0.5 * np.sum( np.square(term1-myY) )
    
    # Regularization stuff
    cost += (mylambda/2.) * np.sum(np.square(myTheta))
    cost += (mylambda/2.) * np.sum(np.square(myX))
    
    return cost


# "...run your cost function. You should expect to see an output of 22.22."
print ('Cost with nu = 4, nm = 5, nf = 3 is ')
print (cofiCostFunc(flattenParams(X,Theta),Y,R,nu,nm,nf))
    
# "...with lambda = 1.5 you should expect to see an output of 31.34."
print ('Cost with nu = 4, nm = 5, nf = 3 (and lambda = 1.5) is ')
print(cofiCostFunc(flattenParams(X,Theta), Y, R, nu, nm, nf, mylambda=1.5))

Cost with nu = 4, nm = 5, nf = 3 is 
22.224603725685675
Cost with nu = 4, nm = 5, nf = 3 (and lambda = 1.5) is 
31.34405624427422


# Remember: use the exact same input arguments for gradient function
# as for the cost function (the off-the-shelf minimizer requires this)
def cofiGrad(myparams, myY, myR, mynu, mynm, mynf, mylambda = 0.):
    
    # Unfold the X and Theta matrices from the flattened params
    myX, myTheta = reshapeParams(myparams, mynm, mynu, mynf)

    # First the X gradient term 
    # First dot theta and X together such that you get a matrix the same shape as Y
    term1 = myX.dot(myTheta.T)
    # Then multiply this term by myR to remove any components from movies that
    # weren't rated by that user
    term1 = np.multiply(term1,myR)
    # Now subtract the y matrix (which already has 0 for nonrated movies)
    term1 -= myY
    # Lastly dot this with Theta such that the resulting matrix has the
    # same shape as the X matrix
    Xgrad = term1.dot(myTheta)
    
    # Now the Theta gradient term (reusing the "term1" variable)
    Thetagrad = term1.T.dot(myX)

    # Regularization stuff
    Xgrad += mylambda * myX
    Thetagrad += mylambda * myTheta
    
    return flattenParams(Xgrad, Thetagrad)


#Let's check my gradient computation real quick:
def checkGradient(myparams, myY, myR, mynu, mynm, mynf, mylambda = 0.):
    
    print ('Numerical Gradient \t cofiGrad \t\t Difference')
    
    # Compute a numerical gradient with an epsilon perturbation vector
    myeps = 0.0001
    nparams = len(myparams)
    epsvec = np.zeros(nparams)
    # These are my implemented gradient solutions
    mygrads = cofiGrad(myparams,myY,myR,mynu,mynm,mynf,mylambda)

    # Choose 10 random elements of my combined (X, Theta) param vector
    # and compute the numerical gradient for each... print to screen
    # the numerical gradient next to the my cofiGradient to inspect
    
    for i in range(10):
        idx = np.random.randint(0,nparams)
        epsvec[idx] = myeps
        loss1 = cofiCostFunc(myparams-epsvec,myY,myR,mynu,mynm,mynf,mylambda)
        loss2 = cofiCostFunc(myparams+epsvec,myY,myR,mynu,mynm,mynf,mylambda)
        mygrad = (loss2 - loss1) / (2*myeps)
        epsvec[idx] = 0
        print (mygrad, mygrads[idx],mygrad - mygrads[idx])


print ("Checking gradient with lambda = 0...")
checkGradient(flattenParams(X,Theta),Y,R,nu,nm,nf)
print ("\nChecking gradient with lambda = 1.5...")
checkGradient(flattenParams(X,Theta),Y,R,nu,nm,nf,mylambda = 1.5)

Checking gradient with lambda = 0...
Numerical Gradient 	 cofiGrad 		 Difference
4.742718424690651 4.742718424695921 -5.270450742500543e-12
-10.568020204448914 -10.568020204450614 1.6999734953060397e-12
3.3526503128555873 3.352650312849549 6.038280986331301e-12
-0.35334048288149233 -0.35334048287506953 -6.422806730910224e-12
4.742718424690651 4.742718424695921 -5.270450742500543e-12
3.3526503128555873 3.352650312849549 6.038280986331301e-12
-2.5289916460913275 -2.5289916460823334 -8.994138767093318e-12
-0.8324071330889637 -0.832407133096985 8.021250330614293e-12
4.62776019000799 4.627760190006159 1.8314239014216582e-12
-3.4741078867206454 -3.474107886729185 8.539391416206854e-12

Checking gradient with lambda = 1.5...
Numerical Gradient 	 cofiGrad 		 Difference
2.1013625613619524 2.1013625613886817 -2.672928545166542e-11
-0.8924733436010968 -0.8924733435974324 -3.664402115077792e-12
0.6030808809853738 0.6030808809792956 6.078249015217807e-12
0.12985615718719146 0.12985615716368837 2.3503088364407176e-11
4.089852197868282 4.089852197893254 -2.497202444828872e-11
1.0928975776991479 1.0928975776883072 1.0840661701649879e-11
4.901853273224788 4.901853273231165 -6.377121053446899e-12
2.1013625613619524 2.1013625613886817 -2.672928545166542e-11
2.1013625613619524 2.1013625613886817 -2.672928545166542e-11
0.2968439523343136 0.2968439523462539 -1.1940282096389865e-11


movies = []
movies_id = "/content/drive/My Drive/AndrewNg-ML/movie_ids.txt"
f = open(movies_id, "r", encoding = "ISO-8859-1")

for line in f:
    movies.append(' '.join(line.strip('\n').split(' ')[1:]))

# Rather than rate some movies myself, I'll use what was built-in to the homework
# (just so I can check my solutions)
my_ratings = np.zeros((1682,1))
my_ratings[0]   = 4
my_ratings[97]  = 2
my_ratings[6]   = 3
my_ratings[11]  = 5
my_ratings[53]  = 4
my_ratings[63]  = 5
my_ratings[65]  = 3
my_ratings[68]  = 5
my_ratings[182] = 4
my_ratings[225] = 5
my_ratings[354] = 5


Y = ex8_movies['Y']
R = ex8_movies['R']
# We'll use 10 features
nf = 10


# Add my ratings to the Y matrix, and the relevant row to the R matrix
myR_row = my_ratings > 0
Y = np.hstack((Y,my_ratings))
R = np.hstack((R,myR_row))
nm, nu = Y.shape


def normalizeRatings(myY, myR):
    """
    Preprocess data by subtracting mean rating for every movie (every row)
    This is important because without this, a user who hasn't rated any movies
    will have a predicted score of 0 for every movie, when in reality
    they should have a predicted score of [average score of that movie].
    """

    # The mean is only counting movies that were rated
    Ymean = np.sum(myY,axis=1)/np.sum(myR,axis=1)
    Ymean = Ymean.reshape((Ymean.shape[0],1))
    
    return myY-Ymean, Ymean


Ynorm, Ymean = normalizeRatings(Y,R)


# Generate random initial parameters, Theta and X
X = np.random.rand(nm,nf)
Theta = np.random.rand(nu,nf)
myflat = flattenParams(X, Theta)

# Regularization parameter of 10 is used (as used in the homework assignment)
mylambda = 10.

# Training the actual model with fmin_cg
result = opt.fmin_cg(cofiCostFunc, x0=myflat, fprime=cofiGrad, \
                               args=(Y,R,nu,nm,nf,mylambda), \
                                maxiter=50,disp=True,full_output=True)

Warning: Maximum number of iterations has been exceeded.
         Current function value: 72974.851036
         Iterations: 50
         Function evaluations: 72
         Gradient evaluations: 72


# Reshape the trained output into sensible "X" and "Theta" matrices
resX, resTheta = reshapeParams(result[0], nm, nu, nf)


# After training the model, now make recommendations by computing
# the predictions matrix
prediction_matrix = resX.dot(resTheta.T)


# Grab the last user's predictions (since I put my predictions at the
# end of the Y matrix, not the front)
# Add back in the mean movie ratings
my_predictions = prediction_matrix[:,-1] + Ymean.flatten()


# Sort my predictions from highest to lowest
pred_idxs_sorted = np.argsort(my_predictions)
pred_idxs_sorted[:] = pred_idxs_sorted[::-1]

print ("Top recommendations for you:")
for i in range(10):
    print ('Predicting rating %0.1f for movie %s.' % \
    (my_predictions[pred_idxs_sorted[i]],movies[pred_idxs_sorted[i]]))
    
print ("\nOriginal ratings provided:")
for i in range(len(my_ratings)):
    if my_ratings[i] > 0:
        print ('Rated %d for movie %s.' % (my_ratings[i],movies[i]))

Top recommendations for you:
Predicting rating 8.5 for movie Star Wars (1977).
Predicting rating 8.3 for movie Shawshank Redemption, The (1994).
Predicting rating 8.2 for movie Raiders of the Lost Ark (1981).
Predicting rating 8.2 for movie Titanic (1997).
Predicting rating 8.2 for movie Usual Suspects, The (1995).
Predicting rating 8.1 for movie Schindler's List (1993).
Predicting rating 8.1 for movie Wrong Trousers, The (1993).
Predicting rating 8.1 for movie Close Shave, A (1995).
Predicting rating 8.1 for movie Good Will Hunting (1997).
Predicting rating 8.0 for movie Empire Strikes Back, The (1980).

Original ratings provided:
Rated 4 for movie Toy Story (1995).
Rated 3 for movie Twelve Monkeys (1995).
Rated 5 for movie Usual Suspects, The (1995).
Rated 4 for movie Outbreak (1995).
Rated 5 for movie Shawshank Redemption, The (1994).
Rated 3 for movie While You Were Sleeping (1995).
Rated 5 for movie Forrest Gump (1994).
Rated 2 for movie Silence of the Lambs, The (1991).
Rated 4 for movie Alien (1979).
Rated 5 for movie Die Hard 2 (1990).
Rated 5 for movie Sphere (1998).