# Source: https://github.com/kaleko/CourseraML/tree/master/ex7
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import matplotlib
import scipy.optimize as opt
from google.colab import drive
# Mount Google Drive at /content/drive (uses google.colab, so Colab only).
drive.mount('/content/drive')
# [notebook output] Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
# Load the first PCA example dataset from the mounted drive and keep its 'X' entry.
ex7data1 = sio.loadmat('/content/drive/My Drive/AndrewNg-ML/ex7data1.mat');
X = ex7data1['X']
# Quick look at the raw data: column 0 on the x-axis, column 1 on the y-axis,
# drawn as hollow blue markers.
plt.figure(figsize=(7,5))
plot = plt.scatter(X[:,0], X[:,1], s=30, facecolors='none', edgecolors='b')
plt.title("Example Dataset",fontsize=18)
plt.grid(True)
def featureNormalize(X):
    """
    Standardize each column of X to zero mean and unit standard deviation.

    Parameters
    ----------
    X : 2-D array-like with one example per row and one feature per column.

    Returns
    -------
    means : per-column means of X.
    stds : per-column sample standard deviations (ddof=1) of X.
    X_norm : (X - means) / stds, with constant columns left at zero.
    """
    X = np.asarray(X, dtype=float)
    means = np.mean(X, axis=0)
    centered = X - means
    # Use the sample standard deviation (ddof=1). np.std defaults to ddof=0,
    # which is why this file printed 1.496 where the exercise text quoted
    # in this file expects "about 1.481". A single row has no sample
    # deviation, so fall back to ddof=0 there instead of producing NaN.
    ddof = 1 if X.shape[0] > 1 else 0
    stds = np.std(centered, axis=0, ddof=ddof)
    # A constant column has std 0; divide by 1 instead so it normalizes to
    # zeros rather than NaN/inf.
    safe_stds = np.where(stds == 0, 1.0, stds)
    X_norm = centered / safe_stds
    return means, stds, X_norm
import scipy.linalg
def getUSV(X_norm):
    """
    Decompose the covariance matrix of X_norm with SVD.

    The covariance matrix is X_norm^T * X_norm divided by the number of
    rows of X_norm. The three values are returned exactly as
    scipy.linalg.svd produces them: the left singular vectors (as columns),
    the singular values, and the third factor (which SciPy documents as the
    transposed right singular vectors).
    """
    n_examples = X_norm.shape[0]
    sigma = (X_norm.T @ X_norm) / n_examples
    # Singular value decomposition of the covariance matrix
    return scipy.linalg.svd(sigma, full_matrices=True, compute_uv=True)
# Feature normalize the 2-D example data
means, stds, X_norm = featureNormalize(X)
# Run SVD on the covariance matrix of the normalized data
U, S, V = getUSV(X_norm)
# "...output the top principal component (eigenvector) found,
# and you should expect to see an output of about [-0.707 -0.707]"
print ('Top principal component is: ')
# The first column of U is the top principal component
print(U[:,0])
# [notebook output] Top principal component is: [-0.70710678 -0.70710678]
# Quick plot, now including the principal components
plt.figure(figsize=(7,5))
plot = plt.scatter(X[:,0], X[:,1], s=30, facecolors='none', edgecolors='b')
plt.title("Example Dataset: PCA Eigenvectors Shown",fontsize=18)
plt.xlabel('x1',fontsize=18)
plt.ylabel('x2',fontsize=18)
plt.grid(True)
# Each principal component is drawn as a segment starting at the mean of the
# data, scaled by 1.5x its singular value.
# Component k is COLUMN k of U, so its x-part is U[0,k] and its y-part is
# U[1,k]. Indexing a row instead only looks right when U is symmetric.
plt.plot([means[0], means[0] + 1.5*S[0]*U[0,0]],
         [means[1], means[1] + 1.5*S[0]*U[1,0]],
         color='red',linewidth=3,
         label='First Principal Component')
plt.plot([means[0], means[0] + 1.5*S[1]*U[0,1]],
         [means[1], means[1] + 1.5*S[1]*U[1,1]],
         color='fuchsia',linewidth=3,
         label='Second Principal Component')
leg = plt.legend(loc=4)
def projectData(X, U, K):
    """
    Project X onto the leading K columns of U.

    Returns the reduced representation X * U[:, :K], i.e. one row per row
    of X and one column per retained eigenvector.
    """
    # Only the first K columns of U take part in the projection
    return X.dot(U[:, :K])
# "...project the first example onto the first dimension
# and you should see a value of about 1.481"
z = projectData(X_norm, U, 1)
print ('Projection of the first example is: ')
# z has one column here, so z[0] is a length-1 array. Index the scalar
# directly: calling float() on a 1-element array is deprecated in recent
# NumPy releases.
print(float(z[0, 0]))
# [notebook output] Projection of the first example is: 1.4963126084578522
def recoverData(Z, U, K):
    """
    Map projected data Z back to the original feature space.

    Z is multiplied by the transpose of the first K columns of U, giving an
    approximation with one column per row of U.
    """
    return Z.dot(U[:, :K].T)
# Map the 1-D projections back into the normalized 2-D feature space
X_recovred = recoverData(z,U,1)
print ('Recovered approximation of the first example is: ')
print(X_recovred[0])
# [notebook output] Recovered approximation of the first example is: [-1.05805279 -1.05805279]
# Plot every normalized point next to its PCA reconstruction
plt.figure(figsize=(7, 5))
plot = plt.scatter(
    X_norm[:, 0], X_norm[:, 1],
    s=30, facecolors='none', edgecolors='b',
    label='Original Data Points',
)
plot = plt.scatter(
    X_recovred[:, 0], X_recovred[:, 1],
    s=30, facecolors='none', edgecolors='r',
    label='PCA Reduced Data Points',
)
plt.title("Example Dataset: Reduced Dimension Points Shown", fontsize=14)
plt.xlabel('x1 [Feature Normalized]', fontsize=14)
plt.ylabel('x2 [Feature Normalized]', fontsize=14)
plt.grid(True)
# Dashed black connector from each original point to its reconstruction
for x, original_pt in enumerate(X_norm):
    recovered_pt = X_recovred[x]
    plt.plot(
        [original_pt[0], recovered_pt[0]],
        [original_pt[1], recovered_pt[1]],
        'k--',
    )
leg = plt.legend(loc=4)
# Same limits on both axes so the projections look better
dummy = plt.xlim((-2.5, 2.5))
dummy = plt.ylim((-2.5, 2.5))
# Load the face dataset from the mounted drive; its 'X' entry is what the
# image-display code below renders (each row is reshaped to 32x32 there).
face = sio.loadmat('/content/drive/My Drive/AndrewNg-ML/ex7faces.mat');
X_face = face['X']
# IPython shell escape (notebook-only syntax): pin SciPy to version 1.2.0.
# NOTE(review): presumably pinned for image-display code that relies on
# scipy.misc.toimage -- confirm before changing or removing the pin.
!pip3 install scipy==1.2.0
import scipy
# Print the active version to confirm which SciPy is loaded
print(scipy.__version__)
# [notebook output]
# Requirement already satisfied: scipy==1.2.0 in /usr/local/lib/python3.7/dist-packages (1.2.0)
# Requirement already satisfied: numpy>=1.8.2 in /usr/local/lib/python3.7/dist-packages (from scipy==1.2.0) (1.19.5)
# 1.2.0
import scipy.misc
import matplotlib.cm as cm
def getDatumImg(row):
    """
    Turn one flattened image (32*32 = 1024 values) into a 32x32 array.

    The values are reshaped to 32x32 and then transposed, so consecutive
    input values run down the columns of the returned array.
    """
    side = 32
    return row.reshape((side, side)).transpose()
def displayData(X, nrows = 10, ncols = 10):
    """
    Show the leading rows of X as an nrows x ncols grid of 32x32 images.

    Parameters
    ----------
    X : 2-D array whose rows are flattened 32x32 images.
    nrows, ncols : grid dimensions; at most nrows*ncols rows of X are drawn.
        If X has fewer rows, the remaining tiles stay blank (zero).

    The grid is drawn on a new 10x10-inch matplotlib figure in greyscale.
    Nothing is returned.
    """
    width, height = 32, 32
    big_picture = np.zeros((height * nrows, width * ncols))
    # Never index past the end of X when it holds fewer images than the grid
    n_images = min(nrows * ncols, len(X))
    for idx in range(n_images):
        # Fill the grid row by row
        irow, icol = divmod(idx, ncols)
        tile = getDatumImg(X[idx])
        big_picture[irow * height:irow * height + tile.shape[0],
                    icol * width:icol * width + tile.shape[1]] = tile
    plt.figure(figsize=(10, 10))
    # Pass the array straight to imshow, which scales it to the colormap
    # range itself. This avoids scipy.misc.toimage, which is deprecated
    # (it emits a DeprecationWarning) and missing from newer SciPy.
    plt.imshow(big_picture, cmap=cm.Greys_r)
# Show the first faces with displayData's default 10x10 grid
displayData(X_face)
# [notebook output] /usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:32: DeprecationWarning: `toimage` is deprecated! `toimage` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0. Use Pillow's ``Image.fromarray`` directly instead.
# Feature normalize the face data
means, stds, X_norm = featureNormalize(X_face)
# Run SVD on the covariance matrix of the normalized faces
U, S, V = getUSV(X_norm)
# Visualize the top 36 eigenvectors found ("eigenfaces").
# The eigenvectors are columns of U, so transpose the first 36 columns to
# give displayData one image per row, shown as a 6x6 grid.
displayData(U[:,:36].T, nrows=6, ncols=6)
# [notebook output] /usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:32: DeprecationWarning: `toimage` is deprecated! `toimage` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0. Use Pillow's ``Image.fromarray`` directly instead.
# Project each image down to 36 dimensions
z = projectData(X_norm, U, K=36)
# Attempt to recover the original (normalized) data from the 36-D projection
X_rec = recoverData(z, U, K=36)
# Plot the reconstructed images (default 10x10 grid)
displayData(X_rec)
# [notebook output] /usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:32: DeprecationWarning: `toimage` is deprecated! `toimage` is deprecated in SciPy 1.0.0, and will be removed in 1.2.0. Use Pillow's ``Image.fromarray`` directly instead.