# Reference: https://github.com/fengdu78/Coursera-ML-AndrewNg-Notes
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import matplotlib
import scipy.optimize as opt
from sklearn.metrics import classification_report
from google.colab import files
import io
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
ex3data1 = sio.loadmat('/content/drive/My Drive/AndrewNg-ML/ex3data1.mat')
def load_data(data, transpose=True):
    y = data.get('y')  # (5000, 1)
    y = y.reshape(y.shape[0])  # flatten to a 1-D vector, (5000,)
    X = data.get('X')  # (5000, 400)
    if transpose:
        # for this dataset, you need a transpose to get the orientation right
        X = np.array([image.reshape((20, 20)).T for image in X])
        # then flatten each image again to preserve the vector representation
        X = np.array([image.reshape(400) for image in X])
    return X, y
X, y = load_data(ex3data1)
print(X.shape)
print(y.shape)
(5000, 400)
(5000,)
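If you're curious what the transpose fixes: MATLAB stores each 20x20 image column-major, so without the transpose the digit renders flipped about its diagonal. A quick sketch to see the difference (variable names here are just for illustration):
X_no_t, _ = load_data(ex3data1, transpose=False)
fig, axes = plt.subplots(1, 2, figsize=(3, 1.5))
axes[0].matshow(X_no_t[0].reshape((20, 20)), cmap=matplotlib.cm.binary)    # flipped
axes[1].matshow(X_no_t[0].reshape((20, 20)).T, cmap=matplotlib.cm.binary)  # upright
plt.show()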
def plot_an_image(image):
    fig, ax = plt.subplots(figsize=(1, 1))
    ax.matshow(image.reshape((20, 20)), cmap=matplotlib.cm.binary)
    plt.xticks(np.array([]))  # get rid of ticks
    plt.yticks(np.array([]))
pick_one = np.random.randint(0, 5000)
plot_an_image(X[pick_one, :])
plt.show()
print('this should be {}'.format(y[pick_one]))
this should be 2
def plot_100_image(X):
    size = int(np.sqrt(X.shape[1]))
    sample_idx = np.random.choice(np.arange(X.shape[0]), 100)
    sample_images = X[sample_idx, :]  # (100, 400)
    fig, ax_array = plt.subplots(nrows=10, ncols=10, sharey=True, sharex=True, figsize=(8, 8))
    for r in range(10):
        for c in range(10):
            ax_array[r, c].matshow(sample_images[10 * r + c].reshape((size, size)),
                                   cmap=matplotlib.cm.binary)
            plt.xticks(np.array([]))
            plt.yticks(np.array([]))
plot_100_image(X)
plt.show()
raw_X, raw_y = load_data(ex3data1)
print(raw_X.shape)
print(raw_y.shape)
(5000, 400)
(5000,)
# add bias term
X = np.insert(raw_X, 0, values=np.ones(raw_X.shape[0]), axis=1)
X.shape
(5000, 401)
# y == 10 -> [0, 0, 0, 0, 0, 0, 0, 0, 0, 1] (one-hot, before reordering)
y_matrix = []
for k in range(1, 11):
    y_matrix.append((raw_y == k).astype(int))
# the last entry is k == 10, which is actually digit 0,
# so move it to the front: rows become [digit 0, digit 1, ..., digit 9]
y_matrix = [y_matrix[-1]] + y_matrix[:-1]
y = np.array(y_matrix)
y.shape
(10, 5000)
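For comparison, the same (10, 5000) one-hot matrix can be built in one vectorized expression; a sketch (y_alt is just an illustrative name, and raw_y % 10 maps label 10 to digit 0 while leaving 1..9 unchanged):
y_alt = (np.arange(10) == (raw_y % 10).reshape(-1, 1)).astype(int).T  # (10, 5000)
assert (y_alt == y).all()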
def cost(theta, X, y):
    return np.mean(-y * np.log(sigmoid(np.dot(X, theta))) - (1 - y) * np.log(1 - sigmoid(np.dot(X, theta))))
def regularized_cost(theta, X, y, l=1):
    '''you don't penalize theta_0'''
    theta_j1_to_n = theta[1:]
    regularized_term = (l / (2 * len(X))) * np.power(theta_j1_to_n, 2).sum()
    return cost(theta, X, y) + regularized_term
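For reference, the regularized cost implemented above is (with theta_0 excluded from the penalty):

J(\theta) = \frac{1}{m} \sum_{i=1}^{m} \left[ -y^{(i)} \log h_\theta(x^{(i)}) - (1 - y^{(i)}) \log\left(1 - h_\theta(x^{(i)})\right) \right] + \frac{\lambda}{2m} \sum_{j=1}^{n} \theta_j^2

where h_\theta(x) = \frac{1}{1 + e^{-\theta^T x}}, m = len(X), and \lambda is the argument l.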
def gradient(theta, X, y):
    return (1 / len(X)) * X.T.dot(sigmoid(np.dot(X, theta)) - y)
def regularized_gradient(theta, X, y, l=1):
    theta_j1_to_n = theta[1:]
    regularized_theta = (l / len(X)) * theta_j1_to_n
    # prepend a 0 so that theta_0 receives no regularization
    regularized_term = np.concatenate([np.array([0]), regularized_theta])
    return gradient(theta, X, y) + regularized_term
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
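One caveat: np.exp(-z) overflows for very negative z (a harmless RuntimeWarning, since the result still saturates to 0), and np.log(1 - sigmoid(z)) can hit log(0) for large z. If SciPy is available, scipy.special.expit is a numerically robust drop-in:
from scipy.special import expit  # stable logistic function
# sigmoid = expit  # optional swap; identical up to floating-point detail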
def logistic_regression(X, y, l=1):
    """generalized logistic regression
    args:
        X: feature matrix, (m, n+1)  # with intercept x0=1
        y: target vector, (m, )
        l: lambda constant for regularization
    return: trained parameters
    """
    # init theta
    theta = np.zeros(X.shape[1])
    # train it
    res = opt.minimize(fun=regularized_cost,
                       x0=theta,
                       args=(X, y, l),
                       method='TNC',
                       jac=regularized_gradient,
                       options={'disp': True})
    # get trained parameters
    final_theta = res.x
    return final_theta
def predict(x, theta):
    prob = sigmoid(x @ theta)
    return (prob >= 0.5).astype(int)
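Before training, it is worth checking the analytic gradient against a numerical one. A minimal sketch on a small random problem (all names below are throwaway):
rng = np.random.default_rng(0)
X_small = np.insert(rng.normal(size=(20, 5)), 0, 1, axis=1)  # 20 samples + bias column
y_small = rng.integers(0, 2, size=20).astype(float)
theta_small = rng.normal(size=6)
eps = 1e-4
num_grad = np.array([
    (regularized_cost(theta_small + eps * e, X_small, y_small)
     - regularized_cost(theta_small - eps * e, X_small, y_small)) / (2 * eps)
    for e in np.eye(6)])
print(np.max(np.abs(num_grad - regularized_gradient(theta_small, X_small, y_small))))
# should be tiny, around 1e-8 or smaller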
t0 = logistic_regression(X, y[0])
print(t0.shape)
y_pred = predict(X, t0)
print('Accuracy={}'.format(np.mean(y[0] == y_pred)))
(401,)
Accuracy=0.9974
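As an optional cross-check (not part of the exercise), sklearn's LogisticRegression fits a similarly L2-regularized model; its objective is scaled differently (C plays roughly the role of 1/lambda), so the coefficients won't match exactly, but the training accuracy should land in the same ballpark as the 0.9974 above:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(C=1.0, max_iter=1000)
clf.fit(X[:, 1:], y[0])  # drop our bias column; sklearn adds its own intercept
print(clf.score(X[:, 1:], y[0]))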
k_theta = np.array([logistic_regression(X, y[k]) for k in range(10)])
print(k_theta.shape)
(10, 401)
prob_matrix = sigmoid(np.dot(X, k_theta.T))
prob_matrix
array([[9.95775193e-01, 2.39243194e-09, 5.34435764e-04, ...,
        6.47089832e-05, 3.91645855e-05, 1.72187992e-03],
       [9.98346277e-01, 9.73942015e-08, 5.60160999e-05, ...,
        9.68658421e-05, 2.90678976e-06, 8.48648647e-05],
       [9.91399426e-01, 2.58961425e-10, 5.68232687e-04, ...,
        6.53877895e-06, 2.65549223e-02, 1.96749851e-03],
       ...,
       [6.77847566e-07, 4.13756598e-02, 3.20983273e-03, ...,
        1.27199790e-04, 2.97633465e-03, 7.07933834e-01],
       [1.84316161e-05, 1.32020178e-07, 8.87453805e-08, ...,
        1.64786165e-03, 6.81524082e-02, 8.61145147e-01],
       [2.87828802e-02, 1.50033990e-10, 1.29642893e-04, ...,
        3.66241045e-01, 4.97879117e-03, 1.48189130e-01]])
prob_matrix.shape
(5000, 10)
y_pred = np.argmax(prob_matrix, axis=1)
y_pred
array([0, 0, 0, ..., 9, 9, 7])
y = raw_y.copy()  # reuse y for the true digit labels; the one-hot matrix is no longer needed
y
array([10, 10, 10, ..., 9, 9, 9], dtype=uint8)
y[y==10] = 0
y
array([0, 0, 0, ..., 9, 9, 9], dtype=uint8)
print(classification_report(y, y_pred))
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       500
           1       0.95      0.99      0.97       500
           2       0.95      0.92      0.93       500
           3       0.95      0.91      0.93       500
           4       0.95      0.95      0.95       500
           5       0.92      0.92      0.92       500
           6       0.97      0.98      0.97       500
           7       0.95      0.95      0.95       500
           8       0.93      0.92      0.92       500
           9       0.92      0.92      0.92       500

    accuracy                           0.94      5000
   macro avg       0.94      0.94      0.94      5000
weighted avg       0.94      0.94      0.94      5000