# Reference: https://github.com/fengdu78/Coursera-ML-AndrewNg-Notes
import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import matplotlib
import scipy.optimize as opt
from sklearn.metrics import classification_report
from google.colab import files
import io
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
ex3data1 = sio.loadmat('/content/drive/My Drive/AndrewNg-ML/ex3data1.mat')
def load_data(data, transpose=True):
    y = data.get('y')  # (5000, 1)
    y = y.reshape(y.shape[0])  # flatten to a 1-D vector, (5000,)
    X = data.get('X')  # (5000, 400)
    if transpose:
        # for this dataset, you need a transpose to get the orientation right
        X = np.array([image.reshape((20, 20)).T for image in X])
        # then flatten each image again to preserve the vector representation
        X = np.array([image.reshape(400) for image in X])
    return X, y
X, y = load_data(ex3data1)
print(X.shape)
print(y.shape)
(5000, 400)
(5000,)
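If you're curious what the transpose fixes: MATLAB stores each 20x20 image column-major, so without the transpose the digit renders flipped about its diagonal. A quick sketch to see the difference (variable names here are just for illustration):
X_no_t, _ = load_data(ex3data1, transpose=False)
fig, axes = plt.subplots(1, 2, figsize=(3, 1.5))
axes[0].matshow(X_no_t[0].reshape((20, 20)), cmap=matplotlib.cm.binary)    # flipped
axes[1].matshow(X_no_t[0].reshape((20, 20)).T, cmap=matplotlib.cm.binary)  # upright
plt.show()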
def plot_an_image(image):
    fig, ax = plt.subplots(figsize=(1, 1))
    ax.matshow(image.reshape((20, 20)), cmap=matplotlib.cm.binary)
    plt.xticks(np.array([]))  # get rid of ticks
    plt.yticks(np.array([]))
pick_one = np.random.randint(0, 5000)
plot_an_image(X[pick_one, :])
plt.show()
print('this should be {}'.format(y[pick_one]))
this should be 2
def plot_100_image(X):
    size = int(np.sqrt(X.shape[1]))
    sample_idx = np.random.choice(np.arange(X.shape[0]), 100)
    sample_images = X[sample_idx, :]  # (100, 400)
    fig, ax_array = plt.subplots(nrows=10, ncols=10, sharey=True, sharex=True, figsize=(8, 8))
    for r in range(10):
        for c in range(10):
            ax_array[r, c].matshow(sample_images[10 * r + c].reshape((size, size)),
                                   cmap=matplotlib.cm.binary)
            plt.xticks(np.array([]))
            plt.yticks(np.array([]))
plot_100_image(X)
plt.show()
raw_X, raw_y = load_data(ex3data1)
print(raw_X.shape)
print(raw_y.shape)
(5000, 400)
(5000,)
# add bias term
X = np.insert(raw_X, 0, values=np.ones(raw_X.shape[0]), axis=1)
X.shape
(5000, 401)
# y == 10 -> [0, 0, 0, 0, 0, 0, 0, 0, 0, 1] (one-hot, before reordering)
y_matrix = []
for k in range(1, 11):
    y_matrix.append((raw_y == k).astype(int))
# the last entry is k == 10, which is actually digit 0,
# so move it to the front: rows become [digit 0, digit 1, ..., digit 9]
y_matrix = [y_matrix[-1]] + y_matrix[:-1]
y = np.array(y_matrix)
y.shape
(10, 5000)
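For comparison, the same (10, 5000) one-hot matrix can be built in one vectorized expression; a sketch (y_alt is just an illustrative name, and raw_y % 10 maps label 10 to digit 0 while leaving 1..9 unchanged):
y_alt = (np.arange(10) == (raw_y % 10).reshape(-1, 1)).astype(int).T  # (10, 5000)
assert (y_alt == y).all()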
def cost(theta, X, y):
    return np.mean(-y * np.log(sigmoid(np.dot(X, theta))) - (1 - y) * np.log(1 - sigmoid(np.dot(X, theta))))
def regularized_cost(theta, X, y, l=1):
    '''you don't penalize theta_0'''
    theta_j1_to_n = theta[1:]
    regularized_term = (l / (2 * len(X))) * np.power(theta_j1_to_n, 2).sum()
    return cost(theta, X, y) + regularized_term
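For reference, the regularized cost implemented above is (with theta_0 excluded from the penalty):

J(\theta) = \frac{1}{m} \sum_{i=1}^{m} \left[ -y^{(i)} \log h_\theta(x^{(i)}) - (1 - y^{(i)}) \log\left(1 - h_\theta(x^{(i)})\right) \right] + \frac{\lambda}{2m} \sum_{j=1}^{n} \theta_j^2

where h_\theta(x) = \frac{1}{1 + e^{-\theta^T x}}, m = len(X), and \lambda is the argument l.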
def gradient(theta, X, y):
    return (1 / len(X)) * X.T.dot(sigmoid(np.dot(X, theta)) - y)
def regularized_gradient(theta, X, y, l=1):
    theta_j1_to_n = theta[1:]
    regularized_theta = (l / len(X)) * theta_j1_to_n
    # prepend a 0 so that theta_0 receives no regularization
    regularized_term = np.concatenate([np.array([0]), regularized_theta])
    return gradient(theta, X, y) + regularized_term
def sigmoid(z):
    return 1 / (1 + np.exp(-z))
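One caveat: np.exp(-z) overflows for very negative z (a harmless RuntimeWarning, since the result still saturates to 0), and np.log(1 - sigmoid(z)) can hit log(0) for large z. If SciPy is available, scipy.special.expit is a numerically robust drop-in:
from scipy.special import expit  # stable logistic function
# sigmoid = expit  # optional swap; identical up to floating-point detail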
def logistic_regression(X, y, l=1):
    """generalized logistic regression
    args:
        X: feature matrix, (m, n+1)  # with intercept x0=1
        y: target vector, (m, )
        l: lambda constant for regularization
    return: trained parameters
    """
    # init theta
    theta = np.zeros(X.shape[1])
    # train it
    res = opt.minimize(fun=regularized_cost,
                       x0=theta,
                       args=(X, y, l),
                       method='TNC',
                       jac=regularized_gradient,
                       options={'disp': True})
    # get trained parameters
    final_theta = res.x
    return final_theta
def predict(x, theta):
    prob = sigmoid(x @ theta)
    return (prob >= 0.5).astype(int)
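Before training, it is worth checking the analytic gradient against a numerical one. A minimal sketch on a small random problem (all names below are throwaway):
rng = np.random.default_rng(0)
X_small = np.insert(rng.normal(size=(20, 5)), 0, 1, axis=1)  # 20 samples + bias column
y_small = rng.integers(0, 2, size=20).astype(float)
theta_small = rng.normal(size=6)
eps = 1e-4
num_grad = np.array([
    (regularized_cost(theta_small + eps * e, X_small, y_small)
     - regularized_cost(theta_small - eps * e, X_small, y_small)) / (2 * eps)
    for e in np.eye(6)])
print(np.max(np.abs(num_grad - regularized_gradient(theta_small, X_small, y_small))))
# should be tiny, around 1e-8 or smaller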
t0 = logistic_regression(X, y[0])
print(t0.shape)
y_pred = predict(X, t0)
print('Accuracy={}'.format(np.mean(y[0] == y_pred)))
(401,)
Accuracy=0.9974
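As an optional cross-check (not part of the exercise), sklearn's LogisticRegression fits a similarly L2-regularized model; its objective is scaled differently (C plays roughly the role of 1/lambda), so the coefficients won't match exactly, but the training accuracy should land in the same ballpark as the 0.9974 above:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(C=1.0, max_iter=1000)
clf.fit(X[:, 1:], y[0])  # drop our bias column; sklearn adds its own intercept
print(clf.score(X[:, 1:], y[0]))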
k_theta = np.array([logistic_regression(X, y[k]) for k in range(10)])
print(k_theta.shape)
(10, 401)
prob_matrix = sigmoid(np.dot(X, k_theta.T))
prob_matrix
array([[9.95775193e-01, 2.39243194e-09, 5.34435764e-04, ...,
        6.47089832e-05, 3.91645855e-05, 1.72187992e-03],
       [9.98346277e-01, 9.73942015e-08, 5.60160999e-05, ...,
        9.68658421e-05, 2.90678976e-06, 8.48648647e-05],
       [9.91399426e-01, 2.58961425e-10, 5.68232687e-04, ...,
        6.53877895e-06, 2.65549223e-02, 1.96749851e-03],
       ...,
       [6.77847566e-07, 4.13756598e-02, 3.20983273e-03, ...,
        1.27199790e-04, 2.97633465e-03, 7.07933834e-01],
       [1.84316161e-05, 1.32020178e-07, 8.87453805e-08, ...,
        1.64786165e-03, 6.81524082e-02, 8.61145147e-01],
       [2.87828802e-02, 1.50033990e-10, 1.29642893e-04, ...,
        3.66241045e-01, 4.97879117e-03, 1.48189130e-01]])
prob_matrix.shape
(5000, 10)
y_pred = np.argmax(prob_matrix, axis=1)
y_pred
array([0, 0, 0, ..., 9, 9, 7])
y = raw_y.copy()  # reuse y for the true digit labels; the one-hot matrix is no longer needed
y
array([10, 10, 10, ..., 9, 9, 9], dtype=uint8)
y[y==10] = 0
y
array([0, 0, 0, ..., 9, 9, 9], dtype=uint8)
print(classification_report(y, y_pred))
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       500
           1       0.95      0.99      0.97       500
           2       0.95      0.92      0.93       500
           3       0.95      0.91      0.93       500
           4       0.95      0.95      0.95       500
           5       0.92      0.92      0.92       500
           6       0.97      0.98      0.97       500
           7       0.95      0.95      0.95       500
           8       0.93      0.92      0.92       500
           9       0.92      0.92      0.92       500

    accuracy                           0.94      5000
   macro avg       0.94      0.94      0.94      5000
weighted avg       0.94      0.94      0.94      5000