import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io as sio
import matplotlib
import scipy.optimize as opt
import seaborn as sns
from google.colab import drive
drive.mount('/content/drive')
Mounted at /content/drive
data = sio.loadmat('/content/drive/My Drive/AndrewNg-ML/ex5data1.mat')
X = data['X']
y = data['y']
Xval = data['Xval']
yval = data['yval']
Xtest = data['Xtest']
ytest = data['ytest']
X.shape, y.shape
((12, 1), (12, 1))
X[:, 0]
array([-15.93675813, -29.15297922, 36.18954863, 37.49218733, -48.05882945, -8.94145794, 15.30779289, -34.70626581, 1.38915437, -44.38375985, 7.01350208, 22.76274892])
y
array([[ 2.13431051],
       [ 1.17325668],
       [34.35910918],
       [36.83795516],
       [ 2.80896507],
       [ 2.12107248],
       [14.71026831],
       [ 2.61418439],
       [ 3.74017167],
       [ 3.73169131],
       [ 7.62765885],
       [22.7524283 ]])
plt.figure(figsize=(8, 6))
plt.scatter(X.ravel(), y.ravel(), s=50, c='r', marker='x', linewidths=1)
# X and y are (12, 1) column vectors; matplotlib (and pandas, if we later build a DataFrame)
# handles 1-D arrays more smoothly, so we ravel the data before plotting
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
plt.ylim(bottom=0);  # 'ymin' was removed in newer matplotlib; 'bottom' is the current keyword
def ravel_data(data):
    return map(np.ravel, [data['X'], data['y'], data['Xval'], data['yval'], data['Xtest'], data['ytest']])
X, y, Xval, yval, Xtest, ytest = ravel_data(data)
X
array([-15.93675813, -29.15297922, 36.18954863, 37.49218733, -48.05882945, -8.94145794, 15.30779289, -34.70626581, 1.38915437, -44.38375985, 7.01350208, 22.76274892])
# add bias term
X, Xval, Xtest = [np.insert(x.reshape(x.shape[0], 1), 0, np.ones(x.shape[0]), axis=1) for x in (X, Xval, Xtest)]
def linearRegCostFunction(theta, X, y, lamb=1):
    m = y.size
    h = X.dot(theta)
    J = (1 / (2 * m)) * np.sum(np.square(h - y))           # unregularized cost
    reg = (lamb / (2 * m)) * np.sum(np.square(theta[1:]))  # regularization term, skipping the intercept
    return J + reg
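For reference, this is the regularized linear regression cost the function implements; the intercept term $\theta_0$ is excluded from the regularization sum:

$$J(\theta) = \frac{1}{2m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)^2 + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2, \qquad h_\theta(x) = \theta^T x$$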
theta = np.ones(X.shape[1])
linearRegCostFunction(theta, X, y)
303.9931922202643
def gradient(theta, X, y):
    m = X.shape[0]
    h_minus_y = X.dot(theta) - y
    sum_term = X.T.dot(h_minus_y)
    return sum_term / m
def regularized_gradient(theta, X, y, l=1):
    m = X.shape[0]
    regularized_term = theta.copy()  # same shape as theta
    regularized_term[0] = 0          # don't regularize intercept theta
    regularized_term = (l / m) * regularized_term
    return gradient(theta, X, y) + regularized_term
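The matching regularized gradient, which regularized_gradient computes in vectorized form (the $\lambda$ term is zeroed out for the intercept):

$$\frac{\partial J(\theta)}{\partial \theta_0} = \frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_0^{(i)}$$

$$\frac{\partial J(\theta)}{\partial \theta_j} = \frac{1}{m}\sum_{i=1}^{m}\left(h_\theta(x^{(i)}) - y^{(i)}\right)x_j^{(i)} + \frac{\lambda}{m}\theta_j \quad (j \geq 1)$$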
regularized_gradient(theta, X, y)
array([-15.30301567, 598.25074417])
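As a sanity check, the analytic gradient can be compared against a central finite-difference approximation of the regularized cost. This is only a quick sketch using the theta, X, y and functions defined above; the step size eps is an assumption.
# sanity check (sketch): numerical gradient of the regularized cost vs. the analytic one
eps = 1e-4
numerical_grad = np.zeros_like(theta)
for j in range(theta.size):
    step = np.zeros_like(theta)
    step[j] = eps
    numerical_grad[j] = (linearRegCostFunction(theta + step, X, y)
                         - linearRegCostFunction(theta - step, X, y)) / (2 * eps)
print(numerical_grad)                     # should closely match...
print(regularized_gradient(theta, X, y))  # ...array([-15.30301567, 598.25074417])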
def trainLinearReg(X, y, l=1):
    """linear regression
    args:
        X: feature matrix, (m, n+1)  # with intercept x0=1
        y: target vector, (m, )
        l: lambda constant for regularization
    return: trained parameters
    """
    # init theta
    theta = np.ones(X.shape[1])
    # train it
    res = opt.minimize(fun=linearRegCostFunction,
                       x0=theta,
                       args=(X, y, l),
                       method='TNC',
                       jac=regularized_gradient,  # method for computing the gradient vector
                       options={'disp': True})
    return res
res = trainLinearReg(X, y, l=0)
res
     fun: 22.37390649510892
     jac: array([ 1.52549682e-07, -7.76071267e-09])
 message: 'Converged (|f_n-f_(n-1)| ~= 0)'
    nfev: 9
     nit: 4
  status: 1
 success: True
       x: array([13.08790367, 0.36777923])
final_theta = res.x
final_theta
array([13.08790367, 0.36777923])
b = final_theta[0] # intercept
m = final_theta[1] # slope
plt.figure(figsize=(8, 6))
plt.scatter(X[:, 1], y, label="Training data", c='r', marker='x')
plt.plot(X[:, 1], X[:, 1]*m + b, label="Prediction")
plt.legend(loc=2)
plt.show()
def costFunction(theta, X, y):
    m = y.size
    h = X.dot(theta)
    J = (1 / (2 * m)) * np.sum(np.square(h - y))
    return J
def learningCurve(X, y, Xval, yval, Lambda):
    error_train = []
    error_cross_val = []
    m = y.size
    for i in range(1, m + 1):
        subsetX = X[0:i, :]
        subsetY = y[0:i]
        fit_theta = trainLinearReg(subsetX, subsetY, Lambda).x
        error_i_train = costFunction(fit_theta, subsetX, subsetY)
        error_i_cross_val = costFunction(fit_theta, Xval, yval)
        error_train.append(error_i_train)
        error_cross_val.append(error_i_cross_val)
    plt.figure(figsize=(10, 8))
    plt.plot(np.arange(1, m + 1), error_train, label='training cost')
    plt.plot(np.arange(1, m + 1), error_cross_val, label='cross validation cost')
    plt.legend(loc=1)
    plt.show()
learningCurve(X, y, Xval, yval, Lambda=0)
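With a single linear feature and Lambda=0, both curves flatten out at a relatively high error, the classic high-bias (underfitting) picture. If the numbers themselves are wanted rather than the plot, here is a minimal sketch using the helpers above (the function name is hypothetical):
# sketch: final train/validation errors for a given lambda, using trainLinearReg and costFunction from above
def final_errors(X, y, Xval, yval, Lambda=0):
    fit_theta = trainLinearReg(X, y, Lambda).x
    return costFunction(fit_theta, X, y), costFunction(fit_theta, Xval, yval)
final_errors(X, y, Xval, yval, Lambda=0)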
def prepare_poly_data(*args, power):
    """
    args: keep feeding in X, Xval, or Xtest
          will return in the same order
    """
    def prepare_for_each_dataset(x):
        # expand feature
        df = poly_features(x, power=power)
        # normalization
        ndarr = normalize_feature(df).values
        # add bias term
        return np.insert(ndarr, 0, np.ones(ndarr.shape[0]), axis=1)
    return [prepare_for_each_dataset(x) for x in args]
def poly_features(x, power, as_ndarray=False):
    data = {'f{}'.format(i): np.power(x, i) for i in range(1, power + 1)}
    df = pd.DataFrame(data)
    return df.values if as_ndarray else df
def normalize_feature(df):
    """Standardize each column of the DataFrame: subtract its mean and divide by its standard deviation."""
    return df.apply(lambda column: (column - column.mean()) / column.std())
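Note that prepare_poly_data normalizes each of X, Xval and Xtest with its own column statistics. A common alternative is to compute the mean and standard deviation on the training set only and reuse them for the validation and test sets; the following is a minimal sketch of that variant (the function name is hypothetical, built on poly_features above):
# sketch: normalize every dataset with the *training* set's polynomial-feature statistics
def prepare_poly_data_shared_stats(X_train, *others, power):
    df_train = poly_features(X_train, power=power)
    mu, sigma = df_train.mean(), df_train.std()
    def finalize(df):
        ndarr = ((df - mu) / sigma).values
        return np.insert(ndarr, 0, np.ones(ndarr.shape[0]), axis=1)
    return [finalize(df_train)] + [finalize(poly_features(x, power=power)) for x in others]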
X, y, Xval, yval, Xtest, ytest = ravel_data(data)
poly_features(X, power=3)
|    | f1         | f2          | f3             |
|----|------------|-------------|----------------|
| 0  | -15.936758 | 253.980260  | -4047.621971   |
| 1  | -29.152979 | 849.896197  | -24777.006175  |
| 2  | 36.189549  | 1309.683430 | 47396.852168   |
| 3  | 37.492187  | 1405.664111 | 52701.422173   |
| 4  | -48.058829 | 2309.651088 | -110999.127750 |
| 5  | -8.941458  | 79.949670   | -714.866612    |
| 6  | 15.307793  | 234.328523  | 3587.052500    |
| 7  | -34.706266 | 1204.524887 | -41804.560890  |
| 8  | 1.389154   | 1.929750    | 2.680720       |
| 9  | -44.383760 | 1969.918139 | -87432.373590  |
| 10 | 7.013502   | 49.189211   | 344.988637     |
| 11 | 22.762749  | 518.142738  | 11794.353058   |
X_poly, Xval_poly, Xtest_poly = prepare_poly_data(X, Xval, Xtest, power=8)
X_poly[:3, :]
array([[ 1.00000000e+00, -3.62140776e-01, -7.55086688e-01,  1.82225876e-01,
        -7.06189908e-01,  3.06617917e-01, -5.90877673e-01,  3.44515797e-01,
        -5.08481165e-01],
       [ 1.00000000e+00, -8.03204845e-01,  1.25825266e-03, -2.47936991e-01,
        -3.27023420e-01,  9.33963187e-02, -4.35817606e-01,  2.55416116e-01,
        -4.48912493e-01],
       [ 1.00000000e+00,  1.37746700e+00,  5.84826715e-01,  1.24976856e+00,
         2.45311974e-01,  9.78359696e-01, -1.21556976e-02,  7.56568484e-01,
        -1.70352114e-01]])
# lambda = 0
learningCurve(X_poly, y, Xval_poly, yval, Lambda=0)
# lambda = 1
learningCurve(X_poly, y, Xval_poly, yval, Lambda=1)
# lambda = 100
learningCurve(X_poly, y, Xval_poly, yval, Lambda=100)
lambda_candidate = [0, 0.001, 0.003, 0.01, 0.03, 0.1, 0.3, 1, 3, 10]
training_cost, cross_validation_cost = [], []
for l in lambda_candidate:
    res = trainLinearReg(X_poly, y, l)
    fit_theta = res.x
    each_training_cost = linearRegCostFunction(fit_theta, X_poly, y, 0)
    each_cross_validation_cost = linearRegCostFunction(fit_theta, Xval_poly, yval, 0)
    training_cost.append(each_training_cost)
    cross_validation_cost.append(each_cross_validation_cost)
plt.figure(figsize=(10, 8))
plt.plot(lambda_candidate, training_cost, label='training')
plt.plot(lambda_candidate, cross_validation_cost, label='cross validation')
plt.legend(loc=2)
plt.xlabel('lambda')
plt.ylabel('cost')
plt.show()
best_lambda_index = np.argmin(cross_validation_cost)
best_lambda = lambda_candidate[best_lambda_index]
best_lambda # for cross validation dataset
1
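As a visual check, the degree-8 fit trained with the selected lambda can be plotted over the training data. This is a sketch built on the helpers above; the dense x_grid and its normalization with the training-set statistics are assumptions, not part of the original exercise code.
# sketch: plot the polynomial fit for the lambda selected on the cross-validation set
fit_theta = trainLinearReg(X_poly, y, best_lambda).x
train_df = poly_features(X, power=8)
mu, sigma = train_df.mean(), train_df.std()   # training-set statistics (same ones used to build X_poly)
x_grid = np.linspace(X.min() - 10, X.max() + 10, 100)
grid_ndarr = ((poly_features(x_grid, power=8) - mu) / sigma).values
grid_poly = np.insert(grid_ndarr, 0, np.ones(grid_ndarr.shape[0]), axis=1)
plt.figure(figsize=(8, 6))
plt.scatter(X, y, c='r', marker='x', label='Training data')
plt.plot(x_grid, grid_poly.dot(fit_theta), label='Polynomial fit (lambda={})'.format(best_lambda))
plt.xlabel('Change in water level (x)')
plt.ylabel('Water flowing out of the dam (y)')
plt.legend(loc=2)
plt.show()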
# use test data
lambda_candidate2 = [0, 0.01, 0.03, 0.1, 0.2, 0.3, 0.4, 0.5, 1, 2]
for l in lambda_candidate2:
    theta = trainLinearReg(X_poly, y, l).x
    print('test cost(l={}) = {}'.format(l, linearRegCostFunction(theta, Xtest_poly, ytest, 0)))
test cost(l=0) = 10.551425432669346
test cost(l=0.01) = 10.88035182200567
test cost(l=0.03) = 10.022274720437942
test cost(l=0.1) = 8.631939930646022
test cost(l=0.2) = 7.753395943326273
test cost(l=0.3) = 7.336639301523873
test cost(l=0.4) = 7.139677748128903
test cost(l=0.5) = 7.066548903774858
test cost(l=1) = 7.4662640770936175
test cost(l=2) = 9.358707950127894
We can see that the best lambda is 0.5 when evaluated on the test dataset. In a strict protocol, though, lambda is selected on the cross-validation set (lambda = 1 above) and the test set is used only once to report the final error, which here is about 7.47.