import matplotlib.pyplot as plt
import numpy as np
import scipy.io as sio
import matplotlib
import scipy.optimize as opt
from sklearn import svm
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset files loaded in this
# notebook are read from paths under that mount point.
drive.mount('/content/drive')
Mounted at /content/drive
# Read the first exercise dataset from the mounted Drive folder and pull the
# 'X' and 'y' entries out of the loaded .mat dictionary.
ex6data1 = sio.loadmat('/content/drive/My Drive/AndrewNg-ML/ex6data1.mat')
X1 = ex6data1['X']
y1 = ex6data1['y']
def plotData(pos, neg):
    """Scatter two groups of 2-D points on a new 10x6 figure.

    Args:
        pos: 2-D array; column 0 is plotted on x and column 1 on y, drawn
            as black '+' markers labelled 'Positive Sample'.
        neg: 2-D array indexed the same way, drawn as yellow 'o' markers
            labelled 'Negative Sample'.

    The figure is left open so further artists (for example a decision
    boundary) can be drawn on top of it afterwards.
    """
    plt.figure(figsize=(10, 6))

    series = (
        (pos, 'k+', 'Positive Sample'),
        (neg, 'yo', 'Negative Sample'),
    )
    for points, marker, label in series:
        plt.plot(points[:, 0], points[:, 1], marker, label=label)

    plt.xlabel('Column 1 Variable')
    plt.ylabel('Column 2 Variable')
    plt.legend()
    plt.grid(True)
# Split dataset 1 by label using boolean masks instead of a per-row Python
# loop. Masking keeps the result two-dimensional even when a class has no
# rows, so the column indexing inside plotData cannot fail on an empty class.
pos1 = X1[y1.ravel() == 1]
neg1 = X1[y1.ravel() == 0]
plotData(pos1, neg1)
# Scratch cell: build two 100-point axes, stack them as the two rows of a
# single array, and display the result.
xvals = np.linspace(0, 4.5, num=100)
yvals = np.linspace(1.5, 5, num=100)
test = np.vstack((xvals, yvals))
test
array([[0. , 0.04545455, 0.09090909, 0.13636364, 0.18181818, 0.22727273, 0.27272727, 0.31818182, 0.36363636, 0.40909091, 0.45454545, 0.5 , 0.54545455, 0.59090909, 0.63636364, 0.68181818, 0.72727273, 0.77272727, 0.81818182, 0.86363636, 0.90909091, 0.95454545, 1. , 1.04545455, 1.09090909, 1.13636364, 1.18181818, 1.22727273, 1.27272727, 1.31818182, 1.36363636, 1.40909091, 1.45454545, 1.5 , 1.54545455, 1.59090909, 1.63636364, 1.68181818, 1.72727273, 1.77272727, 1.81818182, 1.86363636, 1.90909091, 1.95454545, 2. , 2.04545455, 2.09090909, 2.13636364, 2.18181818, 2.22727273, 2.27272727, 2.31818182, 2.36363636, 2.40909091, 2.45454545, 2.5 , 2.54545455, 2.59090909, 2.63636364, 2.68181818, 2.72727273, 2.77272727, 2.81818182, 2.86363636, 2.90909091, 2.95454545, 3. , 3.04545455, 3.09090909, 3.13636364, 3.18181818, 3.22727273, 3.27272727, 3.31818182, 3.36363636, 3.40909091, 3.45454545, 3.5 , 3.54545455, 3.59090909, 3.63636364, 3.68181818, 3.72727273, 3.77272727, 3.81818182, 3.86363636, 3.90909091, 3.95454545, 4. 
, 4.04545455, 4.09090909, 4.13636364, 4.18181818, 4.22727273, 4.27272727, 4.31818182, 4.36363636, 4.40909091, 4.45454545, 4.5 ], [1.5 , 1.53535354, 1.57070707, 1.60606061, 1.64141414, 1.67676768, 1.71212121, 1.74747475, 1.78282828, 1.81818182, 1.85353535, 1.88888889, 1.92424242, 1.95959596, 1.99494949, 2.03030303, 2.06565657, 2.1010101 , 2.13636364, 2.17171717, 2.20707071, 2.24242424, 2.27777778, 2.31313131, 2.34848485, 2.38383838, 2.41919192, 2.45454545, 2.48989899, 2.52525253, 2.56060606, 2.5959596 , 2.63131313, 2.66666667, 2.7020202 , 2.73737374, 2.77272727, 2.80808081, 2.84343434, 2.87878788, 2.91414141, 2.94949495, 2.98484848, 3.02020202, 3.05555556, 3.09090909, 3.12626263, 3.16161616, 3.1969697 , 3.23232323, 3.26767677, 3.3030303 , 3.33838384, 3.37373737, 3.40909091, 3.44444444, 3.47979798, 3.51515152, 3.55050505, 3.58585859, 3.62121212, 3.65656566, 3.69191919, 3.72727273, 3.76262626, 3.7979798 , 3.83333333, 3.86868687, 3.9040404 , 3.93939394, 3.97474747, 4.01010101, 4.04545455, 4.08080808, 4.11616162, 4.15151515, 4.18686869, 4.22222222, 4.25757576, 4.29292929, 4.32828283, 4.36363636, 4.3989899 , 4.43434343, 4.46969697, 4.50505051, 4.54040404, 4.57575758, 4.61111111, 4.64646465, 4.68181818, 4.71717172, 4.75252525, 4.78787879, 4.82323232, 4.85858586, 4.89393939, 4.92929293, 4.96464646, 5. ]])
# Scratch cell: build a single (x, y) sample. ndarray.reshape returns a new
# array rather than modifying in place, so the result has to be assigned for
# test1 to actually become a (1, 2) row -- the same shape plotBoundary builds
# when it predicts a single point.
test1 = np.array([xvals[1], yvals[1]])
test1 = test1.reshape(1, 2)
test1
array([0.04545455, 1.53535354])
#Function to draw the SVM boundary
def plotBoundary(svm_function, xmin, xmax, ymin, ymax):
    """
    Draw the decision boundary of a trained classifier on the current figure.

    A 100 x 100 grid of (x1, x2) points covering the given window is
    classified in a single ``predict`` call, and a contour line is drawn
    where the predicted label changes.

    Args:
        svm_function: fitted classifier exposing ``predict`` for an array of
            two-feature rows; the predicted labels must be convertible to
            float.
        xmin, xmax: range of the first feature (horizontal axis).
        ymin, ymax: range of the second feature (vertical axis).

    Returns:
        None. The contour and the title are added to the active pyplot figure.
    """
    xvals = np.linspace(xmin, xmax, 100)
    yvals = np.linspace(ymin, ymax, 100)

    # meshgrid lays the grid out as (len(yvals), len(xvals)), which is the
    # orientation contour expects: rows follow y, columns follow x.
    grid_x, grid_y = np.meshgrid(xvals, yvals)
    grid_points = np.column_stack((grid_x.ravel(), grid_y.ravel()))

    # One vectorised predict call replaces 10,000 single-point calls.
    zvals = np.asarray(svm_function.predict(grid_points), dtype=float)
    zvals = zvals.reshape(grid_x.shape)

    # The predictions are discrete labels, so the boundary lies halfway
    # between the two label values. A level equal to one of the labels
    # (e.g. 0 for 0/1 labels) sits on the edge of the data range and
    # produces no contour line at all.
    lowest, highest = zvals.min(), zvals.max()
    if lowest < highest:
        plt.contour(grid_x, grid_y, zvals, levels=[(lowest + highest) / 2.0])
    # When every grid point gets the same label there is no boundary inside
    # the window, so nothing is drawn.

    plt.title("Decision Boundary")
# Create a linear-kernel SVM with C=1 and fit it on dataset 1, passing the
# labels as a 1-D vector.
linear_svm = svm.SVC(kernel='linear', C=1)
linear_svm.fit(X1, np.ravel(y1))
SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0, decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False)
# Score the C=1 linear SVM on the same data it was trained on.
linear_svm.score(X1, y1.flatten())
0.9803921568627451
# Redraw dataset 1 and overlay the C=1 linear SVM boundary on the same figure.
plotData(pos1, neg1)
plotBoundary(linear_svm, xmin=0, xmax=4.5, ymin=1.5, ymax=5)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:20: UserWarning: No contour levels were found within the data range.
# With C = 100 the SVM is expected to classify every training example
# correctly, at the price of a decision boundary that no longer looks like
# a natural fit for the data.
linear_svm_100 = svm.SVC(kernel='linear', C=100)
linear_svm_100.fit(X1, np.ravel(y1))
SVC(C=100, break_ties=False, cache_size=200, class_weight=None, coef0=0.0, decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False)
# Score the C=100 linear SVM on the same data it was trained on.
linear_svm_100.score(X1, y1.flatten())
1.0
# Redraw dataset 1 and overlay the C=100 linear SVM boundary on the same figure.
plotData(pos1, neg1)
plotBoundary(linear_svm_100, xmin=0, xmax=4.5, ymin=1.5, ymax=5)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:20: UserWarning: No contour levels were found within the data range.
# Load the second dataset from the mounted Drive folder.
ex6data2 = sio.loadmat('/content/drive/My Drive/AndrewNg-ML/ex6data2.mat')
X2, y2 = ex6data2['X'], ex6data2['y']
# Split the samples into the rows labelled 1 and the rows labelled 0.
# Boolean masks replace the per-row Python loop and keep the result
# two-dimensional even when a class has no rows, so plotData's column
# indexing cannot fail on an empty class.
pos2 = X2[y2.ravel() == 1]
neg2 = X2[y2.ravel() == 0]
plotData(pos2, neg2)
# Fit an RBF ("Gaussian") kernel SVM with C=1 on dataset 2.
# NOTE(review): gamma is set to sigma**-2. scikit-learn's RBF kernel is
# exp(-gamma * ||x - x'||^2), so a Gaussian kernel defined as
# exp(-||x - x'||^2 / (2 * sigma^2)) would need gamma = 1 / (2 * sigma**2)
# -- confirm which sigma convention is intended here.
sigma = 0.1
gamma = np.power(sigma, -2.)
gaus_svm = svm.SVC(kernel='rbf', C=1, gamma=gamma)
gaus_svm.fit(X2, np.ravel(y2))
SVC(C=1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0, decision_function_shape='ovr', degree=3, gamma=99.99999999999999, kernel='rbf', max_iter=-1, probability=False, random_state=None, shrinking=True, tol=0.001, verbose=False)
# Score the RBF-kernel SVM on the same data it was trained on.
gaus_svm.score(X2, y2.flatten())
0.9976825028968713
# Redraw dataset 2 and overlay the RBF-kernel SVM boundary on the same figure.
plotData(pos2, neg2)
plotBoundary(gaus_svm, xmin=0, xmax=1, ymin=.4, ymax=1.0)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:20: UserWarning: No contour levels were found within the data range.
# Load the third dataset, which also carries a separate validation split
# under the 'Xval' / 'yval' keys.
ex6data3 = sio.loadmat('/content/drive/My Drive/AndrewNg-ML/ex6data3.mat')
X3, y3 = ex6data3['X'], ex6data3['y']
# Boolean masks replace the per-row Python loop and keep the result
# two-dimensional even when a class has no rows, so plotData's column
# indexing cannot fail on an empty class.
pos3 = X3[y3.ravel() == 1]
neg3 = X3[y3.ravel() == 0]
Xval, yval = ex6data3['Xval'], ex6data3['yval']
def find_best_parameters(c_values, sigma_values, X, y, Xval, yval):
    """Grid-search C and sigma for an RBF-kernel SVC.

    Every (c, sigma) combination is fitted on (X, y) and scored on
    (Xval, yval). The winning pair and its score are printed.

    Args:
        c_values: candidate values for the SVC ``C`` parameter.
        sigma_values: candidate sigma values; each is converted to
            ``gamma = sigma ** -2`` before being passed to the SVC.
        X, y: training samples and labels.
        Xval, yval: validation samples and labels used for scoring.

    Returns:
        The ``(c, sigma)`` tuple with the highest validation score. Ties keep
        the earliest pair tried; if no score exceeds 0 the initial ``(0, 0)``
        is returned.

    NOTE(review): scikit-learn's RBF kernel is exp(-gamma * ||x - x'||^2); a
    Gaussian kernel written as exp(-||x - x'||^2 / (2 * sigma^2)) would need
    gamma = 1 / (2 * sigma**2) -- confirm which convention is intended.
    """
    def _validation_score(c, sigma):
        # Fit one candidate model and report its score on the validation split.
        model = svm.SVC(C=c, kernel='rbf', gamma=np.power(sigma, -2.))
        model.fit(X, y)
        return model.score(Xval, yval)

    best_pair = (0, 0)
    best_score = 0
    for c in c_values:
        for sigma in sigma_values:
            candidate_score = _validation_score(c, sigma)
            # Strict comparison: an equal score never displaces the current best.
            if candidate_score > best_score:
                best_pair, best_score = (c, sigma), candidate_score

    summary = "Best C, sigma pair is (%f, %f) with a score of %f." % (
        best_pair[0], best_pair[1], best_score)
    print(summary)
    return best_pair
# Candidate grids for C and sigma, then the search itself: models are fitted
# on dataset 3 and scored on its validation split.
c_values = (
    0.01, 0.03, 0.1, 0.3,
    1., 3., 10., 30.,
)
sigma_values = (
    0.01, 0.03, 0.1, 0.3,
    1., 3., 10., 30.,
)
best_pair = find_best_parameters(
    c_values, sigma_values, X3, np.ravel(y3), Xval, yval)
Best C, sigma pair is (0.300000, 0.100000) with a score of 0.965000.
# Display the (C, sigma) pair selected by the grid search.
best_pair
(0.3, 0.1)
# Show dataset 3 on its own first.
plotData(pos3, neg3)
# Refit an RBF-kernel SVM with the selected (C, sigma) pair, using the same
# gamma = sigma ** -2 conversion as the grid search, then draw the data again
# with the decision boundary on top.
gaus_svm2 = svm.SVC(
    kernel='rbf',
    C=best_pair[0],
    gamma=np.power(best_pair[1], -2.),
)
gaus_svm2.fit(X3, np.ravel(y3))
plotData(pos3, neg3)
plotBoundary(gaus_svm2, xmin=-.5, xmax=.3, ymin=-.8, ymax=.6)
/usr/local/lib/python3.7/dist-packages/ipykernel_launcher.py:20: UserWarning: No contour levels were found within the data range.