import numpy as np
import matplotlib.pyplot as plt
from google.colab import files
from scipy.io import loadmat
from scipy.optimize import minimize
from cvxopt import matrix, solvers
uploaded = files.upload()
iris = loadmat('simple_iris.mat')
X_iris = iris['x']
c_iris = iris['c']
uploaded = files.upload()
nonlinear = loadmat('simple_nonlinear.mat')
X_nl = nonlinear['x']
c_nl = nonlinear['c']
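A quick look at what loadmat returned can help confirm the data layout before training; this is only an inspection sketch, the printed shapes are whatever is stored in the two .mat files.
# inspect the loaded arrays: 'x' holds the samples, 'c' the +/-1 labels
print(X_iris.shape, c_iris.shape)
print(X_nl.shape, c_nl.shape)
print(np.unique(c_iris), np.unique(c_nl))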
# SVM training
def polynomial_kernel(x, y, p=2):
    return np.power(1 + np.dot(x, y), p)

def svm_train(c, X, K):
    N = X.shape[0]
    c = c.astype(np.float64)
    model = {}
    model['X'] = X
    # build the Gram (kernel) matrix
    if K == "linear":
        kernel_matrix = np.dot(X, np.transpose(X))
    if K == "polynomial":
        kernel_matrix = np.zeros((N, N))
        for i in range(N):
            for j in range(N):
                kernel_matrix[i, j] = polynomial_kernel(X[i], X[j])
    # build P, q, G, h, A, b for the dual problem
    P = matrix(kernel_matrix * np.outer(c, c))
    q = matrix(-np.ones([N, 1], np.float64))
    G = matrix(-np.eye(N))
    h = matrix(np.zeros([N, 1], np.float64))
    A = matrix(c, (1, N))
    b = matrix(0.)
    # solve the quadratic optimization problem with solvers.qp()
    sol = solvers.qp(P, q, G, h, A, b)
    # extract the Lagrange multipliers lambda and convert them to a flat array
    lagrange_lambda = np.array(sol['x']).reshape(-1)
    # save the Lagrange multipliers in the model
    model['lambda'] = lagrange_lambda
    # find the indices where lambda > 0 (within a small tolerance)
    index = lagrange_lambda > 1e-6  # boolean mask
    lambda_0 = lagrange_lambda[index]
    # save the support vectors so we can draw them later
    model['support_vectors'] = X[index]
    # flatten c into a 1-D array
    c_array = c.reshape(1, -1)[0]
    # keep the c_j such that lambda_j > 0
    c_j = c_array[index]
    K_ij = kernel_matrix[:, index]
    # calculate b:
    # b = mean_j( sum_{i=1}^{N} c_i lambda_i K_{i,j} - c_j )  over all j with lambda_j > 0
    b_array = []
    for j in range(len(c_j)):
        s = np.sum(c_array * lagrange_lambda * K_ij[:, j])
        b_array.append(s - c_j[j])
    b_mean = np.mean(b_array)
    model['b'] = b_mean
    # "weight" holds the coefficients v_i = c_i * lambda_i
    weight = c_array * lagrange_lambda
    model['weight'] = weight
    return model
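The QP handed to solvers.qp() is the hard-margin dual: minimize (1/2) lambda^T (K * cc^T) lambda - 1^T lambda subject to lambda >= 0 (G, h) and c^T lambda = 0 (A, b). A minimal sanity-check sketch follows; check_kkt and its tolerances are illustrative additions (written here for the polynomial kernel), not part of the original code.
# hedged sketch: verify the dual equality constraint and that support vectors sit on the margin
def check_kkt(model, X, c, tol=1e-4):
    lam = model['lambda']
    c_flat = c.astype(np.float64).reshape(-1)
    # dual equality constraint: sum_i lambda_i c_i = 0
    eq = np.dot(lam, c_flat)
    # support vectors should satisfy c_j * (sum_i v_i K(x_i, x_j) - b) ~= 1
    sv_mask = lam > 1e-6
    K_sv = np.array([[polynomial_kernel(xi, xj) for xi in X] for xj in X[sv_mask]])
    margins = c_flat[sv_mask] * (K_sv.dot(model['weight']) - model['b'])
    return abs(eq) < tol, np.allclose(margins, 1.0, atol=1e-2)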
def sigmoid(x):
    # note: this is the mirrored logistic 1/(1 + e^x) = 1 - sigma(x),
    # so positive margins map below 0.5 and negative margins above 0.5
    return 1.0 / (1.0 + np.exp(x))

# SVM classification
# predict labels for a given dataset:
# Y is the test dataset, model['X'] is the training dataset
def svm_classify(model, Y, K):
    X = np.asarray(model['X'])  # n * m
    n = X.shape[0]
    l = Y.shape[0]
    w = model['weight']
    b = model['b']
    c_hat = np.zeros(len(Y))
    d_sigmoid = np.zeros(len(Y))
    ans = {}
    if K == "linear":
        kernel_matrix = np.dot(Y, np.transpose(X))  # (l*m) * (m*n) = l*n
        c_hat = np.sign(np.dot(kernel_matrix, w) - b)
    if K == "polynomial":
        # for each row of the test dataset
        for j in range(len(Y)):
            # c_predict(y) = sign( sum_{i=1}^{N} v_i K(y, x_i) - b )
            # with v_i = c_i * lambda_i and K(y, x_i) the polynomial kernel
            prediction = 0.0
            for i in range(len(w)):
                prediction += w[i] * polynomial_kernel(Y[j], X[i])
            prediction = prediction - b
            c_hat[j] = np.sign(prediction)
            d_sigmoid[j] = sigmoid(prediction)
    # optional validation against the true labels (kept as a comment):
    # diff = np.array(c_hat).reshape(-1) - c_nl.reshape(-1)
    ans['c_hat'] = c_hat
    ans['d_sigmoid'] = d_sigmoid
    return ans
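A minimal usage sketch (model_demo and pred_demo are illustrative names, not part of the original code): train on one of the loaded datasets, then classify the same points; the returned dict holds the predicted signs in 'c_hat' and the squashed margins in 'd_sigmoid'.
# hypothetical usage sketch
model_demo = svm_train(c_iris, X_iris, 'linear')
pred_demo = svm_classify(model_demo, X_iris, 'linear')
print(pred_demo['c_hat'][:5])      # predicted labels (+1 / -1)
print(pred_demo['d_sigmoid'][:5])  # only filled by the polynomial branch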
# graph: red points for -1, blue points for 1, black stars for support vectors
# hyperplane: W^T x + b = 0
iris = loadmat('simple_iris.mat')
X_iris = iris['x']
c_iris = iris['c']
model = svm_train(c_iris, X_iris, 'linear')
# w is actually the "v" in the lecture notes
w = model['weight']
b = model['b']
support_vector_iris = model['support_vectors']
# visualization
label_color = [['red' if i == -1 else 'blue'] for i in c_iris]
plt.figure()
plt.scatter(X_iris[:,0], X_iris[:,1], c = np.squeeze(label_color))
plt.scatter(support_vector_iris[:,0], support_vector_iris[:,1], marker='*', c='k', s=100)
# to obtain the actual weight vector for plotting, scale each sample x_i by its
# coefficient v_i = c_i * lambda_i (work on a copy so X_iris itself is not modified)
copy_x = X_iris.copy()
for i in range(len(w)):
    copy_x[i] = copy_x[i] * w[i]
# summing over the samples gives the weight vector w = sum_i c_i lambda_i x_i
w_real = np.sum(copy_x, axis=0)
w1 = w_real[0]
w2 = w_real[1]
# np.dot((w1, w2), (x1, x2)) - b = 0
# --> w1*x1 + w2*x2 - b = 0
# --> x2 = (-w1/w2)*x1 + b/w2
# slope
k = -w1 / w2
# x values of the line
xx = np.linspace(4, 7)
# y values of the line
yy = k * xx + b / w2
plt.title('$W^Tx + b = 0$', fontsize=14)
plt.plot(xx, yy, 'k-', label = '$W^Tx + b = 0$')
plt.xlabel('$X_1$')
plt.ylabel('$X_2$')
plt.legend(loc='upper left')
Optimal solution found.
# graph: red points for -1, blue points for 1
# hyperplanes: W^T x + b = -1, 0, 1
iris = loadmat('simple_iris.mat')
X_iris = iris['x']
c_iris = iris['c']
model = svm_train(c_iris, X_iris, 'linear')
# w is actually the "v" in the lecture notes
w = model['weight']
b = model['b']
# visualization
label_color = [['red' if i == -1 else 'blue'] for i in c_iris]
plt.figure()
plt.scatter(X_iris[:,0], X_iris[:,1], c = np.squeeze(label_color))
# to obtain the actual weight vector for plotting, scale each sample x_i by its
# coefficient v_i = c_i * lambda_i (work on a copy so X_iris itself is not modified)
copy_x = X_iris.copy()
for i in range(len(w)):
    copy_x[i] = copy_x[i] * w[i]
# summing over the samples gives the weight vector w = sum_i c_i lambda_i x_i
w_real = np.sum(copy_x, axis=0)
w1 = w_real[0]
w2 = w_real[1]
# np.dot((w1, w2), (x1, x2)) - b = -1, 0, 1
# --> w1*x1 + w2*x2 - b = -1, 0, 1
# --> x2 = (-w1/w2)*x1 + (b-1, b, or b+1)/w2
# slope
k = -w1 / w2
# x values of the lines
xx = np.linspace(4, 7)
# y values of the separating line and the two margin lines
yy = k * xx + b / w2
yy_neg1 = k * xx + (b - 1) / w2
yy_pos1 = k * xx + (b + 1) / w2
plt.title('$W^Tx + b = -1, W^Tx + b = 0, W^Tx + b = 1$', fontsize=14)
plt.plot(xx, yy, 'k-', label = '$W^Tx + b = 0$')
plt.plot(xx, yy_neg1, 'g--', label = '$W^Tx + b = -1$')
plt.plot(xx, yy_pos1, 'y--', label = '$W^Tx + b = 1$')
plt.xlabel('$X_1$')
plt.ylabel('$X_2$')
plt.legend(loc='upper left')
Optimal solution found.
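For the linear kernel the distance between the two dashed margin lines should be 2/||w||. A quick hedged check, reusing the w1 and w2 computed in the cell above (sketch only):
# hedged sketch: geometric width of the margin, 2 / ||w_real||
margin_width = 2.0 / np.linalg.norm([w1, w2])
print('margin width:', margin_width)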
# reference:
# https://stackoverflow.com/questions/62416529/how-to-plot-the-non-linear-decision-boundary-using-the-parameters-obtained-from
# Classify the grid graph by using classification function
# yellow triangle --> predict_label = -1
# green star --> predict_label = 1
nonlinear = loadmat('simple_nonlinear.mat')
X_nl = nonlinear['x']
c_nl = nonlinear['c']
X1 = X_nl[:, 0]
X2 = X_nl[:, 1]
# train simple_nonlinear.mat dataset by using polynomial kernel function
model_nl = svm_train(c_nl, X_nl, 'polynomial')
prediction = svm_classify(model_nl, X_nl, 'polynomial')
# obtain the predictions from the classification function
c_predict = prediction['c_hat']
# boolean masks for the two predicted classes
c_pos_array_predict = np.asarray(c_predict == 1).reshape(-1)
c_neg_array_predict = np.asarray(c_predict == -1.).reshape(-1)
x_neg_predict = X1[c_neg_array_predict]
y_neg_predict = X2[c_neg_array_predict]
x_pos_predict = X1[c_pos_array_predict]
y_pos_predict = X2[c_pos_array_predict]
plt.title('Classify this grid using your classification function', fontsize=14)
plt.plot(x_neg_predict, y_neg_predict, "y^", x_pos_predict, y_pos_predict, "g*", markersize=8)
plt.xlabel(r'$X_1$', fontsize=14)
plt.ylabel(r'$X_2$', fontsize=14)
# Turn on the minor TICKS, which are required for the minor GRID
plt.minorticks_on()
# Customize the major grid
plt.grid(which='major', linestyle='-', linewidth='0.3', color='black')
# Customize the minor grid
plt.grid(which='minor', linestyle=':', linewidth='0.3', color='black')
plt.legend(["label: -1", "label: 1"], loc="upper right", prop=dict(size=8))
plt.show()
c_predict
Optimal solution found.
array([ 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.])
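Since c_nl holds the true labels for these grid points, a quick sanity check on the predictions above is possible; this is only an illustrative sketch, not part of the original notebook.
# hedged sketch: fraction of points whose predicted sign matches the label in c_nl
train_accuracy = np.mean(c_predict == c_nl.reshape(-1).astype(np.float64))
print('training accuracy:', train_accuracy)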
# Graph: red for c = -1, blue for c = 1, black line connects all support vectors
nonlinear = loadmat('simple_nonlinear.mat')
X_nl = nonlinear['x']
c_nl = nonlinear['c']
X1 = X_nl[:, 0]
X2 = X_nl[:, 1]
model_nl = svm_train(c_nl, X_nl, 'polynomial')
support_vector = model_nl['support_vectors']
# boolean masks for the two classes
c_pos_array = np.asarray(c_nl == 1).reshape(-1)
c_neg_array = np.asarray(c_nl == -1).reshape(-1)
x_neg = X1[c_neg_array]
y_neg = X2[c_neg_array]
x_pos = X1[c_pos_array]
y_pos = X2[c_pos_array]
plt.plot(x_neg, y_neg, "ro", x_pos, y_pos, "bo", markersize=5)
# connect the support vectors with a black line to outline the boundary region
bound_x = support_vector[:, 0]
bound_y = support_vector[:, 1]
plt.plot(bound_x, bound_y, 'k', lw=1)
plt.title('Plot the grid points with the colors as suggested in 3.(a)-(c)', fontsize=14)
plt.xlabel(r'$X_1$',fontsize=14)
plt.ylabel(r'$X_2$',fontsize=14)
# Turn on the minor TICKS, which are required for the minor GRID
plt.minorticks_on()
# Customize the major grid
plt.grid(which='major', linestyle='-', linewidth='0.3', color='black')
# Customize the minor grid
plt.grid(which='minor', linestyle=':', linewidth='0.3', color='black')
plt.legend(["label: -1", "label: 1"], loc="upper right", prop=dict(size=8))
plt.show()
Optimal solution found.
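The black polyline above only joins the support vectors; the actual polynomial decision boundary can be drawn by evaluating the decision function on a fine mesh and contouring it at zero, as in the Stack Overflow reference cited earlier. A minimal sketch (the grid extent and resolution below are illustrative assumptions):
# hedged sketch: contour of sum_i v_i K(y, x_i) - b at level 0
gx, gy = np.meshgrid(np.linspace(X1.min(), X1.max(), 100),
                     np.linspace(X2.min(), X2.max(), 100))
grid_points = np.c_[gx.ravel(), gy.ravel()]
decision = np.array([
    np.sum([model_nl['weight'][i] * polynomial_kernel(p, X_nl[i])
            for i in range(len(model_nl['weight']))]) - model_nl['b']
    for p in grid_points
]).reshape(gx.shape)
plt.contour(gx, gy, decision, levels=[0], colors='k')
plt.show()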
# draw the graph colored by the confidence values
nonlinear = loadmat('simple_nonlinear.mat')
X_nl = nonlinear['x']
c_nl = nonlinear['c']
X1 = X_nl[:, 0]
X2 = X_nl[:, 1]
model_nl = svm_train(c_nl, X_nl, 'polynomial')
prediction = svm_classify(model_nl, X_nl, 'polynomial')
# obtain the value after applying sigmoid function
d_sigmoid = prediction['d_sigmoid']
d_sigmoid
# note: with the mirrored sigmoid above, values < 0.5 correspond to predicted label +1
# and values > 0.5 to predicted label -1
# d_more: confidence value > 0.5
d_more_array = np.asarray(d_sigmoid > 0.5).reshape(-1)
# d_less: confidence value < 0.5
d_less_array = np.asarray(d_sigmoid < 0.5).reshape(-1)
x_less = X1[d_less_array]
y_less = X2[d_less_array]
x_more = X1[d_more_array]
y_more = X2[d_more_array]
plt.plot(x_less, y_less, "yo", x_more, y_more, "go", markersize=5)
plt.title('Plot the grid points colored by their confidence level', fontsize=14)
plt.xlabel(r'$X_1$',fontsize=14)
plt.ylabel(r'$X_2$',fontsize=14)
# Turn on the minor TICKS, which are required for the minor GRID
plt.minorticks_on()
# Customize the major grid
plt.grid(which='major', linestyle='-', linewidth='0.3', color='black')
# Customize the minor grid
plt.grid(which='minor', linestyle=':', linewidth='0.3', color='black')
plt.legend(["confiance < 0.5", "confiance > 0.5"], loc="upper right", prop=dict(size=8))
plt.show()
d_sigmoid
Optimal solution found.
array([0.19136142, 0.24446287, 0.25500254, 0.2754326 , 0.24850106, 0.15312651, 0.12287385, 0.06094058, 0.06969795, 0.0909299 , 0.08071826, 0.129266 , 0.15670854, 0.0708128 , 0.14182777, 0.09416656, 0.10302317, 0.17118482, 0.17477388, 0.16387525, 0.23135321, 0.12029017, 0.05299492, 0.10719933, 0.2754326 , 0.73745221, 0.81489066, 0.8170928 , 0.73745221, 0.73745221, 0.73804305, 0.81323143, 0.73745221, 0.9330996 , 0.74548297, 0.94519217, 0.96986823, 0.96187003, 0.94219351, 0.98542981, 0.95856912, 0.9923512 , 0.95418043, 0.99514323, 0.96574857, 0.99219911, 0.91730064, 0.99706803, 0.99151918, 0.9814155 ])
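Because sigmoid() above is the mirrored logistic, thresholding these confidence values at 0.5 should reproduce the predicted labels with the sign flipped; a quick hedged check (sketch only, using the prediction computed above):
# hedged sketch: d_sigmoid < 0.5  <=>  positive margin  <=>  predicted label +1
labels_from_confidence = np.where(d_sigmoid < 0.5, 1.0, -1.0)
print(np.array_equal(labels_from_confidence, prediction['c_hat']))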