# SVM kernel demos on synthetic data: a noisy cosine boundary and a noisy circular boundary.
import numpy as np
import sklearn.svm as svm
import matplotlib.pyplot as plt
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# From Paul Mayer Office Hours
# Synthetic two-class data: 500 random heights in [0, 25), one per sample
# index, separated by a noisy cosine-shaped boundary.
x = np.linspace(0, 4*np.pi, 500)
all_data = np.random.rand(500, 1)*25
# Cosine boundary with amplitude 6, centred vertically at 25/2.
cos = np.cos(x)*6 + 25/2
group1, x1 = [], []  # heights above the noisy boundary and their sample indices
group2, x2 = [], []  # remaining heights and their sample indices
for idx, (height, boundary) in enumerate(zip(all_data, cos)):
    # Shift the boundary by a random amount in [-3, 3) for this sample so the
    # two classes overlap slightly near the curve.
    jittered = boundary + np.random.uniform(-1, 1)*3
    if height > jittered:
        heights, indices = group1, x1
    else:
        heights, indices = group2, x2
    heights.append(height)
    indices.append(idx)
# Training data: group1 as red crosses, group2 as blue circles.
plt.plot(x1, group1, "rx")
plt.plot(x2, group2, "bo")
plt.xlabel("x1")
plt.ylabel("x2")
plt.title("Sinusoid split data")
plt.show()
# Assemble the training set from the two groups: one row per sample with
# columns (sample index, height); group1 is labelled 0 and group2 is labelled 1.
sample_idx = np.concatenate((x1, x2), axis=None)
sample_height = np.concatenate((group1, group2), axis=None)
X = np.column_stack((sample_idx, sample_height))
y = np.concatenate(
    (np.zeros(len(group1), dtype=int), np.ones(len(group2), dtype=int)),
    axis=None,
)
# RBF-kernel SVM; StandardScaler standardises the features before the SVC sees them.
pipe = make_pipeline(StandardScaler(), svm.SVC(kernel='rbf', gamma='auto'))
pipe.fit(X, y)
# Show how the model works: predict the class of every point on an integer
# grid spanning the training range (x1 in 0..499, x2 in 0..24).
test_pts = [[i, j] for i in range(500) for j in range(25)]
test_pts_np = np.array(test_pts)
test_y = pipe.predict(test_pts_np)
# Boolean masks selecting the grid points assigned to each class.
idx_c1 = (test_y == 0)
idx_c2 = (test_y == 1)
# Draw each prediction as its own marker. A bare 'r'/'b' format string in
# plt.plot has no marker, so it would join consecutive grid points with line
# segments, including spurious ones between neighbouring grid columns.
plt.scatter(test_pts_np[idx_c1, 0], test_pts_np[idx_c1, 1], c="r")
plt.scatter(test_pts_np[idx_c2, 0], test_pts_np[idx_c2, 1], c="b")
plt.xlabel("x1")
plt.ylabel("x2")
plt.title("Sinusoid predicted data")
plt.show()
# Second data set: a 40 x 40 grid over [-5, 5] x [-5, 5]. Points whose distance
# from the origin is below a noisy threshold around 2.5 are labelled 1, the
# rest 0. Each sample carries x1**2 + x2**2 as an extra third feature.
x1 = np.linspace(-5, 5, 40)
x2 = np.linspace(-5, 5, 40)
X = []
y = []
for u in x1:
    for v in x2:
        radius_sq = u**2 + v**2
        X.append((u, v, radius_sq))
        # Add some noise: the radius threshold varies by up to 0.4 per point.
        inside = np.sqrt(radius_sq) < 2.5 + np.random.uniform(-0.4, 0.4)
        y.append(1 if inside else 0)
X = np.array(X)
y = np.array(y)
xs_g1 = X[y == 0]  # samples labelled 0 (outside the noisy circle)
xs_g2 = X[y == 1]  # samples labelled 1 (inside the noisy circle)
# The data without the extra dimension.
plt.plot(xs_g1[:, 0], xs_g1[:, 1], "bo")
plt.plot(xs_g2[:, 0], xs_g2[:, 1], "rx")
plt.xlabel("x1")
plt.ylabel("x2")
plt.title("Scatter Plot of Figure 1 Data")
plt.show()
# NOTE: the next comment line was pasted notebook output sitting in the code,
# where it is a syntax error. It appears to be the first feature row of X
# (x1 = -5, x2 = -5, x1**2 + x2**2 = 50):
# [-5. -5. 50.]

# Linear SVM on the three-feature data (x1, x2, x1**2 + x2**2).
# gamma is kept as originally written; per the scikit-learn SVC documentation
# it is only used by the 'rbf', 'poly' and 'sigmoid' kernels.
pipe = make_pipeline(StandardScaler(), svm.SVC(kernel='linear', gamma='scale'))
pipe.fit(X, y)
# Evaluate the model on a denser 60 x 60 grid, with the same third feature
# that the training rows carry.
x1 = np.linspace(-5, 5, 60)
x2 = np.linspace(-5, 5, 60)
test_pts = [[a, b, a ** 2 + b ** 2] for a in x1 for b in x2]
test_pts_np = np.array(test_pts)
test_y = pipe.predict(test_pts_np)
# Boolean masks selecting the grid points assigned to each class.
idx_c1 = (test_y == 0)
idx_c2 = (test_y == 1)
# Plot only the first two coordinates: class 0 in blue, class 1 in red.
plt.scatter(test_pts_np[idx_c1, 0], test_pts_np[idx_c1, 1], c="b")
plt.scatter(test_pts_np[idx_c2, 0], test_pts_np[idx_c2, 1], c="r")
plt.xlabel("x1")
plt.ylabel("x2")
plt.title("Scatter Plot of Decision Boundary")
plt.show()
# RBF without third dimension: fit on the first two columns of X only.
pipe = make_pipeline(StandardScaler(), svm.SVC(kernel='rbf', gamma='scale'))
pipe.fit(X[:, :2], y)
# Evaluate the model on a 60 x 60 grid over [-5, 5] x [-5, 5].
x1 = np.linspace(-5, 5, 60)
x2 = np.linspace(-5, 5, 60)
test_pts = [[a, b] for a in x1 for b in x2]
test_pts_np = np.array(test_pts)
test_y = pipe.predict(test_pts_np)
# Boolean masks selecting the grid points assigned to each class.
idx_c1 = (test_y == 0)
idx_c2 = (test_y == 1)
# Class 0 is drawn in blue first, then class 1 in red.
for mask, colour in ((idx_c1, "b"), (idx_c2, "r")):
    plt.scatter(test_pts_np[mask, 0], test_pts_np[mask, 1], c=colour)
plt.xlabel("x1")
plt.ylabel("x2")
plt.title("Scatter Plot of Decision Boundary")
plt.show()