a = Value(4)
print(a.grad)
def f(x): return (x+2)**3 + x**2
f(a).backward()
print(a.grad)
0
116
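Sanity check by hand: f'(x) = 3(x+2)² + 2x, so f'(4) = 3·36 + 8 = 116, matching the output above.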
# utility method - ignore
def method(C, *L):
    for f in L:
        setattr(C, f.__name__, f)
class Value:
    """ stores a single scalar value and its gradient """
    def __init__(self, data, _children=()):
        self.data = data
        self.grad = 0
        self._backward = lambda: None
        self._prev = set(_children)

def backward(self, visited=None): # slightly shorter code to fit in the blog
    if visited is None:
        visited = set([self])
        self.grad = 1
    self._backward()
    for child in self._prev:
        if child not in visited:
            visited.add(child)
            child.backward(visited)
method(Value, backward)
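# Note: this depth-first traversal works for the examples in this post, but on
# graphs where a value feeds into nodes at different depths, a child's _backward
# can fire before all of its gradient has accumulated; the full class further
# below avoids this by processing nodes in topological order.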
def __add__(self, other):
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data + other.data, (self, other))
    def _backward():
        self.grad += out.grad
        other.grad += out.grad
    out._backward = _backward
    return out
method(Value, __add__)
x = Value(5)
y = x+x+x+x
y.backward()
x.grad
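Since y = x+x+x+x, each of the four occurrences of x contributes 1 to the gradient, so x.grad evaluates to 4.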
def __mul__(self, other):
    other = other if isinstance(other, Value) else Value(other)
    out = Value(self.data * other.data, (self, other))
    def _backward():
        self.grad += other.data * out.grad
        other.grad += self.data * out.grad
    out._backward = _backward
    return out
method(Value, __mul__)
x = Value(10)
y = x*x*x
y.backward()
x.grad
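Here y = x·x·x, so dy/dx = 3x² = 300 at x = 10.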
def __pow__(self, other):
    out = Value(self.data**other, (self,))
    def _backward():
        self.grad += (other * self.data**(other-1)) * out.grad
    out._backward = _backward
    return out
method(Value, __pow__)
a = Value(4)
print(a.grad)
def f(x): return (x+2)**3 + x**2
f(a).backward()
print(a.grad)
0
116
import random

class Value:
    """ stores a single scalar value and its gradient """
    def __init__(self, data=None, _children=(), _op=''):
        if data is None:
            data = random.uniform(-1, 1)
        self.data = data
        self.grad = 0
        # internal variables used for autograd graph construction
        self._backward = lambda: None
        self._prev = set(_children)
        self._op = _op # the op that produced this node, for graphviz / debugging / etc

    def backward(self):
        # topological order all of the children in the graph
        topo = []
        visited = set()
        def build_topo(v):
            if v not in visited:
                visited.add(v)
                for child in v._prev:
                    build_topo(child)
                topo.append(v)
        build_topo(self)
        # go one variable at a time and apply the chain rule to get its gradient
        self.grad = 1
        for v in reversed(topo):
            v._backward()

    # Arithmetic operations
    def __add__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data + other.data, (self, other), '+')
        def _backward():
            self.grad += out.grad
            other.grad += out.grad
        out._backward = _backward
        return out

    def __mul__(self, other):
        other = other if isinstance(other, Value) else Value(other)
        out = Value(self.data * other.data, (self, other), '*')
        def _backward():
            self.grad += other.data * out.grad
            other.grad += self.data * out.grad
        out._backward = _backward
        return out
    # On a first reading you can ignore all the code below here - if you understand everything above, then you understand the main concepts
    ##############################################################################################################################
    def __pow__(self, other):
        assert isinstance(other, (int, float)), "only supporting int/float powers for now"
        out = Value(self.data**other, (self,), f'**{other}')
        def _backward():
            self.grad += (other * self.data**(other-1)) * out.grad
        out._backward = _backward
        return out

    def relu(self):
        out = Value(0 if self.data < 0 else self.data, (self,), 'ReLU')
        def _backward():
            self.grad += (out.data > 0) * out.grad
        out._backward = _backward
        return out

    # Other operations implemented in terms of prior ones
    def __float__(self): return float(self.data)
    def __neg__(self): return self * -1
    def __radd__(self, other): return self + other
    def __sub__(self, other): return self + (-other)
    def __rsub__(self, other): return other + (-self)
    def __rmul__(self, other): return self * other
    def __truediv__(self, other): return self * other**-1
    def __rtruediv__(self, other): return other * self**-1
    def __repr__(self): return f"Value(data={self.data}, grad={self.grad})"
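# A quick sanity check of the full class (a minimal sketch with arbitrarily chosen values):
a = Value(2.0)
b = Value(3.0)
c = (a*b + a**2).relu()   # relu(6 + 4) = 10
c.backward()
print(a.grad, b.grad)     # dc/da = b + 2a = 7, dc/db = a = 2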
# Helper functions for plotting - ignore
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
sns.set()
import random
import numpy as np
# Helper function to plot datapoints and function approximators
def plot(X, Y, f=None, pt_legend='', line_legend='', title='', ylim=None):
    fig, ax = plt.subplots(figsize=(8,8))
    ln = None
    if Y:
        ln = ax.scatter(X, Y, color='blue', label=pt_legend)
    if f:
        ln = ax.plot(X, [f(x) for x in X], color='red', label=line_legend)[0]
    if pt_legend or line_legend:
        ax.legend()
    if ylim:
        ax.set_ylim(*ylim)
    return fig, ax, ln
# Utility code to animate SGD
from matplotlib.animation import FuncAnimation
from IPython.display import HTML
Ys = []
X = []
model = None
fig, ax, ln = None, None, None

def updateY():
    Ys.append([float(model(x)) for x in X])

def initY():
    global Ys
    Ys = []
    updateY()
def animate(gif="", skips=1):
    def update(frame):
        ln.set_data(X, Ys[frame])
        return ln,
    ani = FuncAnimation(fig, update, init_func=lambda: [ln], frames=range(0, len(Ys), skips), blit=True)
    if gif:
        ani.save(gif, writer='imagemagick', fps=15)
    return HTML(ani.to_jshtml())
import random

class Linear:
    def __init__(self):
        self.a, self.b = Value(random.random()), Value(random.random())
    def __call__(self, x): return self.a*x + self.b
    def zero_grad(self):
        self.a.grad, self.b.grad = 0, 0

def loss(y, y_): return (y-y_)**2
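# For this model the chain rule gives d(loss)/da = 2*(a*x + b - y)*x and
# d(loss)/db = 2*(a*x + b - y); this is what loss.backward() accumulates
# into a.grad and b.grad in the training loop below.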
n = 20
a_gt, b_gt = 5*random.random(), 5*random.random()
X = [random.random() for i in range(n)]
Y = [a_gt*x + b_gt + 0.5*random.random() for x in X]
print(f"Ground truth: {a_gt:.2f}, {b_gt:.2f}")
fig, ax, ln = plot(X, Y, ylim=(0,10));
Ground truth: 3.30, 1.80
model = Linear()
fig, ax, ln = plot(X,Y, model, ylim=(0,10));
print(f"Initial params: {float(model.a):.2f}, {float(model.b):.2f}")
η = 0.1
epochs = 20
initY()
for t in range(epochs):
    for x, y in zip(X, Y):
        model.zero_grad()
        loss = (model(x) - y)**2
        loss.backward()
        model.a, model.b = (model.a - η*model.a.grad, model.b - η*model.b.grad)
        updateY()
print(f"Final params: {float(model.a):.2f}, {float(model.b):.2f}")
Initial params: 0.22, 0.78
Final params: 3.13, 2.15
# this works pretty well
animate()
# Another dataset
from sklearn.datasets import make_moons, make_blobs
X, Y = make_moons(n_samples=100, noise=0.1)
Y = Y*2 - 1 # make y be -1 or 1
# visualize in 2D
plt.figure(figsize=(5,5))
plt.scatter(X[:,0], X[:,1], c=Y, s=20, cmap='jet')
def Neuron(weights, inputs, relu=True):
    v = sum(weights[i]*x for i, x in enumerate(inputs))
    return v.relu() if relu else v
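# A single neuron on two inputs, for illustration (a sketch with arbitrary weights):
w = [Value(0.5), Value(-1.0)]
print(float(Neuron(w, [1.0, 2.0])))  # relu(0.5*1.0 - 1.0*2.0) = relu(-1.5) = 0.0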
class Net:
    # Depth-3 fully connected neural net with two inputs and one output
    def __init__(self, N=16):
        self.layer_1 = [[Value(), Value()] for i in range(N)]
        self.layer_2 = [[Value() for j in range(N)] for i in range(N)]
        self.output = [Value() for i in range(N)]
        self.parameters = [v for L in [self.layer_1, self.layer_2, [self.output]] for w in L for v in w]
    def __call__(self, x):
        layer_1_vals = [Neuron(w, x) for w in self.layer_1]
        layer_2_vals = [Neuron(w, layer_1_vals) for w in self.layer_2]
        return Neuron(self.output, layer_2_vals, relu=False)
    def zero_grad(self):
        for p in self.parameters:
            p.grad = 0
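# With the default N=16, the net has 16*2 + 16*16 + 16 = 304 scalar parameters:
print(len(Net().parameters))  # 304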
model = Net()
epochs = 8
η = 1
for t in range(epochs):
    loss = sum([(1 + -y*model(x)).relu() for (x, y) in zip(X, Y)]) / len(X)
    model.zero_grad()
    loss.backward()
    for p in model.parameters:
        p.data -= η*p.grad
    acc = sum(float(model(x))*y > 0 for (x, y) in zip(X, Y))
    print(f"Epoch {t} mean loss {loss.data:.3f} Accuracy {acc*100 / len(X):.0f}%")
Epoch 0 mean loss 1.094 Accuracy 76%
Epoch 1 mean loss 0.924 Accuracy 81%
Epoch 2 mean loss 0.403 Accuracy 86%
Epoch 3 mean loss 0.302 Accuracy 86%
Epoch 4 mean loss 0.280 Accuracy 82%
Epoch 5 mean loss 0.413 Accuracy 80%
Epoch 6 mean loss 0.537 Accuracy 82%
Epoch 7 mean loss 0.372 Accuracy 86%
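# The per-example loss above is the hinge loss max(0, 1 - y*f(x)): it is zero once
# a point is classified with the correct sign and margin at least 1, and grows
# linearly with the violation. A plain-float sketch for intuition:
def hinge(y, score): return max(0.0, 1.0 - y*score)
print(hinge(+1, 2.0), hinge(+1, 0.5), hinge(-1, 2.0))  # 0.0 0.5 3.0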
def decision_boundary():
    h = 0.25
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    Xmesh = np.c_[xx.ravel(), yy.ravel()]
    inputs = [list(map(Value, xrow)) for xrow in Xmesh]
    scores = list(map(model, inputs))
    Z = np.array([s.data > 0 for s in scores])
    Z = Z.reshape(xx.shape)
    fig = plt.figure()
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral, alpha=0.8)
    plt.scatter(X[:, 0], X[:, 1], c=Y, s=40, cmap=plt.cm.Spectral)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())
decision_boundary()