%% Cell type:markdown id:7f27c493-5ce5-4eec-a4e2-68d0a7c8a267 tags:
# Auto-differentiation: first example building a multilayer perceptron from scratch
@author: gilsonmatthieu
%% Cell type:code id:b7879792-1e21-401e-b037-1471ddf2cb91 tags:
``` python
import os
import numpy as np
import matplotlib.pyplot as plt
# working directory for saved outputs
work_dir = 'tmp/'
if not os.path.exists(work_dir):
    print('create directory:', work_dir)
    os.makedirs(work_dir)
# format for saved figures
grph_fmt = 'png'
# grayscale colors for plotting
cols_gr = []
for ii in [2,1,0]:
    cols_gr += [[ii*0.3, ii*0.3, ii*0.3]]
```
%% Cell type:markdown id:6d8fafc1-835d-4b53-b070-a5077f561412 tags:
We first load the MNIST dataset of handwritten digits (we then switch to the smaller 8x8 digits dataset from scikit-learn).
%% Cell type:code id:57dc9276-317c-4bb4-95e4-69dde16c379c tags:
``` python
# load the MNIST dataset; locals().update injects the stored arrays
# (train_data, train_labels, test_data, ...) into the notebook namespace
locals().update(np.load('../data/MNIST/mnist.npz'))
# number of samples, pixels, features (when vectorized)
n_train = train_data.shape[0]
n_test = test_data.shape[0]
n_px = train_data.shape[1]
n_feat = n_px**2
# number of categories / classes
n_cat = np.unique(train_labels).size
print('number of classes:', n_cat)
print('classes:', np.unique(train_labels, return_counts=True))
```
%% Output
number of classes: 10
classes: (array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([5923, 6742, 5958, 6131, 5842, 5421, 5918, 6265, 5851, 5949]))
%% Cell type:code id:73df08d3-cb2f-4323-accd-6918d88e7f9a tags:
``` python
# switch to the smaller scikit-learn digits dataset (8x8 images),
# overwriting the MNIST arrays loaded above
from sklearn.datasets import load_digits
mnist = load_digits()
train_data, train_labels = mnist.data, mnist.target
```
%% Cell type:code id:48a81731-4888-469b-ad3a-b4f5dcbed9b0 tags:
``` python
n_train = train_data.shape[0]
n_px = 8  # digits images are 8x8 pixels
n_feat = train_data.shape[1]  # 64 features when vectorized
print(n_train, n_feat)
```
%% Output
1797 64
%% Cell type:code id:ce045462-394f-4825-8360-f9c5c665b926 tags:
``` python
i = 0
plt.imshow(train_data[i].reshape([8,8]))
plt.title(train_labels[i])
```
%% Output
Text(0.5, 1.0, '0')
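%% Cell type:markdown tags:
To get a better sense of the data before training, one can display a few samples with their labels; a minimal sketch (the grid layout is an arbitrary choice):
%% Cell type:code tags:
``` python
# display the first 10 digits with their labels (sketch)
fig, axs = plt.subplots(2, 5, figsize=(8, 4))
for k, ax in enumerate(axs.ravel()):
    ax.imshow(train_data[k].reshape([n_px, n_px]))
    ax.set_title(train_labels[k])
    ax.axis('off')
plt.tight_layout()
```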
%% Cell type:markdown id:9fc468e6-7917-4889-84fc-b0f7df20c998 tags:
We want to build a network of two layers of neurons with a given nonlinear activation function (e.g. tanh or the sigmoid; the code below uses the sigmoid), whose weights are optimized to reduce a loss function (the error between the output and the corresponding target for each sample). We start with a single layer implementing the forward pass and the weight update derived from the gradient of the loss.
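For a single sigmoid unit this gradient can be written explicitly. A short derivation, assuming the squared-error loss $L = \frac{1}{2}(y - t)^2$ (the convention that matches the update computed in `bckwd` below): with the sigmoid

$$f(x) = \frac{1}{1 + e^{-x}},$$

the derivative can be expressed from the output alone, $f'(x) = f(x)\,(1 - f(x)) = y\,(1 - y)$, which is what the method `df_finv` implements. The chain rule then gives the gradient with respect to the weights $W$ (including the bias):

$$\frac{\partial L}{\partial W} = (y - t)\; y\,(1 - y)\; x_1,$$

where $x_1$ is the input augmented with a constant 1 for the bias.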
%% Cell type:code id:b2cb5fa6-b1ad-47c9-9062-f1bca177e52f tags:
``` python
# layer of artificial neurons, processing one sample at a time
class layer:
    # forward (activation) function: sigmoid
    def f(self, x):
        return 1 / (1 + np.exp(-x))
    # derivative of f expressed as a function of the output y = f(x):
    # for the sigmoid, f'(x) = y * (1 - y)
    def df_finv(self, y):
        return y * (1 - y)
    # forward pass for x of shape (input dim,)
    def fwd(self, x, W, return_x1=False):
        # augment the input by one extra element equal to 1 (for the bias)
        x1 = np.ones([x.shape[0]+1])
        x1[:-1] = x
        # calculate output after weight multiplication and function f
        y = self.f(np.dot(W, x1))
        # return y alone, or y together with the augmented input
        if return_x1:
            return y, x1
        else:
            return y
    # backward pass: gradient of the squared-error loss with respect to W
    def bckwd(self, x, W, tgt):
        # calculate output (and keep the augmented input)
        y, x1 = self.fwd(x, W, return_x1=True)
        # derivative of the loss with respect to the output
        err = y - tgt
        # chain rule (assumes a single output neuron):
        # loss derivative * activation derivative * augmented input
        dW = err * self.df_finv(y) * x1
        # return weight update (gradient)
        return dW
```
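%% Cell type:markdown tags:
To sanity-check the gradient returned by `bckwd`, one can compare it against a finite-difference approximation. A minimal sketch, assuming the squared-error loss $\frac{1}{2}(y-t)^2$ that `bckwd` differentiates (the helper names `loss_fn` and `num_grad` are ours):
%% Cell type:code tags:
``` python
# finite-difference check of the analytical gradient (sketch)
def loss_fn(l, x, W, tgt):
    # squared-error loss with the 1/2 factor matching bckwd's gradient
    y = l.fwd(x, W)
    return 0.5 * np.sum((y - tgt)**2)

def num_grad(l, x, W, tgt, eps=1e-6):
    # central finite differences, one weight at a time
    dW = np.zeros_like(W)
    for idx in np.ndindex(W.shape):
        W_p = W.copy()
        W_m = W.copy()
        W_p[idx] += eps
        W_m[idx] -= eps
        dW[idx] = (loss_fn(l, x, W_p, tgt) - loss_fn(l, x, W_m, tgt)) / (2 * eps)
    return dW

l_check = layer()
W_check = np.random.randn(1, 3)
x_check = np.random.randn(2)
print(np.allclose(l_check.bckwd(x_check, W_check, 0.0),
                  num_grad(l_check, x_check, W_check, 0.0)))
```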
%% Cell type:code id:193f95a4-1f27-45e5-9592-ec449d9408f4 tags:
``` python
l = layer()
```
%% Cell type:code id:c94cd92b-a507-47b2-91cb-199dca458d26 tags:
``` python
vx = np.linspace(-5,5,100)
plt.plot(vx, l.f(vx))
```
%% Output
[<matplotlib.lines.Line2D at 0x7eb405d57040>]
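%% Cell type:markdown tags:
As a visual check of `df_finv`, its value at $y=f(x)$ should match the slope of the sigmoid; a minimal sketch comparing it with a finite-difference estimate:
%% Cell type:code tags:
``` python
# compare df_finv evaluated at f(x) with a numerical derivative of f (sketch)
vx = np.linspace(-5, 5, 100)
plt.figure()
plt.plot(vx, l.df_finv(l.f(vx)), label='df_finv(f(x))')
plt.plot(vx[:-1], np.diff(l.f(vx)) / np.diff(vx), '--', label='finite differences')
plt.legend()
```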
%% Cell type:code id:b860d280-efa3-45dc-bd59-785ecb6e4f4e tags:
``` python
# let's test the forward pass
M = 2 # dimensionality of input x
N = 1 # dimensionality of output y
W = np.array([[1.0, -1.0, 0.5]]) # weights of shape (N,M+1) with bias
# generate random input x
x = np.random.normal(loc=0.0, size=[M])
l = layer()
y = l.fwd(x, W)
print('sample: x={} mapped to y={}'.format(x,y))
```
%% Output
sample: x=[-0.59401376 -0.91878639] mapped to y=[0.6952485]
%% Cell type:code id:4c53fa7c-65fa-4d6d-9b7f-973c01567bfd tags:
``` python
# desired target
tgt = 0
# learning rate (large here, so that a single step visibly moves the output)
lr = 50
# calculate gradient
dW = l.bckwd(x, W, tgt)
print(dW)
# do the weight update (one gradient descent step)
W += - lr * dW
print(l.fwd(x, W))
```
%% Output
[-7.98169760e-05 -1.23456318e-04 1.34368901e-04]
[0.01149106]
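%% Cell type:markdown tags:
Iterating the same update keeps pushing the output toward the target; a quick sketch (the number of steps is arbitrary, and `W` is re-initialized before the training below):
%% Cell type:code tags:
``` python
# repeat the gradient step and watch the output approach the target (sketch)
for step in range(5):
    W += - lr * l.bckwd(x, W, tgt)
    print('step', step, 'y =', l.fwd(x, W))
```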
%% Cell type:code id:d5f1d900-9b43-4984-ae80-2060f167523a tags:
``` python
print(np.unique(train_labels))
# keep only the samples of classes 0 and 1 (binary classification)
sel_data = np.logical_or(train_labels==0, train_labels==1)
train_data2 = train_data[sel_data]
train_labels2 = train_labels[sel_data]
print(np.unique(train_labels2))
```
%% Output
[0 1 2 3 4 5 6 7 8 9]
[0 1]
%% Cell type:code id:92500684-9137-4f5d-b035-a6da8a1274d8 tags:
``` python
n_train2 = train_data2.shape[0]
# initial weight vector (one weight per feature, plus one for the bias)
print(n_feat)
W = np.random.randn(n_feat+1)
loss = [] # history of mean squared loss
acc = [] # history of accuracy
lr = 0.01 # learning rate
```
%% Output
64
%% Cell type:code id:57a5a8a9-81c9-46f9-afe3-96f17c178969 tags:
``` python
# loss and accuracy before training
loss_tmp = 0.0
acc_tmp = 0
for i in range(n_train2):
    # get a sample
    x = train_data2[i]
    tgt = train_labels2[i]
    # calculate loss and accuracy contributions
    y = l.fwd(x, W)
    loss_tmp += (tgt - y)**2
    pred = int(y>0.5)
    acc_tmp += int(tgt==pred)
loss.append(loss_tmp/n_train2)
acc.append(acc_tmp/n_train2)
print(loss, acc)
```
%% Output
[0.5470240782993646] [0.44166666666666665]
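%% Cell type:markdown tags:
The evaluation loop above is reused after training below; wrapping it in a small helper avoids the duplication. A minimal sketch (the function name `evaluate` is ours):
%% Cell type:code tags:
``` python
# mean squared loss and accuracy over a dataset (sketch)
def evaluate(l, W, data, labels):
    loss_tmp, acc_tmp = 0.0, 0
    for x, tgt in zip(data, labels):
        y = l.fwd(x, W)
        loss_tmp += (tgt - y)**2
        acc_tmp += int(tgt == int(y > 0.5))
    return loss_tmp / len(data), acc_tmp / len(data)

# same quantities as computed by the loop above
print(evaluate(l, W, train_data2, train_labels2))
```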
%% Cell type:code id:bbf368b0-4e6e-49c1-8195-56cbbbaf9b72 tags:
``` python
# train our layer for one epoch on the binary digits subset
for i in range(n_train2):
    # get a sample
    x = train_data2[i]
    tgt = train_labels2[i]
    # calculate gradient and update weight
    dW = l.bckwd(x, W, tgt)
    W += - lr * dW
# loss and accuracy after training
loss_tmp = 0.0
acc_tmp = 0
for i in range(n_train2):
    # get a sample
    x = train_data2[i]
    tgt = train_labels2[i]
    # calculate loss and accuracy contributions
    y = l.fwd(x, W)
    loss_tmp += (tgt - y)**2
    pred = int(y>0.5)
    acc_tmp += int(tgt==pred)
loss.append(loss_tmp/n_train2)
acc.append(acc_tmp/n_train2)
# plot the evolution of loss and accuracy
plt.figure()
plt.plot(loss, label='loss')
plt.plot(acc, label='acc')
plt.legend()
```
%% Output
<matplotlib.legend.Legend at 0x7eb4338fbbe0>
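%% Cell type:markdown tags:
To train for longer, one can simply repeat the update loop and record the metrics after each epoch; a minimal sketch using the `evaluate` helper above (`n_epochs` is arbitrary):
%% Cell type:code tags:
``` python
# continue training for a few more epochs (sketch)
n_epochs = 5
for epoch in range(n_epochs):
    # one pass of sample-by-sample gradient updates
    for i in range(n_train2):
        dW = l.bckwd(train_data2[i], W, train_labels2[i])
        W += - lr * dW
    # record loss and accuracy after each epoch
    loss_ep, acc_ep = evaluate(l, W, train_data2, train_labels2)
    loss.append(loss_ep)
    acc.append(acc_ep)

plt.figure()
plt.plot(loss, label='loss')
plt.plot(acc, label='acc')
plt.legend()
```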