Linear regression
Setup¶
In [1]:
Copied!
import numpy as np
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.pyplot as plt
In [2]:
Copied!
# set random seed for reproducibility (legacy global-state NumPy API;
# seeding here makes the random weight initialization below repeatable)
np.random.seed(42)
# set random seed for reproducibility (legacy global-state NumPy API;
# seeding here makes the random weight initialization below repeatable)
np.random.seed(42)
In [3]:
Copied!
from nnfs.layers import Dense
from nnfs.model import Sequential
from nnfs.losses import MSE
from nnfs.optimizers import SGD
from nnfs.datasets.data_generators import generate_linear_data
from nnfs.layers import Dense
from nnfs.model import Sequential
from nnfs.losses import MSE
from nnfs.optimizers import SGD
from nnfs.datasets.data_generators import generate_linear_data
Data generation¶
Let us consider a linear regression problem: fitting a line $y = Wx + b$ to synthetically generated data.
In [4]:
Copied!
# generate data — presumably slope=3, intercept=-7, 5000 samples
# (consistent with the fitted parameters W≈2.97, b≈-6.84 further down — confirm in nnfs.datasets)
X_data, y_true = generate_linear_data(3, -7, 5000)
# plot data; the trailing None suppresses the scatter-object repr in the cell output
plt.scatter(X_data, y_true, s=0.5)
None
# generate data — presumably slope=3, intercept=-7, 5000 samples
# (consistent with the fitted parameters W≈2.97, b≈-6.84 further down — confirm in nnfs.datasets)
X_data, y_true = generate_linear_data(3, -7, 5000)
# plot data; the trailing None suppresses the scatter-object repr in the cell output
plt.scatter(X_data, y_true, s=0.5)
None
Model specification¶
Instead of manually deriving expressions for the derivative of the loss with respect to the model parameters (see the Introduction to gradients notebook), we can now use a generalized neural network model that will compute derivatives automatically.
In [5]:
Copied!
# define the model: a single Dense layer (1 input -> 1 output, 2 parameters: W and b)
# is exactly the linear-regression hypothesis y_pred = W*x + b
list_layers = [Dense(1, 1)]
# mean-squared-error loss
loss = MSE()
# plain stochastic gradient descent optimizer
optimizer = SGD()
model = Sequential(list_layers, loss, optimizer)
model.summary()
# define the model: a single Dense layer (1 input -> 1 output, 2 parameters: W and b)
# is exactly the linear-regression hypothesis y_pred = W*x + b
list_layers = [Dense(1, 1)]
# mean-squared-error loss
loss = MSE()
# plain stochastic gradient descent optimizer
optimizer = SGD()
model = Sequential(list_layers, loss, optimizer)
model.summary()
Model layers:
* Dense_0 | Dimensions: 1 x 1 | Parameters: 2
--------------------
Total parameters: 2
In [6]:
Copied!
# check initial weights/parameters (randomly initialized)
# each entry is a (name, value, gradient) tuple — gradients start at zero
model.layers[0].trainable
# check initial weights/parameters (randomly initialized)
# each entry is a (name, value, gradient) tuple — gradients start at zero
model.layers[0].trainable
Out[6]:
[('W', array([[0.16823658]]), array([[0.]])),
('b', array([[0.]]), array([[0.]]))]
Run one training step¶
Once the model is defined, here is how to run a complete round of optimization.
In [7]:
Copied!
# produce predictions: one forward pass over all 5000 samples
y_pred = model.forward(X_data)
# last expression is displayed: predictions come back as (n_samples, 1)
y_pred.shape
# produce predictions: one forward pass over all 5000 samples
y_pred = model.forward(X_data)
# last expression is displayed: predictions come back as (n_samples, 1)
y_pred.shape
Out[7]:
(5000, 1)
In [8]:
Copied!
# compute loss (scalar MSE between predictions and targets)
# NOTE: this rebinds the name `loss` (previously the MSE instance);
# the model keeps its own reference, so this is harmless here
loss = model.loss.forward(y_pred, y_true)
print(f"Initial loss: {loss}")
# compute loss (scalar MSE between predictions and targets)
# NOTE: this rebinds the name `loss` (previously the MSE instance);
# the model keeps its own reference, so this is harmless here
loss = model.loss.forward(y_pred, y_true)
print(f"Initial loss: {loss}")
Initial loss: 121.30301609014472
In [9]:
Copied!
# compute d_loss / d_ypred — gradient of the loss w.r.t. the predictions
d_loss = model.loss.backward()
# one gradient entry per sample: (5000, 1)
d_loss.shape
# compute d_loss / d_ypred — gradient of the loss w.r.t. the predictions
d_loss = model.loss.backward()
# one gradient entry per sample: (5000, 1)
d_loss.shape
Out[9]:
(5000, 1)
In [10]:
Copied!
# compute all the gradients dL/dw
# NOTE(review): the returned array is (5000, 1) — it looks like the gradient
# w.r.t. the model INPUT, while the per-parameter gradients dL/dW, dL/db are
# stored on each layer (visible in `trainable` below) — confirm in nnfs.model
grad = model.backward(d_loss)
grad.shape
# compute all the gradients dL/dw
# NOTE(review): the returned array is (5000, 1) — it looks like the gradient
# w.r.t. the model INPUT, while the per-parameter gradients dL/dW, dL/db are
# stored on each layer (visible in `trainable` below) — confirm in nnfs.model
grad = model.backward(d_loss)
grad.shape
Out[10]:
(5000, 1)
In [11]:
Copied!
# update weights: one optimizer step (presumably consumes the gradients
# produced by model.backward above — confirm in nnfs.optimizers)
model.update_weights()
# update weights: one optimizer step (presumably consumes the gradients
# produced by model.backward above — confirm in nnfs.optimizers)
model.update_weights()
In [12]:
Copied!
# check new, updated parameters — W and b have moved, and the third slot of
# each (name, value, gradient) tuple now holds the last computed gradient
model.layers[0].trainable
# check new, updated parameters — W and b have moved, and the third slot of
# each (name, value, gradient) tuple now holds the last computed gradient
model.layers[0].trainable
Out[12]:
[('W', array([[1.35411079]]), array([[-118.58742118]])),
('b', array([[0.14340042]]), array([[-14.34004193]]))]
In [13]:
Copied!
# re-compute loss (it should be lower!)
y_pred = model.forward(X_data)
loss = model.loss.forward(y_pred, y_true)
print(f"Loss after one training step: {loss}")
# re-compute loss (it should be lower!)
y_pred = model.forward(X_data)
loss = model.loss.forward(y_pred, y_true)
print(f"Loss after one training step: {loss}")
Loss after one training step: 27.219272518395844
Run training epoch¶
In [14]:
Copied!
# we can run all required training steps in one convenient function
# (the forward -> loss -> backward -> weight-update sequence shown cell-by-cell above)
loss = model.run_training_epoch(X_data, y_true)
print(f"Loss after two training steps: {loss}")
# we can run all required training steps in one convenient function
# (the forward -> loss -> backward -> weight-update sequence shown cell-by-cell above)
loss = model.run_training_epoch(X_data, y_true)
print(f"Loss after two training steps: {loss}")
Loss after two training steps: 17.584748557184327
Model training¶
Let us now train our model until it arrives at a good enough solution.
In [15]:
Copied!
# let us run several epochs! 1000 epochs; debug_flag=True prints the loss
# every 100 epochs (see the output below)
history = model.fit(X_data, y_true, 1000, debug_flag=True)
# let us run several epochs! 1000 epochs; debug_flag=True prints the loss
# every 100 epochs (see the output below)
history = model.fit(X_data, y_true, 1000, debug_flag=True)
Epoch 1 - Loss: 16.499602224296943 Epoch 100 - Loss: 8.673907958904849 Epoch 200 - Loss: 5.7331580557499615 Epoch 300 - Loss: 4.630050662126348 Epoch 400 - Loss: 4.216263035860806 Epoch 500 - Loss: 4.061046779818835 Epoch 600 - Loss: 4.00282346764027 Epoch 700 - Loss: 3.980983267602678 Epoch 800 - Loss: 3.972790770133295 Epoch 900 - Loss: 3.969717674892852 Epoch 1000 - Loss: 3.968564923318362
In [16]:
Copied!
# get loss values from the history returned by model.fit
loss = history["loss"]
# average over axis 1 (batches), leaving one loss value per epoch
avg_loss = loss.mean(axis=1)
# plot the learning curve; trailing None suppresses the line-object repr
plt.plot(avg_loss)
plt.xlabel("Epoch")
plt.ylabel("Loss")
None
# get loss values from the history returned by model.fit
loss = history["loss"]
# average over axis 1 (batches), leaving one loss value per epoch
avg_loss = loss.mean(axis=1)
# plot the learning curve; trailing None suppresses the line-object repr
plt.plot(avg_loss)
plt.xlabel("Epoch")
plt.ylabel("Loss")
None
Evaluation¶
After training the model, we can check how well it performs by comparing its predictions to the actual outputs in the training data.
In [17]:
Copied!
# let us print the final optimized parameters
# each trainable entry is (name, value, gradient); show name and value only
# (close to the generating values 3 and -7 used above)
for name, param, grad in model.layers[0].trainable:
print(f"{name}: {param.item():.2f}")
# let us print the final optimized parameters
# each trainable entry is (name, value, gradient); show name and value only
# (close to the generating values 3 and -7 used above)
for name, param, grad in model.layers[0].trainable:
print(f"{name}: {param.item():.2f}")
W: 2.97 b: -6.84
In [18]:
Copied!
# how well do these parameters fit our data?
y_pred = model.forward(X_data)
# raw data as a scatter, fitted line overlaid in orange; trailing None
# suppresses the plot-object repr
plt.scatter(X_data, y_true, s=0.5)
plt.plot(X_data, y_pred, color='orange')
None
# how well do these parameters fit our data?
y_pred = model.forward(X_data)
# raw data as a scatter, fitted line overlaid in orange; trailing None
# suppresses the plot-object repr
plt.scatter(X_data, y_true, s=0.5)
plt.plot(X_data, y_pred, color='orange')
None