Comparing manual and automatic gradient propagation

Now that autograd has been implemented on top of the basic tensor operations, let's compare the code for a simple linear neural network with backpropagation written by hand against the same network written with the new Tensor class we just implemented.
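To make the comparison concrete, both versions below train the same two-layer linear model (no activation functions). Under the shapes used in the code, it can be written as

$$\hat{y} = X W_1 W_2, \qquad \mathcal{L} = \sum_i (\hat{y}_i - y_i)^2$$

with $X$ of shape (4, 2), $W_1$ of shape (2, 3), $W_2$ of shape (3, 1) and $y$ of shape (4, 1).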
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)

import numpy as np
import copy

tensor_1 = np.array([[0.5488135, 0.71518937, 0.60276338],
                     [0.54488318, 0.4236548, 0.64589411]])
tensor_2 = np.array([[0.43758721],
                     [0.891773],
                     [0.96366276]])
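These hard-coded tensors pin the initial weights so that both implementations below start from exactly the same values. A quick check (a minimal sketch) suggests they appear to be the first draws of np.random.rand under seed 0, i.e. what the commented-out random initialization would have produced:

# Sketch: the fixed tensors appear to match the seed-0 draws of np.random.rand
np.random.seed(0)
print(np.allclose(np.random.rand(2, 3), tensor_1))   # expected: True
print(np.allclose(np.random.rand(3, 1), tensor_2))   # expected: True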
Manual backprop
import numpy as np

np.random.seed(0)

data = np.array([[0,0],[0,1],[1,0],[1,1]])            # (4,2)
target = np.array([[0],[1],[0],[1]])                  # (4,1)

# weights_0_1 = np.random.rand(2,3)                   # (2,3)
# weights_1_2 = np.random.rand(3,1)                   # (3,1)
weights_0_1 = copy.deepcopy(tensor_1)                 # (2,3)
weights_1_2 = copy.deepcopy(tensor_2)                 # (3,1)

alpha = 0.1

for i in range(10):
    # Forward prop
    layer_1 = np.dot(data, weights_0_1)               # (4,3)
    layer_2 = np.dot(layer_1, weights_1_2)            # (4,1)
    diff = layer_2 - target                           # (4,1)
    loss = (diff * diff).sum(0)

    # Backprop
    layer_1_grad = np.dot(diff, weights_1_2.T)        # (4,3)
    weight_1_2_update = np.dot(layer_1.T, diff)       # (3,1)
    weight_0_1_update = np.dot(data.T, layer_1_grad)  # (2,3)

    weights_1_2 = weights_1_2 - alpha * weight_1_2_update
    weights_0_1 = weights_0_1 - alpha * weight_0_1_update
    print(loss)
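For reference, the gradients computed in the backprop block follow directly from the chain rule. With $L_1 = X W_1$, $L_2 = L_1 W_2$ and $\mathcal{L} = \sum (L_2 - y)^2$:

$$\frac{\partial \mathcal{L}}{\partial L_2} = 2\,(L_2 - y), \qquad \frac{\partial \mathcal{L}}{\partial W_2} = L_1^\top \frac{\partial \mathcal{L}}{\partial L_2}, \qquad \frac{\partial \mathcal{L}}{\partial L_1} = \frac{\partial \mathcal{L}}{\partial L_2}\, W_2^\top, \qquad \frac{\partial \mathcal{L}}{\partial W_1} = X^\top \frac{\partial \mathcal{L}}{\partial L_1}$$

The code drops the constant factor 2, since it only rescales the step and is effectively absorbed into alpha; that is why diff is used directly in place of the full derivative with respect to layer_2.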
Backprop using autograd
import numpy as np
from lightdlf_old.cpu.core import Tensor

np.random.seed(0)

data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)   # (4,2)
target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)         # (4,1)

w = list()
# w.append(Tensor(np.random.rand(2,3), autograd=True))              # (2,3)
# w.append(Tensor(np.random.rand(3,1), autograd=True))              # (3,1)
w.append(Tensor(copy.deepcopy(tensor_1), autograd=True))            # (2,3)
w.append(Tensor(copy.deepcopy(tensor_2), autograd=True))            # (3,1)

alpha = 0.1

for i in range(10):
    pred = data.mm(w[0]).mm(w[1])                      # prediction
    # print(pred)
    loss = ((pred - target) * (pred - target)).sum(0)  # loss function
    loss.backward(Tensor(np.ones_like(loss.data)))     # backpropagate, seeding with a gradient of ones
    for w_ in w:                                       # learning: update each weight and reset its gradient
        w_.data = w_.data - (alpha * w_.grad.data)
        w_.grad.data *= 0
    print(loss)
Besides performing backpropagation automatically, we also introduced the abstraction of collecting all the weights in a list, which makes it easier to iterate over them when applying the weight updates.
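As a hint of where this abstraction leads, the update-and-reset step can be factored into a small helper. The sketch below assumes the same list of Tensor objects used above; sgd_step is just a hypothetical name, not something provided by the library:

def sgd_step(weights, alpha):
    # Hypothetical helper (sketch): apply one gradient-descent update to every
    # weight tensor in the list, then reset its accumulated gradient to zero
    for w_ in weights:
        w_.data = w_.data - (alpha * w_.grad.data)
        w_.grad.data *= 0

# Inside the training loop this would replace the inner for-loop:
# loss.backward(Tensor(np.ones_like(loss.data)))
# sgd_step(w, alpha)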