Comparing manual and automatic gradient propagation

With autograd now implemented on top of the basic tensor operations, let's compare the code for a simple linear neural network written two ways: first with backprop implemented by hand, and then using the new Tensor class we just implemented.
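
As a reminder of the interface the second version relies on, below is a minimal sketch of a Tensor with reverse-mode autograd. It is an illustrative approximation, not the actual code in lightdlf_old.cpu.core: each operation records its creator tensors and the operation that produced it, so backward() can route gradients back through the graph.

import numpy as np

class Tensor(object):
    def __init__(self, data, autograd=False, creators=None, creation_op=None):
        self.data = np.array(data)
        self.autograd = autograd
        self.creators = creators          # tensors this one was computed from
        self.creation_op = creation_op    # operation that produced this tensor
        self.grad = None

    def backward(self, grad):
        if not self.autograd:
            return
        # accumulate the incoming gradient
        if self.grad is None:
            self.grad = grad
        else:
            self.grad = Tensor(self.grad.data + grad.data)
        if self.creators is None:
            return
        if self.creation_op == 'mm':
            a, b = self.creators
            a.backward(Tensor(grad.data.dot(b.data.T)))
            b.backward(Tensor(a.data.T.dot(grad.data)))
        elif self.creation_op == 'mul':
            a, b = self.creators
            a.backward(Tensor(grad.data * b.data))
            b.backward(Tensor(grad.data * a.data))
        elif self.creation_op == 'sub':
            a, b = self.creators
            a.backward(Tensor(grad.data))
            b.backward(Tensor(-grad.data))
        elif self.creation_op == 'sum_0':
            a = self.creators[0]
            a.backward(Tensor(np.ones_like(a.data) * grad.data))

    def mm(self, other):
        return Tensor(self.data.dot(other.data), autograd=True,
                      creators=[self, other], creation_op='mm')

    def __mul__(self, other):
        return Tensor(self.data * other.data, autograd=True,
                      creators=[self, other], creation_op='mul')

    def __sub__(self, other):
        return Tensor(self.data - other.data, autograd=True,
                      creators=[self, other], creation_op='sub')

    def sum(self, dim):
        return Tensor(self.data.sum(dim), autograd=True,
                      creators=[self], creation_op='sum_' + str(dim))

    def __repr__(self):
        return str(self.data)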

import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
import numpy as np
import copy
# fixed initial weights (instead of np.random.rand), so both implementations start from identical values
tensor_1 = np.array([[0.5488135,0.71518937,0.60276338],
                     [0.54488318,0.4236548,0.64589411]])
tensor_2 = np.array([[0.43758721],
                     [0.891773  ],
                     [0.96366276]])

Manual backprop

import numpy as np
np.random.seed(0)

data = np.array([[0,0],[0,1],[1,0],[1,1]])              # (4,2)
target = np.array([[0],[1],[0],[1]])                    # (4,1)

# weights_0_1 = np.random.rand(2,3)                       # (2,3)
# weights_1_2 = np.random.rand(3,1)                       # (3,1)
weights_0_1 = copy.deepcopy(tensor_1)                   # (2,3)
weights_1_2 = copy.deepcopy(tensor_2)                   # (3,1)
alpha = 0.1

for i in range(10):
    # Forward prop
    layer_1 = np.dot(data, weights_0_1)                 # (4,3)
    layer_2 = np.dot(layer_1, weights_1_2)              # (4,1)
    
    diff = layer_2 - target                             # (4,1)
    loss = (diff * diff).sum(0)
    
    # Backprop
    layer_1_grad = np.dot(diff, weights_1_2.T)          # (4,3)
    weight_1_2_update = np.dot(layer_1.T, diff)         # (3,1)
    weight_0_1_update = np.dot(data.T, layer_1_grad)    # (2,3)
    
    weights_1_2 = weights_1_2 - alpha * weight_1_2_update 
    weights_0_1 = weights_0_1 - alpha * weight_0_1_update
    
    print(loss)
[5.06644]
[0.49599078]
[0.41806719]
[0.35298133]
[0.29725497]
[0.24923261]
[0.20785392]
[0.17231261]
[0.14193745]
[0.1161398]
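
The updates in the manual version are just the chain rule for a two-layer purely linear network. A quick way to verify them is a finite-difference check; the sketch below is not part of the original code and assumes the setup cell above has been run so that tensor_1 and tensor_2 exist. It also shows that the manual code propagates diff rather than the exact derivative 2 * diff, so its update is exactly half of the true gradient; the missing constant only rescales the effective learning rate. Checking weight_0_1_update works the same way.

import numpy as np

data_np = np.array([[0,0],[0,1],[1,0],[1,1]], dtype=float)      # (4,2)
target_np = np.array([[0],[1],[0],[1]], dtype=float)            # (4,1)
w01 = tensor_1.copy()                                            # (2,3)
w12 = tensor_2.copy()                                            # (3,1)

def loss_fn(w_a, w_b):
    layer_2 = data_np.dot(w_a).dot(w_b)
    return ((layer_2 - target_np) ** 2).sum()

# update for weights_1_2 as computed in the manual loop above
layer_1 = data_np.dot(w01)
diff = layer_1.dot(w12) - target_np
manual_update = layer_1.T.dot(diff)                              # (3,1)

# central finite differences, one entry of w12 at a time
eps = 1e-6
fd_grad = np.zeros_like(w12)
for i in range(w12.shape[0]):
    for j in range(w12.shape[1]):
        w_plus, w_minus = w12.copy(), w12.copy()
        w_plus[i, j] += eps
        w_minus[i, j] -= eps
        fd_grad[i, j] = (loss_fn(w01, w_plus) - loss_fn(w01, w_minus)) / (2 * eps)

print(np.allclose(fd_grad, 2 * manual_update))                   # expected: True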

Backprop using autograd

import numpy as np
from lightdlf_old.cpu.core import Tensor
np.random.seed(0)

data = Tensor(np.array([[0,0],[0,1],[1,0],[1,1]]), autograd=True)   # (4,2)
target = Tensor(np.array([[0],[1],[0],[1]]), autograd=True)         # (4,1)

w = list()
# w.append(Tensor(np.random.rand(2,3), autograd=True))                # (2,3)
# w.append(Tensor(np.random.rand(3,1), autograd=True))                # (3,1)
w.append(Tensor(copy.deepcopy(tensor_1), autograd=True))            # (2,3)
w.append(Tensor(copy.deepcopy(tensor_2), autograd=True))            # (3,1)
alpha = 0.1


for i in range(10):
    pred = data.mm(w[0]).mm(w[1])                       # prediction
#     print(pred)

    loss = ((pred - target) * (pred - target)).sum(0)   # loss function

    loss.backward(Tensor(np.ones_like(loss.data)))      # backpropagate the gradient
    for w_ in w:                                        # learning: update the weights
        w_.data = w_.data - (alpha * w_.grad.data)
        w_.grad.data *= 0
        
    print(loss)
[5.06644]
[1.72520804]
[0.97072979]
[0.44845782]
[0.19705059]
[0.11889682]
[0.0785371]
[0.05072462]
[0.03190535]
[0.01958509]

Besides performing backpropagation automatically, we also added the abstraction of keeping all the weights in a list, which makes it much easier to iterate over every weight when applying the updates. Note, by the way, that the two runs do not print identical losses even though they start from the same weights: the autograd version differentiates the loss exactly and therefore propagates 2 * (pred - target), while the manual version dropped that factor of 2 (as the finite-difference check above shows), which effectively halves its learning rate.
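
A natural next step for that abstraction is to move the update loop into a small optimizer object that owns the parameter list. The following is a hypothetical sketch of such a class; the name and signature are illustrative, not an existing lightdlf_old API.

class SGD(object):
    def __init__(self, parameters, alpha=0.1):
        self.parameters = parameters            # list of Tensor objects
        self.alpha = alpha

    def zero(self):
        # reset accumulated gradients
        for p in self.parameters:
            p.grad.data *= 0

    def step(self, zero=True):
        # apply one gradient-descent update to every parameter
        for p in self.parameters:
            p.data = p.data - self.alpha * p.grad.data
            if zero:
                p.grad.data *= 0

# usage in the training loop above:
# optim = SGD(parameters=w, alpha=0.1)
# loss.backward(Tensor(np.ones_like(loss.data)))
# optim.step()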