本博文参考七月在线pytorch课程
1. numpy和pytorch实现梯度下降法
使用numpy实现简单神经网络
import numpy as np

# Two-layer fully connected network (linear -> ReLU -> linear) trained with
# hand-written gradient descent on random data, using only numpy.
# x is N x D_in, the hidden activation is N x H and the output is N x D_out.
N, D_in, H, D_out = 64, 1000, 100, 10

# Randomly create some training data.
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)

# Randomly initialise the two weight matrices.
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

learning_rate = 1e-6
for it in range(500):
    # Forward pass
    h = x.dot(w1)  # N * H
    h_relu = np.maximum(h, 0)  # N * H
    y_pred = h_relu.dot(w2)  # N * D_out

    # Compute the loss: sum of squared errors over all outputs.
    loss = np.square(y_pred - y).sum()
    print(it, loss)

    # Backward pass: compute the gradients of the loss by the chain rule.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.T.dot(grad_y_pred)
    grad_h_relu = grad_y_pred.dot(w2.T)
    # Work on a copy so that grad_h_relu itself is left untouched, then zero
    # the entries where the pre-activation was negative (ReLU passes no
    # gradient there).
    grad_h = grad_h_relu.copy()
    grad_h[h < 0] = 0
    grad_w1 = x.T.dot(grad_h)

    # Update the weights w1 and w2 with one gradient-descent step.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
使用pytorch实现简单神经网络
import torch

# Two-layer fully connected network (linear -> ReLU -> linear) trained with
# hand-written gradient descent on random data, using torch tensors.
# No autograd is used here: the gradients are derived and computed manually.
# x is N x D_in, the hidden activation is N x H and the output is N x D_out.
N, D_in, H, D_out = 64, 1000, 100, 10

# Randomly create some training data.
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)

# Randomly initialise the two weight matrices.
w1 = torch.randn(D_in, H)
w2 = torch.randn(H, D_out)

learning_rate = 1e-6
for it in range(500):
    # Forward pass
    h = x.mm(w1)  # N * H
    h_relu = h.clamp(min=0)  # N * H
    y_pred = h_relu.mm(w2)  # N * D_out

    # Compute the loss: sum of squared errors; .item() converts the
    # one-element tensor into a plain Python number for printing.
    loss = (y_pred - y).pow(2).sum().item()
    print(it, loss)

    # Backward pass: compute the gradients of the loss by the chain rule.
    grad_y_pred = 2.0 * (y_pred - y)
    grad_w2 = h_relu.t().mm(grad_y_pred)
    grad_h_relu = grad_y_pred.mm(w2.t())
    # Work on a clone so that grad_h_relu itself is left untouched, then zero
    # the entries where the pre-activation was negative (ReLU passes no
    # gradient there).
    grad_h = grad_h_relu.clone()
    grad_h[h < 0] = 0
    grad_w1 = x.t().mm(grad_h)

    # Update the weights w1 and w2 with one gradient-descent step.
    w1 -= learning_rate * grad_w1
    w2 -= learning_rate * grad_w2
设定初始值
# Side-by-side comparison of how the initial values are created.
# NOTE: this excerpt reuses N, D_in, H and D_out from the examples above and
# expects numpy (as np) and torch to be imported already. The pytorch half
# rebinds the same names (x, y, w1, w2) that the numpy half assigns.

# numpy
x = np.random.randn(N, D_in)
y = np.random.randn(N, D_out)
w1 = np.random.randn(D_in, H)
w2 = np.random.randn(H, D_out)

# pytorch
x = torch.randn(N, D_in)
y = torch.randn(N, D_out)
w1 = torch.randn(D_in, H)
w2 = torch.randn(H, D_out)