Tensor
Import PyTorch and generate a random 5x3 tensor
>>> from __future__ import print_function
>>> import torch
>>> x = torch.rand(5, 3)
>>> print(x)
tensor([[0.5555, 0.7301, 0.5655],
        [0.9998, 0.1754, 0.7808],
        [0.5512, 0.8162, 0.6148],
        [0.8618, 0.3293, 0.6236],
        [0.2787, 0.0943, 0.2074]])
Declare a 5x3 tensor in which all elements are initialized to zero
>>> x = torch.zeros(5, 3, dtype=torch.long)
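For reference, printing this tensor shows integer zeros (no decimal point), because torch.long is a 64-bit integer dtype. The output below is what a typical run produces and is not taken from the original:

>>> print(x)
tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])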
Construct a tensor directly from data, typically a Python list
>>> x = torch.tensor([5.5, 3])
>>> print(x)
tensor([5.5000, 3.0000])
A new tensor can also be created from an existing one: it reuses the old tensor's properties, such as shape and data type, unless they are explicitly overridden (for example by passing a new dtype)
>>> x = x.new_ones(5, 3, dtype=torch.double)
>>> print(x)
tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]], dtype=torch.float64)
>>> y = torch.rand_like(x, dtype=torch.float)
>>> print(y)
tensor([[0.6934, 0.9637, 0.0594],
        [0.0863, 0.6638, 0.4728],
        [0.3416, 0.0892, 0.1761],
        [0.6831, 0.6404, 0.8307],
        [0.6254, 0.4180, 0.2174]])
Getting a tensor's size (the NumPy equivalent is shape)
>>> print(x.size())
torch.Size([5, 3])
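torch.Size is in fact a tuple, so it supports all tuple operations. A small illustrative example (not from the original) is sequence unpacking:

>>> rows, cols = x.size()
>>> print(rows, cols)
5 3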
Tensor operations
Tensor Addition
>>> x = torch.rand(5, 3)
>>> y = torch.zeros(5, 3)
>>> print(x + y)
tensor([[0.8991, 0.9222, 0.2050],
        [0.2478, 0.7688, 0.4156],
        [0.4055, 0.9526, 0.2559],
        [0.9481, 0.8576, 0.4816],
        [0.0767, 0.3346, 0.0922]])
>>> print(torch.add(x, y))
tensor([[0.8991, 0.9222, 0.2050],
        [0.2478, 0.7688, 0.4156],
        [0.4055, 0.9526, 0.2559],
        [0.9481, 0.8576, 0.4816],
        [0.0767, 0.3346, 0.0922]])
>>> result = torch.empty(5, 3)
>>> torch.add(x, y, out=result)  # write the result into a preallocated tensor
tensor([[0.8991, 0.9222, 0.2050],
        [0.2478, 0.7688, 0.4156],
        [0.4055, 0.9526, 0.2559],
        [0.9481, 0.8576, 0.4816],
        [0.0767, 0.3346, 0.0922]])
>>> y.add_(x)  # in-place addition: methods ending in _ modify the tensor itself
tensor([[0.8991, 0.9222, 0.2050],
        [0.2478, 0.7688, 0.4156],
        [0.4055, 0.9526, 0.2559],
        [0.9481, 0.8576, 0.4816],
        [0.0767, 0.3346, 0.0922]])
Indexing into a tensor works the same way as in NumPy. For example, print all elements of y whose index along the second dimension is 1 (i.e., the second column)
>>> print(y[:, 1])
tensor([0.9222, 0.7688, 0.9526, 0.8576, 0.3346])
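Row slicing follows the same pattern; for instance, the first row of the same y (an illustrative example, with values taken from the y computed above):

>>> print(y[0, :])
tensor([0.8991, 0.9222, 0.2050])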
The view function changes a tensor's shape, similar to NumPy's reshape
>>> x = torch.randn(4, 4)
>>> y = x.view(16)     # flattened into a 1D tensor with 16 elements
>>> z = x.view(-1, 8)  # the second dimension is 8; the first is inferred automatically, giving a 2x8 tensor
>>> print(x.size(), y.size(), z.size())
torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])
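Any target shape works as long as the total number of elements (16 here) stays the same; a quick illustrative check (not from the original):

>>> x.view(2, 2, 4).size()
torch.Size([2, 2, 4])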
If a tensor contains a single element, use item() to extract it as a Python number
>>> x = torch.randn(1)
>>> print(x)
tensor([0.8542])
>>> print(x.item())
0.8541867136955261
Convert a tensor to a NumPy array
>>> x = torch.rand(5, 3)
>>> x.numpy()
array([[0.9320856 , 0.473859  , 0.6787642 ],
       [0.14365482, 0.1112923 , 0.8280207 ],
       [0.4609589 , 0.51031697, 0.15313298],
       [0.18854082, 0.4548    , 0.49709243],
       [0.8351501 , 0.6160053 , 0.61391556]], dtype=float32)
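Note that the tensor and the NumPy array returned by numpy() share the same underlying memory. A minimal sketch (not from the original) showing that an in-place change on the tensor side is visible on the array side:

>>> a = torch.ones(5)
>>> b = a.numpy()
>>> a.add_(1)      # in-place change to the tensor...
tensor([2., 2., 2., 2., 2.])
>>> print(b)       # ...is reflected in the NumPy array
[2. 2. 2. 2. 2.]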
Converting a NumPy array to a tensor with torch.from_numpy: all CPU tensors except CharTensor support converting to NumPy and back. The resulting tensor shares memory with the array, so modifying the array in place also changes the tensor
import numpy as np

a = np.ones(5)
b = torch.from_numpy(a)
np.add(a, 1, out=a)  # modify a in place; b changes too because it shares a's memory
print(a)             # [2. 2. 2. 2. 2.]
print(b)             # tensor([2., 2., 2., 2., 2.], dtype=torch.float64)
Move a tensor between the CPU and the GPU.
if torch.cuda.is_available():
    device = torch.device("cuda")          # a CUDA device object
    y = torch.ones_like(x, device=device)  # create a tensor directly on the GPU
    x = x.to(device)                       # or just use strings: .to("cuda")
    z = x + y
    print(z)
    print(z.to("cpu", torch.double))       # .to() can also change the dtype at the same time
Building networks and loss functions
A loss function measures the distance between the network's output and the target
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    ## Define the structure of the network
    def __init__(self):
        super(Net, self).__init__()
        ## 1 input channel, 6 output channels, 3x3 convolutional kernel
        self.conv1 = nn.Conv2d(1, 6, 3)
        self.conv2 = nn.Conv2d(6, 16, 3)
        # an affine operation: y = Wx + b
        # 6*6 from image dimension
        self.fc1 = nn.Linear(16 * 6 * 6, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    ## Forward propagation; the method must be named forward
    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # If the window is square you can specify a single number
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, self.num_flat_features(x))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features


## Create a Net object
net = Net()
print(net)

params = list(net.parameters())
print(len(params))
print(params[0].size())  # conv1's .weight

# Declare a 4D tensor of shape 1x1x32x32 as input to the network
input = torch.randn(1, 1, 32, 32)
output = net(input)

target = torch.randn(10)     # a dummy target, for example
target = target.view(1, -1)  # reshape it to the same shape as the output
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)

print(loss.grad_fn)                                            # MSELoss
print(loss.grad_fn.next_functions[0][0])                       # Linear
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # ReLU
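For orientation, the shapes involved can be checked as follows (these prints are illustrative and not part of the original):

print(output.size())  # torch.Size([1, 10]) - one sample, ten output values
print(target.size())  # torch.Size([1, 10]) - reshaped to match the output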
To backpropagate the loss (error) through the network, all you need to do is call loss.backward(). Note that gradients accumulate: if you do not clear the existing gradients first, the new gradients are added to them
Call the loss.backward() function to see how the bias gradient of conv1 differs before and after the call.
net.zero_grad()  # zeroes the gradient buffers of all parameters

print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()

print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)
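To make the accumulation behaviour concrete, here is a minimal standalone sketch (an illustrative example, not part of the original): calling backward() twice without zeroing adds the second gradient to the first.

import torch

w = torch.ones(3, requires_grad=True)

loss = (w * 2).sum()
loss.backward()
print(w.grad)         # tensor([2., 2., 2.])

loss = (w * 2).sum()  # recompute the loss without clearing the gradient buffer
loss.backward()
print(w.grad)         # tensor([4., 4., 4.]) - accumulated, not overwritten

w.grad.zero_()        # clearing the buffer restores a clean starting point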
Update weights using SGD
Formula: weight = weight - learning_rate * gradient
You can do this with the torch code below
learning_rate = 0.01
for f in net.parameters():
    f.data.sub_(f.grad.data * learning_rate)
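An equivalent sketch using torch.no_grad(), the style recommended in more recent PyTorch versions (it assumes the same net and learning_rate as above):

with torch.no_grad():
    for f in net.parameters():
        f -= f.grad * learning_rate  # in-place update, not tracked by autograd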
However, torch.optim already implements various update rules, such as SGD, Nesterov-SGD, Adam, and RMSProp, which can be used directly
import torch.optim as optim

# create your optimizer
optimizer = optim.SGD(net.parameters(), lr=0.01)

# in your training loop:
optimizer.zero_grad()   # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()        # does the update
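Putting it all together, here is a minimal sketch of a full training loop over the same net, criterion, and optimizer (the epoch count and the reuse of the random input/target are illustrative assumptions, not from the original):

for epoch in range(5):                # hypothetical number of iterations
    optimizer.zero_grad()             # clear accumulated gradients
    output = net(input)               # forward pass on the 1x1x32x32 input
    loss = criterion(output, target)  # MSE loss against the dummy target
    loss.backward()                   # backpropagate
    optimizer.step()                  # update the weights
    print(epoch, loss.item())         # the loss should decrease over iterations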