Relationship Fitting (Regression)

Setting up datasets

Create some fake data to simulate a real situation. For example, take a quadratic function of one variable, y = a * x^2 + b, and add a little noise to the y data to make it look more realistic.

import torch
from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
# Fake data
# unsqueeze turns the 1-D tensor of 100 points into a 2-D tensor (one column)
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)
# Quadratic curve plus uniform noise in [0, 0.2)
y = x.pow(2) + 0.2 * torch.rand(x.size())

# Scatter plot of the raw data
plt.scatter(x.numpy(), y.numpy())
plt.show()

Establishing a neural network

A neural network can be built directly from the building blocks torch provides. First define all the layer attributes (in `__init__()`), then wire the layers together one after another (in `forward(x)`).

import torch

from torch.autograd import Variable
import torch.nn.functional as F
import matplotlib.pyplot as plt
class Net(torch.nn.Module):
    """Regression network: one hidden linear layer followed by a linear output layer.

    The output layer deliberately has no activation function: in most
    regression problems the target can take any real value, and an
    activation would truncate the predicted range.
    """

    def __init__(self, n_feature, n_hidden, n_output):
        """Create the layers.

        Args:
            n_feature: number of input features.
            n_hidden: number of neurons in the hidden layer.
            n_output: number of output values.
        """
        super().__init__()
        # The layers are plain attributes; torch.nn.Module registers them.
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.predict = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        """Forward pass: hidden layer -> ReLU -> output layer."""
        hidden_activation = F.relu(self.hidden(x))
        return self.predict(hidden_activation)

# Define the network.
# NOTE: instantiate it at module level, not inside the class body.
net = Net(n_feature=1, n_hidden=10, n_output=1)  # 1 input (x), 10 hidden neurons, 1 output (y)

plt.ion()  # interactive mode, so the figure can be redrawn while training

# Optimizer: plain SGD over all parameters of the network, learning rate 0.2
optimizer = torch.optim.SGD(net.parameters(), lr=0.2)
# Mean squared error between prediction and target
loss_func = torch.nn.MSELoss()

# Start training
for t in range(100):  # 100 steps
    prediction = net(x)  # feed the inputs through the network

    loss = loss_func(prediction, y)  # error between prediction and target y

    optimizer.zero_grad()  # reset all parameter gradients to zero
    loss.backward()        # back-propagate to compute the gradient of every parameter
    optimizer.step()       # apply the gradients to the parameters

    if t % 5 == 0:
        # Plot and show the learning process
        plt.cla()  # clear the previous frame so curves do not pile up
        plt.scatter(x.numpy(), y.numpy())
        # prediction is attached to the autograd graph; detach before converting
        plt.plot(x.numpy(), prediction.detach().numpy(), 'r-', lw=5)
        plt.text(0.2, 0, 'Loss=%.4f' % loss.item(), fontdict={'size': 20, 'color':  'red'})
        plt.pause(0.1)  # give the GUI event loop time to draw

plt.ioff()  # leave interactive mode
plt.show()  # keep the final figure open

Distinguish types (classifications)

Create some fake data to simulate a real situation: for example, two clusters of points drawn from normal distributions with different means. The neural network is again built directly with torch: define all the layer attributes first (in `__init__()`), then wire the layers together (in `forward(x)`).
This is basically the same network as the one used in the previous regression example.

import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F

# Fake data
n_data = torch.ones(100, 2)         # base tensor: 100 points, each with two coordinates
x0 = torch.normal(2*n_data, 1)      # class 0 x data (tensor), shape=(100, 2), mean 2, std 1
y0 = torch.zeros(100)               # class 0 labels, shape=(100,)
x1 = torch.normal(-2*n_data, 1)     # class 1 x data (tensor), shape=(100, 2), mean -2, std 1
y1 = torch.ones(100)                # class 1 labels, shape=(100,)

# Note that x and y must have the following form (torch.cat merges the data)
x = torch.cat((x0, x1), 0).type(torch.FloatTensor)  # shape (200, 2); data must be FloatTensor = 32-bit float
y = torch.cat((y0, y1), 0).type(torch.LongTensor)   # shape (200,); labels must be LongTensor = 64-bit integer

# plt.scatter(x.numpy()[:, 0], x.numpy()[:, 1], c=y.numpy(), s=100, lw=0, cmap='RdYlGn')

class Net(torch.nn.Module):
    """Classifier network: hidden linear layer, ReLU, then a linear output layer.

    ``forward`` returns the raw output of the last linear layer; it is not
    yet a predicted class, which still has to be derived from these values.
    """

    def __init__(self, n_feature, n_hidden, n_output):
        """Create the hidden and output layers with the given sizes."""
        super().__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)  # hidden layer
        self.out = torch.nn.Linear(n_hidden, n_output)      # output layer

    def forward(self, x):
        """Propagate ``x`` forward and return the output-layer values."""
        linear_hidden = self.hidden(x)
        activated = F.relu(linear_hidden)
        scores = self.out(activated)
        return scores

net = Net(n_feature=2, n_hidden=10, n_output=2)  # one output per class
# The input has 2 features (the x and y coordinates);
# the output has 2 values (one score for class 0 and one for class 1).

print(net)  # the structure of net

# The optimizer is the training tool
optimizer = torch.optim.SGD(net.parameters(), lr=0.02)  # all parameters of net, learning rate
# CrossEntropyLoss compares the per-class output values with the integer
# class labels in y.
loss_func = torch.nn.CrossEntropyLoss()

plt.ion()   # interactive drawing

for t in range(100):
    out = net(x)     # feed net the training data x, get the output values

    loss = loss_func(out, y)     # error between output and labels

    optimizer.zero_grad()   # clear the gradients left over from the previous step
    loss.backward()         # back-propagate the error, computing the gradients
    optimizer.step()        # apply the updates to net's parameters

    if t % 2 == 0:
        plt.cla()  # clear the previous frame
        # torch.max(out, 1) returns (values, indices); the index of the
        # largest output along dim 1 is the predicted class.
        prediction = torch.max(out, 1)[1]
        pred_y = prediction.numpy()
        target_y = y.numpy()
        points = x.numpy()
        plt.scatter(points[:, 0], points[:, 1], c=pred_y, s=100, lw=0, cmap='RdYlGn')
        accuracy = float((pred_y == target_y).astype(int).sum()) / float(target_y.size)
        plt.text(1.5, -4, 'Accuracy=%.2f' % accuracy, fontdict={'size': 20, 'color': 'red'})
        plt.pause(0.1)  # give the GUI event loop time to draw


plt.ioff()  # stop interactive drawing
plt.show()  # keep the final figure open

Quick build

The regression example defined the network by subclassing torch.nn.Module with `class`, as repeated below. torch.nn.Sequential lets you build the same network quickly, summarizing all of the above in a single statement.
class Net(torch.nn.Module):
    """Network defined the explicit way, by subclassing torch.nn.Module."""

    def __init__(self, n_feature, n_hidden, n_output):
        """Create a hidden layer and a prediction layer of the given sizes."""
        super().__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)
        self.predict = torch.nn.Linear(n_hidden, n_output)

    def forward(self, x):
        """Apply the hidden layer, ReLU and the prediction layer in turn."""
        return self.predict(F.relu(self.hidden(x)))


# This is how net1 is built: 1 input feature, 10 hidden neurons, 1 output
net1 = Net(n_feature=1, n_hidden=10, n_output=1)

Quick build

# Same architecture built in a single statement: the layers are applied
# in the order they are listed, with the activation as a layer of its own.
net2 = torch.nn.Sequential(
    torch.nn.Linear(1, 10),
    torch.nn.ReLU(),
    torch.nn.Linear(10, 1),
)

Net (
  (hidden): Linear (1 -> 10)
  (predict): Linear (10 -> 1)
)
Sequential (
  (0): Linear (1 -> 10)
  (1): ReLU ()
  (2): Linear (10 -> 1)
)

net2 lists the excitation (activation) function as a layer of its own, whereas in net1 the activation function is called inside the forward() function.
This shows the advantage of net1 over net2: you can customize the forward pass yourself (for example for an RNN) to suit your needs.
However, if you don't need any such custom processing, net2 is the more convenient choice.

Save Extraction

Once a model has been trained, we want to save it so that it can be used directly next time. Here we use the regression network as an example of saving and restoring.
`torch.save(net1, 'net.pkl')` saves the entire neural network.
`torch.save(net1.state_dict(), 'net_params.pkl')` saves only the parameters of the neural network.

import torch

import matplotlib.pyplot as plt

#fake data
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)
#unsqueeze transforms one dimension into two dimensions
y = x.pow(2) + 0.2*torch.rand(x.size())
#   Quadratic+Noise Effect

#Save data
def save():
    """Train a small regression network and save it to disk in two forms.

    Reads the module-level tensors ``x`` and ``y``, trains for 100 SGD steps,
    draws the fit in the first of three subplots, then writes:

    * ``net.pkl`` - the entire network object;
    * ``net_params.pkl`` - only the network's parameters (its state_dict).
    """
    # Build a network
    net1 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1),
    )
    # Optimize network
    optimizer = torch.optim.SGD(net1.parameters(), lr=0.2)
    loss_func = torch.nn.MSELoss()

    # Start training
    for _ in range(100):  # 100 steps
        prediction = net1(x)             # predicted values for the inputs
        loss = loss_func(prediction, y)  # error between prediction and target y
        optimizer.zero_grad()            # reset all parameter gradients to zero
        loss.backward()                  # back-propagate to compute the gradients
        optimizer.step()                 # apply the gradients

    # Plot the trained network's fit
    plt.figure(1, figsize=(10, 3))
    plt.subplot(131)
    plt.title('Net1')
    plt.scatter(x.numpy(), y.numpy())
    plt.plot(x.numpy(), prediction.detach().numpy(), 'r-', lw=5)

    torch.save(net1, 'net.pkl')                      # save the entire neural network
    torch.save(net1.state_dict(), 'net_params.pkl')  # save only the parameters


def restore_net():
    """Load the entire network from ``net.pkl`` and plot its prediction."""
    # NOTE(review): 'net.pkl' is a pickled module object. Unpickling executes
    # code, so only load files you trust. From PyTorch 2.6 on, torch.load
    # defaults to weights_only=True and rejects such files unless
    # weights_only=False is passed explicitly.
    net2 = torch.load('net.pkl')
    prediction = net2(x)

    # Plot the restored network's fit
    plt.subplot(132)
    plt.title('Net2')
    plt.scatter(x.numpy(), y.numpy())
    plt.plot(x.numpy(), prediction.detach().numpy(), 'r-', lw=5)


def restore_parames():
    """Rebuild the architecture, load ``net_params.pkl`` into it and plot."""
    # To restore from parameters only, a network with the same structure as
    # net1 must be built first; its freshly initialized parameters differ
    # from net1's until the saved ones are copied in.
    net3 = torch.nn.Sequential(
        torch.nn.Linear(1, 10),
        torch.nn.ReLU(),
        torch.nn.Linear(10, 1),
    )
    # Copy the saved parameters of net1 into net3
    net3.load_state_dict(torch.load('net_params.pkl'))
    prediction = net3(x)

    # Plot the restored network's fit
    plt.subplot(133)
    plt.title('Net3')
    plt.scatter(x.numpy(), y.numpy())
    plt.plot(x.numpy(), prediction.detach().numpy(), 'r-', lw=5)
    plt.show()


# Train and save net1, then restore it both ways; the three plots should match
save()
restore_net()
restore_parames()




Batch Training


DataLoader is the tool torch gives you to wrap your data. Convert your own data (a numpy array or another format) into a Tensor and put it in this wrapper; DataLoader then helps you iterate through your data efficiently, batch by batch.

# Batch processing
import torch
import torch.utils.data as Data

BATCH_SIZE = 5  # number of samples in each mini-batch

x = torch.linspace(1, 10, 10)  # 10 evenly spaced points from 1 to 10
y = torch.linspace(10, 1, 10)  # 10 evenly spaced points from 10 to 1

# Define a dataset: x is the training data, y is the target data
torch_dataset = Data.TensorDataset(x, y)
# Use a loader to split training into mini-batches
loader = Data.DataLoader(
    dataset=torch_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,  # reshuffle the data every epoch; False keeps the original order
)


for epoch in range(3):  # train on the whole set of 10 samples three times
    # Each epoch is split into len(loader) steps of up to BATCH_SIZE samples.
    # enumerate supplies the step index for every batch drawn from the loader.
    for step, (batch_x, batch_y) in enumerate(loader):
        print('Epoch: ', epoch, '| Step: ', step, '| batch x: ',
              batch_x.numpy(), '| batch y: ', batch_y.numpy())
. . . 
Epoch:  0 | Step:  0 | batch x:  [3. 5. 1. 4. 8.] | batch y:  [ 8.  6. 10.  7.  3.]
Epoch:  0 | Step:  1 | batch x:  [ 2.  6. 10.  9.  7.] | batch y:  [9. 5. 1. 2. 4.]
Epoch:  1 | Step:  0 | batch x:  [ 4.  2.  6.  7. 10.] | batch y:  [7. 9. 5. 4. 1.]
Epoch:  1 | Step:  1 | batch x:  [9. 5. 8. 1. 3.] | batch y:  [ 2.  6.  3. 10.  8.]
Epoch:  2 | Step:  0 | batch x:  [4. 7. 6. 2. 8.] | batch y:  [7. 4. 5. 9. 3.]
Epoch:  2 | Step:  1 | batch x:  [10.  9.  1.  3.  5.] | batch y:  [ 1.  2. 10.  8.  6.]

If BATCH_SIZE is changed from 5 to 8, the output becomes:

Epoch:  0 | Step:  0 | batch x:  [2. 1. 5. 9. 7. 8. 4. 6.] | batch y:  [ 9. 10.  6.  2.  4.  3.  7.  5.]
Epoch:  0 | Step:  1 | batch x:  [10.  3.] | batch y:  [1. 8.]
Epoch:  1 | Step:  0 | batch x:  [4. 8. 6. 7. 1. 3. 2. 9.] | batch y:  [ 7.  3.  5.  4. 10.  8.  9.  2.]
Epoch:  1 | Step:  1 | batch x:  [ 5. 10.] | batch y:  [6. 1.]
Epoch:  2 | Step:  0 | batch x:  [ 3.  5.  9.  7. 10.  4.  6.  1.] | batch y:  [ 8.  6.  2.  4.  1.  7.  5. 10.]
Epoch:  2 | Step:  1 | batch x:  [2. 8.] | batch y:  [9. 3.]

At step=1, only the data left over in this epoch (the remaining 2 samples) is returned.


SGD is the most basic optimizer; it has no acceleration effect. Momentum is an improved version of SGD that adds a momentum term.
RMSprop is an upgrade of Momentum, and Adam is in turn an upgrade of RMSprop.
However, from this result we can see that Adam seems to be less effective than RMSprop. So a more advanced optimizer does not necessarily give a better result.
Try different optimizers in your own experiments to find the one that best suits your data/network.

import matplotlib.pyplot as plt
import torch
import torch.nn.functional as F
import torch.utils.data as Data
from torch.autograd import Variable


BATCH_SIZE = 32  # number of samples in each mini-batch

# Regression data: a quadratic curve plus Gaussian noise
x = torch.unsqueeze(torch.linspace(-1, 1, 1000), dim=1)
y = x.pow(2) + 0.1*torch.normal(torch.zeros(*x.size()))

# plt.scatter(x.numpy(),y.numpy())

# Define a dataset and a loader that serves it in shuffled mini-batches
torch_dataset = Data.TensorDataset(x, y)
loader = Data.DataLoader(
    dataset=torch_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=2,  # worker processes used to load the data
)

#Establishing a neural network
class Net(torch.nn.Module):
    """Regression network used to compare optimizers: 1 -> 20 -> 1."""

    def __init__(self):
        """Create the hidden and output layers."""
        # Module.__init__ must run before any sub-module is assigned;
        # without it, assigning self.hidden raises an error.
        super().__init__()
        self.hidden = torch.nn.Linear(1, 20)   # hidden layer
        self.predict = torch.nn.Linear(20, 1)  # output layer

    def forward(self, x):
        """Forward pass: hidden layer -> ReLU -> output layer."""
        x = F.relu(self.hidden(x))
        x = self.predict(x)
        return x
#Four different neural networks
if __name__ == '__main__':
    # Hyperparameters
    LR = 0.01   # learning rate shared by all four optimizers
    EPOCH = 12  # number of passes over the whole dataset

    # One identical network per optimizer, so each optimizer trains its own
    net_SGD = Net()
    net_Momentum = Net()
    net_RMSprop = Net()
    net_Adam = Net()
    nets = [net_SGD, net_Momentum, net_RMSprop, net_Adam]

    opt_SGD = torch.optim.SGD(net_SGD.parameters(), lr=LR)
    opt_Momentum = torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum=0.8)
    opt_RMSprop = torch.optim.RMSprop(net_RMSprop.parameters(), lr=LR, alpha=0.9)
    opt_Adam = torch.optim.Adam(net_Adam.parameters(), lr=LR, betas=(0.9, 0.99))
    optimizers = [opt_SGD, opt_Momentum, opt_RMSprop, opt_Adam]

    # Start training
    loss_func = torch.nn.MSELoss()  # error formula for regression
    losses_his = [[], [], [], []]   # per-network loss history recorded during training

    for epoch in range(EPOCH):
        print('Epoch: ', epoch)
        for step, (b_x, b_y) in enumerate(loader):
            # Train every network on the same mini-batch, one after another
            for net, opt, l_his in zip(nets, optimizers, losses_his):
                output = net(b_x)              # predicted values for this batch
                loss = loss_func(output, b_y)  # error between prediction and target

                opt.zero_grad()   # reset all parameter gradients to zero
                loss.backward()   # back-propagate to compute the gradients
                opt.step()        # apply the gradients
                l_his.append(loss.item())  # record the loss as a plain float

    labels = ['SGD', 'Momentum', 'RMSprop', 'Adam']
    for label, l_his in zip(labels, losses_his):
        plt.plot(l_his, label=label)
    plt.legend(loc='best')
    plt.xlabel('Steps')
    plt.ylabel('Loss')
    plt.ylim((0, 0.2))
    plt.show()

