# Establishing the first neural network

Posted by CookieDoh on Wed, 12 Feb 2020 04:07:52 +0100

# Relationship Fitting (Regression)

## Setting up datasets

Create some fake data to simulate the real situation. For example, a quadratic function of one variable: y = a * x^2 + b, add a little noise to the Y data to show it more realistically.

```#Relationship Fitting (Regression)
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
#fake data
x=torch.unsqueeze(torch.linspace(-1,1,100),dim=1)
#unsqueeze transforms one dimension into two dimensions
y=x.pow(2)+0.2*torch.rand(x.size())

plt.scatter(x.data.numpy(),y.data.numpy())
#Scatter plot
plt.show()```

## Establishing a neural network

Establishing a neural network can directly apply the torch system. First all the layer attributes (init()) are defined, then the layer-by-layer relationship links (forward(x)) are built.

```#Relationship Fitting (Regression)
import torch

import torch.nn.functional as F
import matplotlib.pyplot as plt
class Net(torch.nn.Module):#Module is the main module of net
#Two functions required to build torch
def __init__(self,n_feature,n_hidden,n_output):#Information needed to set up the net
super(Net,self).__init__()#Inherit net to Module module and output init function
#Below is the layer information, which is all attributes
self.hidden=torch.nn.Linear(n_feature,n_hidden)#How many inputs and how many outputs
self.predict = torch.nn.Linear(n_hidden,n_output)
def forward(self,x):#The process of forward propagation, forward propagation of input values, and neural network analysis of output values
x=F.relu(self.hidden(x))#x outputs n_hidden through hidden, then nests with relu activation function
x=self.predict(x)#Put the above X in predict Output Layer Output x
#The output layer does not use the excitation function, because in most regression problems
#In most regression problems, the predicted value is distributed from positive to negative infinity.
#Use the excitation function to truncate the value a little, predict doesn't like the result of truncation
return x

#Define net
#!!!!Don't hit in class
net=Net(n_feature=1,n_hidden=10,n_output=1)#x value 1, hidden layer (neuron 10), y1
print(net)

plt.ion()   #Real-time Drawing
plt.show()

#Optimize Network
optimizer=torch.optim.SGD(net.parameters(),lr=0.2)#Higher, faster
#Using optim optimizer to optimize the parameters of the neural network, the learning efficiency is 0.2
loss_func=torch.nn.MSELoss()#Mean Square Variance of MSELoss() for Optimizing Errors

#Start training
for t in  range(100):#100 steps
prediction=net(x)#Input Information Prediction Value

loss=loss_func(prediction,y)#Errors in prediction and y prediction

loss.backward()#Reverse transfer to compute gradient for each neural network node

if t % 5 == 0:
# plot and show learning process
plt.cla()
plt.scatter(x.data.numpy(), y.data.numpy())
plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
plt.text(0.2, 0, 'Loss=%.4f' % loss.data, fontdict={'size': 20, 'color':  'red'})
plt.pause(0.1)
plt.ioff()
plt.show()``` # Distinguish types (classifications) Create some fake data to simulate the real situation. For example, two quadratic distributions of data, but their mean values are different. Establish a neural network that directly uses the system in torch. Define all layer attributes (init()) first, and then build (forward(x)) layer-by-layer relationship links.
This is basically the same as the neural network used in the previous regression.

```import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F

# False Data
n_data = torch.ones(100, 2)         # The basic form of the data x contains a horizontal and vertical coordinate y of type
x0 = torch.normal(2*n_data, 1)      # Type 0 x data (tensor), shape=(100, 2)
y0 = torch.zeros(100)               # Label 0
x1 = torch.normal(-2*n_data, 1)     # Type 1 x data (tensor), shape=(100, 1)
y1 = torch.ones(100)                # Label 1

# Note that x, y data must be in the form of the following (torch.cat is merging data)
x = torch.cat((x0, x1), 0).type(torch.FloatTensor)  #The data must be FloatTensor = 32-bit floating
#x Merge together to make data

y = torch.cat((y0, y1), ).type(torch.LongTensor)    # Label must be LongTensor = 64-bit integer
# y merge together to label 0.
# plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=y.data.numpy(), s=100, lw=0, cmap='RdYlGn')
# plt.show()

class Net(torch.nn.Module):     # Module inheriting torch
def __init__(self, n_feature, n_hidden, n_output):
super(Net, self).__init__()     # Inherit u init_u Function
self.hidden = torch.nn.Linear(n_feature, n_hidden)   # Hidden Layer Linear Output
self.out = torch.nn.Linear(n_hidden, n_output)       # Output Layer Linear Output

def forward(self, x):
# Forward propagation of input values and output values from neural network analysis
x = F.relu(self.hidden(x))      # Excitation Function (Linear Value of Hidden Layer)
x = self.out(x)                 # Output value, but this is not a predicted value and the predicted value needs to be calculated
return x

net = Net(n_feature=2, n_hidden=10, n_output=2) # Several categories are just a few output s
#The x input is 2 features (x, y coordinates) and the output is 2 features (0 and you)

print(net)  # The structure of net

# optimizer is a training tool
optimizer = torch.optim.SGD(net.parameters(), lr=0.02)  # All parameters passed into net, learning rate
loss_func = torch.nn.CrossEntropyLoss()
#CrossEntropyLoss() Action Label Value: [0,0,1] Predictive Value: [0.1,0.3,0.6] Error between the two

plt.ion()   # Drawing
plt.show()

for t in range(100):
out = net(x)     # Feed net training data x, output analysis value

loss = loss_func(out, y)     # Calculating the Error of Both

optimizer.zero_grad()   # Empty the remaining update parameter values from the previous step
loss.backward()         # Error back-propagation, calculating parameter update values
optimizer.step()        # Applying parameter update values to net's parameters

if t % 2 == 0:
plt.cla()
prediction = torch.max(out, 1)
#The location of the maximum value is where the index is 1
pred_y = prediction.data.numpy().squeeze()
target_y = y.data.numpy()
plt.scatter(x.data.numpy()[:, 0], x.data.numpy()[:, 1], c=pred_y, s=100, lw=0, cmap='RdYlGn')
accuracy = float((pred_y == target_y).astype(int).sum()) / float(target_y.size)
plt.text(1.5, -4, 'Accuracy=%.2f' % accuracy, fontdict={'size': 20, 'color': 'red'})

plt.pause(0.1)

plt.ioff()  # Stop drawing
plt.show()```

# Quick build

```The method used in regression inherits a torch's neural network structure with class, then modifies it to quickly build and summarize all of the above in one sentence!
```
```class Net(torch.nn.Module):
def __init__(self, n_feature, n_hidden, n_output):
super(Net, self).__init__()
self.hidden = torch.nn.Linear(n_feature, n_hidden)
self.predict = torch.nn.Linear(n_hidden, n_output)

def forward(self, x):
x = F.relu(self.hidden(x))
x = self.predict(x)
return x

net1 = Net(1, 10, 1)   # This is how we built net1```

Quick build

```net2 = torch.nn.Sequential(
torch.nn.Linear(1, 10),
torch.nn.ReLU(),
torch.nn.Linear(10, 1)
)

print(net1)
"""
Net (
(hidden): Linear (1 -> 10)
(predict): Linear (10 -> 1)
)
"""
print(net2)
"""
Sequential (
(0): Linear (1 -> 10)
(1): ReLU ()
(2): Linear (10 -> 1)
)
"""```

net2 also includes the excitation function, but in net1, the excitation function is actually called in the forward() function.
This also shows that the advantage of Net1 over net2 is that you can personalize your own forward communication process, such as (RNN), to suit your personal needs.
However, net2 is more appropriate if you don't need a seven-eighty-eight process.

# Save Extraction

A model has been trained and we want to save it so that we can use it directly the next time we want to use it. Here we use a regressed neural network as an example to save and extract.
A torch.save(net1,'net.pkl') that holds the entire neural network
A parameter torch.save(net1.state_dict(),'net_params.pkl') that stores neural networks

```import torch

import matplotlib.pyplot as plt

#fake data
x = torch.unsqueeze(torch.linspace(-1, 1, 100), dim=1)
#unsqueeze transforms one dimension into two dimensions
y = x.pow(2) + 0.2*torch.rand(x.size())

#Save data
def save():
#Build a network
net1=torch.nn.Sequential(
torch.nn.Linear(1,10),
torch.nn.ReLU(),
torch.nn.Linear(10,1)
)
#Optimize Network
optimizer=torch.optim.SGD(net1.parameters(),lr=0.2)
loss_func=torch.nn.MSELoss()

# Start training
for t in range(100):  # 100 steps
prediction =net1(x)  # Input Information Prediction Value
loss = loss_func(prediction, y)  # Errors in prediction and y prediction
optimizer.zero_grad()  # All parameter ladder loss = loss_func(prediction, y) degree reduced to 0
loss.backward()  # Reverse transfer to compute gradient for each neural network node

# Mapping
plt.figure(1, figsize=(10, 3))
plt.subplot(131)
plt.title('Net1')
plt.scatter(x.data.numpy(), y.data.numpy())
plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)

torch.save(net1,'net.pkl')#Save the entire neural network
#Saved Name
torch.save(net1.state_dict(),'net_params.pkl')#Save parameters of the entire neural network

def restore_net():
prediction=net2(x)
# Mapping

plt.subplot(132)
plt.title('Net2')
plt.scatter(x.data.numpy(), y.data.numpy())
plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)

def restore_parames():
#To extract parameters from net3, a network like net1 must be built first
#Then copy the parameters of net1 to net3
net3 = torch.nn.Sequential(
torch.nn.Linear(1, 10),
torch.nn.ReLU(),
torch.nn.Linear(10, 1)
)#But the parameters inside are definitely different
prediction=net3(x)
# Mapping

plt.subplot(133)
plt.title('Net3')
plt.scatter(x.data.numpy(), y.data.numpy())
plt.plot(x.data.numpy(), prediction.data.numpy(), 'r-', lw=5)
plt.show()

save()

restore_net()

restore_parames()``` # Batch Training

DataLoader is the tool torch gives you to wrap your data. So swap your own (numpy array or other) data format into Tensor and put it in this wrapper. DataLoader helps you iterate through your data effectively.

```#Batch Processing
import torch
import torch.utils.data as Data

BATCH_SIZE=5#A stack of data is divided into five groups and five groups

x=torch.linspace(1,10,10)#Divide from 1 to 10 into 10 points
y=torch.linspace(10,1,10)#From 10 to 1.

#Define a database
torch_dataset=Data.TensorDataset(x,y)#x training data y target data
#Use loader to turn training into a small batch
dataset=torch_dataset,
batch_size=BATCH_SIZE,
shuffle=True,#Do not randomly shuffle data during training and start sampling false again as no shuffling

)

for epoch in range(3):#Training these 10 data three times as a whole
for step,(batch_x,batch_y) in enumerate(loader):#Total training 3 times Each training is divided into 2 parts
#The loader defines whether to shuffle the data without shuffling the data. The form of each training is the same: training data point 1 before training data point 2.
#enumerate gives him an index each time it extracts. The first step is
#training
print('Epoch: ', epoch, '| Step: ', step, '| batch x: ',
batch_x.numpy(), '| batch y: ', batch_y.numpy())
. . .
Epoch:  0 | Step:  0 | batch x:  [3. 5. 1. 4. 8.] | batch y:  [ 8.  6. 10.  7.  3.]
Epoch:  0 | Step:  1 | batch x:  [ 2.  6. 10.  9.  7.] | batch y:  [9. 5. 1. 2. 4.]
Epoch:  1 | Step:  0 | batch x:  [ 4.  2.  6.  7. 10.] | batch y:  [7. 9. 5. 4. 1.]
Epoch:  1 | Step:  1 | batch x:  [9. 5. 8. 1. 3.] | batch y:  [ 2.  6.  3. 10.  8.]
Epoch:  2 | Step:  0 | batch x:  [4. 7. 6. 2. 8.] | batch y:  [7. 4. 5. 9. 3.]
Epoch:  2 | Step:  1 | batch x:  [10.  9.  1.  3.  5.] | batch y:  [ 1.  2. 10.  8.  6.]```

If BATCH_SIZE=5 is changed to 8 output

```Epoch:  0 | Step:  0 | batch x:  [2. 1. 5. 9. 7. 8. 4. 6.] | batch y:  [ 9. 10.  6.  2.  4.  3.  7.  5.]
Epoch:  0 | Step:  1 | batch x:  [10.  3.] | batch y:  [1. 8.]
Epoch:  1 | Step:  0 | batch x:  [4. 8. 6. 7. 1. 3. 2. 9.] | batch y:  [ 7.  3.  5.  4. 10.  8.  9.  2.]
Epoch:  1 | Step:  1 | batch x:  [ 5. 10.] | batch y:  [6. 1.]
Epoch:  2 | Step:  0 | batch x:  [ 3.  5.  9.  7. 10.  4.  6.  1.] | batch y:  [ 8.  6.  2.  4.  1.  7.  5. 10.]
Epoch:  2 | Step:  1 | batch x:  [2. 8.] | batch y:  [9. 3.]```

Only the data left in this epoch will be returned to you at step=1

# Optimizer

SGD is the most common optimizer, or has no acceleration effect, while Momentum is an improved version of SGD that incorporates a momentum principle.
However, from this result we can see that Adam seems to be less effective than RMSprop. So the more advanced the optimizer, the better the result.
We can try different optimizers in our own experiment to find the one that best suits your data/network.

```import torch
import torch.utils.data as Data
import torch.nn.functional as F
import matplotlib.pyplot as plt

#Hyperparameter
LR=0.01
BATCH_SIZE=32
EPOCH=12

#Regressed data
x = torch.unsqueeze(torch.linspace(-1, 1, 1000), dim=1)
y = x.pow(2) + 0.1*torch.normal(torch.zeros(*x.size()))

# plt.scatter(x.numpy(),y.numpy())
# plt.show()

#Define a database
torch_dataset=Data.TensorDataset(x,y)
dataset=torch_dataset,
batch_size=BATCH_SIZE,
shuffle=True,
num_workers=2,#Work Process
)

#Establishing a neural network
class Net(torch.nn.Module):
def __init__(self):
super(Net,self).__init__()
self.hidden=torch.nn.Linear(1,20)#Hidden Layer
self.predict=torch.nn.Linear(20,1)#output layer
def forward(self,x):
x=F.relu(self.hidden(x))
x=self.predict(x)
return x
#Four different neural networks
if __name__ == '__main__':
net_SGD = Net()
net_Momentum= Net()
net_RMSprop= Net()
nets = [net_SGD, net_Momentum, net_RMSprop, net_Adam]

opt_SGD= torch.optim.SGD(net_SGD.parameters(), lr=LR)
opt_Momentum= torch.optim.SGD(net_Momentum.parameters(), lr=LR, momentum=0.8)
opt_RMSprop= torch.optim.RMSprop(net_RMSprop.parameters(), lr=LR, alpha=0.9)

#Start training
loss_func = torch.nn.MSELoss()#Error calculation formula for regression
losses_his = [[], [], [], []]   # loss of different neural networks when recording training s

for epoch in range(EPOCH):
print('Epoch: ', epoch)
for step, (b_x, b_y) in enumerate(loader):
#Take out the different neural networks one by one
for net, opt, l_his in zip(nets, optimizers, losses_his):
output = net(b_x)  # Input Information Prediction Value
loss = loss_func(output, b_y)  # Errors in prediction and y prediction

loss.backward()  # Reverse transfer to compute gradient for each neural network node
l_his.append(loss.data.numpy())#Put errors in the record
#Print
labels = ['SGD', 'Momentum', 'RMSprop', 'Adam']
for i, l_his in enumerate(losses_his):
plt.plot(l_his, label=labels[i])
plt.legend(loc='best')
plt.xlabel('Steps')
plt.ylabel('Loss')
plt.ylim((0, 0.2))
plt.show()```   7 original articles published. 2. Visits 240

Topics: network Database less