Training a ResNet-based flower image classification model (p31-p37)

This is part of a series of PyTorch notes that follow a course on Bilibili. There are many articles online about image classification with ResNet models, but for the image data they usually just give a link. So I read a well-known article and prepared the dataset from scratch.

1. Dataset preparation

The dataset consists of 102 categories of flowers from the United Kingdom. Each category consists of 40-258 pictures

The parts needed are items 1, 4 and 5, circled in red. Item 1 is the archive containing more than 8,000 pictures.

Items 4 and 5 can be downloaded with wget and copied into the project folder.


The labels array (imagelabels.mat) has 8189 columns in total, and the number in each column is the category number of the corresponding picture.


- trnid field: 1020 columns in total; every 10 consecutive columns are the pictures of one class of flowers. The number in each column is the picture number.
- valid field: 1020 columns in total; every 10 consecutive columns are the pictures of one class of flowers. The number in each column is the picture number.
- tstid field: 6149 columns in total; the number of columns per class of flowers varies. The number in each column is the picture number.

import os
import shutil

import numpy as np
import scipy.io  # For loading mat files
from PIL import Image

def _export_split(image_ids, image_paths, image_labels, dest_folder, size=(256, 256)):
    """Resize the selected images and save them into per-class sub-folders.

    Args:
        image_ids: 0-based indices into ``image_paths`` / ``image_labels``.
        image_paths: image file paths, sorted so that index ``i`` is picture
            number ``i + 1``.
        image_labels: 0-based class id of every image.
        dest_folder: root folder of the split; one sub-folder per class is
            created inside it, named after the 1-based class number.
        size: ``(width, height)`` every image is resized to.
    """
    for tid in image_ids:
        path = image_paths[tid]
        print("path:", path)

        base_path = os.path.basename(path)
        print("base_path:", base_path)

        # Folder names use the original 1-based class numbers.
        classes = str(image_labels[tid] + 1)
        class_path = os.path.join(dest_folder, classes)
        # exist_ok avoids the separate existence check and is safe on re-runs.
        os.makedirs(class_path, exist_ok=True)
        print("class_path:", class_path)

        despath = os.path.join(class_path, base_path)
        print("despath:", despath)

        ######## open image, resize it and store it under its class ########
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        with Image.open(path) as img:
            img.resize(size, Image.LANCZOS).save(despath)


######## flower dataset: class label of every image (converted to 0-based) ########
labels = scipy.io.loadmat('./data/flower_data/imagelabels.mat')
labels = np.array(labels['labels'][0]) - 1

print("labels:", labels)

######## flower dataset: train test valid data id Identification ########
setid = scipy.io.loadmat('./data/flower_data/setid.mat')

# Picture numbers in the .mat file are 1-based; subtract 1 to index Python lists.
validation = np.array(setid['valid'][0]) - 1

train = np.array(setid['trnid'][0]) - 1

test = np.array(setid['tstid'][0]) - 1

######## flower data dirs Generate absolute paths and names to hold data ########
jpg_dir = "/Users/benmu/Downloads/jpg"
flower_dir = [os.path.join(jpg_dir, img) for img in os.listdir(jpg_dir)]

######## flower data dirs sort Absolute path and name sorting of data from smallest to largest ########
# os.listdir returns entries in arbitrary order; the ids above are only valid
# against the list sorted by file name.
flower_dir.sort()

# print(flower_dir)

# NOTE(review): the training code later in this file reads the validation set
# from './data/flower_data/valid', while this script writes it to
# '.../validation' -- confirm which folder name is intended.
des_folder_train = "/Users/benmu/PycharmProjects/pythonProject128/data/flower_data/train"
_export_split(train, flower_dir, labels, des_folder_train)

des_folder_validation = "/Users/benmu/PycharmProjects/pythonProject128/data/flower_data/validation"
_export_split(validation, flower_dir, labels, des_folder_validation)

des_folder_test = "/Users/benmu/PycharmProjects/pythonProject128/data/flower_data/test"
_export_split(test, flower_dir, labels, des_folder_test)

Classification Number Effect:

Here every image is resized to a uniform 256x256. Common models require this size or 224x224. You can also skip resizing here and leave it to the data augmentation step below.

The train and validation splits each contain 1020 pictures and the test split 6149. You can rearrange these yourself; I simply used the test split as the training set.

Data augmentation:

The framework already implements this, so it does not need to be handled separately with OpenCV.

data_dir = './data/flower_data/'
# os.path.join avoids the doubled slash produced by data_dir + '/train'.
train_dir = os.path.join(data_dir, 'train')
valid_dir = os.path.join(data_dir, 'valid')

# Perform data augmentation operations
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomRotation(45),  # Random rotation, -45 to 45 degrees
        transforms.CenterCrop(224),  # Crop 224x224 from the center
        transforms.RandomHorizontalFlip(p=0.5),  # Horizontal flip with probability p
        transforms.RandomVerticalFlip(p=0.5),  # Vertical flip with probability p
        transforms.ColorJitter(brightness=0.2, contrast=0.1, saturation=0.1, hue=0.1),  # Brightness, contrast, saturation, hue
        transforms.RandomGrayscale(p=0.025),  # 2.5% chance of becoming grayscale
        transforms.ToTensor(),  # Normalize works on tensors, so convert first
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),  # Per-channel mean, standard deviation
    ]),
    # Validation uses no random augmentation: resize, center crop, normalize.
    'valid': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ]),
}

Image augmentation here means rotation, cropping, and finally normalization on the training set. The validation set does not need rotation.

batch_size = 8
# Build one ImageFolder dataset per split: folder path + its transform pipeline.
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'valid']}
# Wrap each dataset in a DataLoader that yields shuffled mini-batches.
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=batch_size, shuffle=True) for x in
               ['train', 'valid']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'valid']}
# Class names are the sub-folder names found under the training folder.
class_names = image_datasets['train'].classes

Printing image_datasets shows the following information:

{'train': Dataset ImageFolder
    Number of datapoints: 6149
    Root location: ./data/flower_data/train
Transform: Compose(
               RandomRotation(degrees=[-45.0, 45.0], interpolation=nearest, expand=False, fill=0)
               CenterCrop(size=(224, 224))
               ColorJitter(brightness=[0.8, 1.2], contrast=[0.9, 1.1], saturation=[0.9, 1.1], hue=[-0.1, 0.1])
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
           ), 'valid': Dataset ImageFolder
    Number of datapoints: 1020
    Root location: ./data/flower_data/valid
Transform: Compose(
               Resize(size=256, interpolation=bilinear, max_size=None, antialias=None)
               CenterCrop(size=(224, 224))
               Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

The actual names of the labels are stored in cat_to_name.json.

Show data

Note that the tensor data needs to be converted to NumPy format before it can be displayed.

def im_convert(tensor):
    """Convert a normalized image tensor into a displayable NumPy array.

    Args:
        tensor: image tensor in C*H*W layout (singleton dimensions are
            squeezed away first), normalized with the mean/std used below.

    Returns:
        A NumPy array in H*W*C layout with values clipped to [0, 1].
    """
    # Work on a detached CPU copy so the caller's tensor is left untouched.
    image = tensor.to("cpu").clone().detach()
    image = image.numpy().squeeze()
    # Because tensor is c*h*w, we need to turn it into h*w*c
    image = image.transpose(1, 2, 0)
    # Undo the normalization: x * std + mean
    image = image * np.array((0.229, 0.224, 0.225)) + np.array((0.485, 0.456, 0.406))
    # clip changes numbers less than 0 to 0 and numbers greater than 1 to 1
    image = image.clip(0, 1)
    return image

# Preview one validation batch as a rows x columns grid of images.
fig = plt.figure(figsize=(20, 12))
columns = 4
rows = 2

dataiter = iter(dataloaders['valid'])
# Use the built-in next(); the iterator's .next() method no longer exists in
# recent PyTorch releases.
inputs, classes = next(dataiter)

# Never index past the end of the batch if it is smaller than the grid.
for idx in range(min(columns * rows, len(inputs))):
    ax = fig.add_subplot(rows, columns, idx + 1, xticks=[], yticks=[])
    # Title is the class folder name of the sample.
    ax.set_title(class_names[int(classes[idx])])
    ax.imshow(im_convert(inputs[idx]))
plt.show()

2. Load the model provided in models

The purpose of transfer learning is to use the weights and bias parameters of an existing model as our initialization parameters, so that our own model starts out as close as possible to a good solution.

What should be trained? There are usually two strategies: (A) use the existing weights as the starting point and then continue training; (B) freeze the model's layers and only change (train) the fully connected layer.

2.1 Whether to train with GPU

# Whether to train with GPU
train_on_gpu = torch.cuda.is_available()

# Report exactly one of the two outcomes.
if not train_on_gpu:
    print('CUDA is not available.  Training on CPU ...')
else:
    print('CUDA is available!  Training on GPU ...')

# Device that the model and every batch are moved to.
device = torch.device("cuda:0" if train_on_gpu else "cpu")

2.2 Use a pretrained ResNet

# Build a ResNet-152 with default arguments (no `pretrained` flag is passed here).
model_ft = models.resnet152()

Because my Mac has no CUDA GPU and the inputs are 224x224x3 pictures, I expect resnet152 to be too heavy to run here.

The fully connected layer has out_features=1000; it must be changed to our own number of outputs, 102.

2.3 Example of pytorch website, defining image classification model

Only the resnet and vgg branches are listed here; the others are not pasted.

model_name = 'resnet'  # Options named by the tutorial: ['resnet','alexnet','vgg','squeezenet','densenet','inception']
# True: freeze the backbone weights and train only the new head
# (this flag is forwarded to set_parameter_requires_grad below).
feature_extract = True

def set_parameter_requires_grad(model, feature_extracting):
    """Freeze every parameter of *model* when feature extracting.

    When ``feature_extracting`` is truthy, ``requires_grad`` is set to False
    on all parameters so the existing weights are reused without further
    training. Otherwise the model is left untouched.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        weight.requires_grad = False

def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a torchvision model whose final layer outputs *num_classes* scores.

    Args:
        model_name: ``"resnet"`` or ``"vgg"``.
        num_classes: number of output classes for the replaced final layer.
        feature_extract: when truthy, the backbone parameters are frozen (via
            ``set_parameter_requires_grad``) before the new head is attached,
            so only the head is trainable.
        use_pretrained: forwarded as ``pretrained=`` to the model constructor.

    Returns:
        ``(model_ft, input_size)``: the model and the expected input edge
        length in pixels.

    Raises:
        ValueError: if ``model_name`` is not one of the supported names.
    """
    # Choose the appropriate model; the initialization differs slightly from model to model.
    if model_name == "resnet":
        # ResNet-50 (the heavier resnet152 can be substituted here).
        model_ft = models.resnet50(pretrained=use_pretrained)  # Downloads the weights locally on first use
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.fc.in_features
        # Replace the fully connected layer so it outputs our own classes, then
        # apply log-softmax; this head must be paired with nn.NLLLoss.
        model_ft.fc = nn.Sequential(nn.Linear(num_ftrs, num_classes),
                                    nn.LogSoftmax(dim=1))
        input_size = 224

    elif model_name == "vgg":
        # VGG-16
        model_ft = models.vgg16(pretrained=use_pretrained)
        set_parameter_requires_grad(model_ft, feature_extract)
        num_ftrs = model_ft.classifier[6].in_features
        # NOTE: unlike the resnet branch this head emits raw scores (no
        # LogSoftmax), so it needs nn.CrossEntropyLoss rather than nn.NLLLoss.
        model_ft.classifier[6] = nn.Linear(num_ftrs, num_classes)
        input_size = 224

    else:
        # Fail loudly instead of handing (None, 0) back to the caller.
        raise ValueError("Invalid model name: {!r}".format(model_name))

    return model_ft, input_size

2.4 Set which layers need training

model_ft, input_size = initialize_model(model_name, 102, feature_extract, use_pretrained=True)

# GPU computing: move the model to the selected device
model_ft = model_ft.to(device)

# Model Save: path of the checkpoint file used by train_model
filename = 'checkpoint.pth'

# Whether to train all layers
params_to_update = model_ft.parameters()
print("Params to learn:")
if feature_extract:
    # Only the parameters left trainable (the new head) go to the optimizer.
    params_to_update = []
    for name, param in model_ft.named_parameters():
        if param.requires_grad:
            params_to_update.append(param)
            print("\t", name)
else:
    # Everything is trained; just list the trainable parameters.
    for name, param in model_ft.named_parameters():
        if param.requires_grad:
            print("\t", name)

Output parameters:

Params to learn:

Print the model, and you can see that the output of the last full connection layer has been changed to 102

Note: models.resnet152(pretrained=use_pretrained) #Download the resnet model locally

The file is more than 200 MB, so the download may be slow. The local path is .cache/torch/hub/checkpoints

2.5 Optimizer Settings

# Optimizer Settings
optimizer_ft = optim.Adam(params_to_update, lr=1e-2)
# optim.lr_scheduler learning-rate adjustment strategy: the learning rate
# decays to 1/10 of its value every 7 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer_ft, step_size=7, gamma=0.1)
# The last layer already applies LogSoftmax(), so nn.CrossEntropyLoss() cannot
# be used here: nn.CrossEntropyLoss() is equivalent to LogSoftmax() followed by
# nn.NLLLoss().
criterion = nn.NLLLoss()

Here the teacher explains why the loss function should be NLLLoss rather than cross-entropy.

2.6 Training Modules

def train_model(model, dataloaders, criterion, optimizer, num_epochs=25, is_inception=False, filename=filename):
    """Train and validate *model*, checkpointing the best weights to *filename*.

    Relies on the module-level ``device`` (where batches are moved) and
    ``scheduler`` (learning-rate schedule, stepped once per epoch).

    Args:
        model: the network to train.
        dataloaders: dict with ``'train'`` and ``'valid'`` data loaders.
        criterion: loss function applied to ``(outputs, labels)``.
        optimizer: optimizer updating the trainable parameters.
        num_epochs: number of passes over the training set.
        is_inception: True when the network also returns auxiliary outputs
            in training mode (Inception); their loss is weighted by 0.4.
        filename: checkpoint path. An existing checkpoint is resumed from;
            the best validation state is saved to it.

    Returns:
        ``(model, val_acc_history, train_acc_history, valid_losses,
        train_losses, LRs)`` with the model carrying its best validation
        weights.
    """
    since = time.time()
    best_acc = 0

    # Resume only when a checkpoint exists, so the very first run does not
    # fail on a missing file.
    if os.path.exists(filename):
        checkpoint = torch.load(filename, map_location=device)
        best_acc = checkpoint['best_acc']
        # Restore the weights/optimizer that produced best_acc; restoring the
        # accuracy alone would compare new epochs against a model not loaded.
        model.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        # 'mapping' is not written by this function, so treat it as optional.
        if 'mapping' in checkpoint:
            model.class_to_idx = checkpoint['mapping']

    val_acc_history = []
    train_acc_history = []
    train_losses = []
    valid_losses = []
    LRs = [optimizer.param_groups[0]['lr']]

    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(num_epochs):
        print('Epoch {}/{}'.format(epoch, num_epochs - 1))
        print('-' * 10)

        # Training and Validation
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()  # train
            else:
                model.eval()   # Verification

            running_loss = 0.0
            running_corrects = 0

            # Get all the data
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                # Zero the gradients accumulated by the previous batch
                optimizer.zero_grad()
                # Calculate and update gradients only during training
                with torch.set_grad_enabled(phase == 'train'):
                    if is_inception and phase == 'train':
                        outputs, aux_outputs = model(inputs)
                        loss1 = criterion(outputs, labels)
                        loss2 = criterion(aux_outputs, labels)
                        loss = loss1 + 0.4*loss2
                    else:  # resnet does this
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)

                    _, preds = torch.max(outputs, 1)

                    # Update weights during training
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Calculate loss (weighted by batch size, averaged below)
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects.double() / len(dataloaders[phase].dataset)
            time_elapsed = time.time() - since
            print('Time elapsed {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
            print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))

            # Keep (and checkpoint) the best model seen so far
            if phase == 'valid' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())
                state = {
                  'state_dict': model.state_dict(),
                  'best_acc': best_acc,
                  'optimizer' : optimizer.state_dict(),
                }
                torch.save(state, filename)
            if phase == 'valid':
                val_acc_history.append(epoch_acc)
                valid_losses.append(epoch_loss)
                # Step the schedule once per epoch with no argument: StepLR
                # would treat a passed loss value as the epoch number.
                scheduler.step()
            if phase == 'train':
                train_acc_history.append(epoch_acc)
                train_losses.append(epoch_loss)

        print('Optimizer learning rate : {:.7f}'.format(optimizer.param_groups[0]['lr']))
        LRs.append(optimizer.param_groups[0]['lr'])

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
    print('Best val Acc: {:4f}'.format(best_acc))

    # Use the best weights seen during training as the final result of the model
    model.load_state_dict(best_model_wts)
    return model, val_acc_history, train_acc_history, valid_losses, train_losses, LRs

Start training:

# Run 20 epochs; the auxiliary-loss path is enabled only when model_name is "inception".
model_ft, val_acc_history, train_acc_history, valid_losses, train_losses, LRs  = train_model(model_ft, dataloaders, criterion, optimizer_ft, num_epochs=20, is_inception=(model_name=="inception"))

Then my poor computer started humming. A single epoch takes about 19 minutes. At least 20 epochs are needed, and the teacher suggested 50. With this amount of data, a computer without a GPU is simply not up to the job.

Epoch 0/19
Time elapsed 16m 29s
train Loss: 9.5165 Acc: 0.3410
Time elapsed 18m 49s
valid Loss: 10.0126 Acc: 0.5216
Optimizer learning rate : 0.0010000

Tomorrow I will find a Windows laptop and try it again.

