Basic supplement of digital image and machine vision

Posted by DMeerholz on Fri, 10 Dec 2021 15:21:57 +0100

Preliminary analysis of BMP pictures

Open a color image file and save it as 32-bit, 16 bit color and 256 color, 16 color and monochrome bitmap (BMP) files respectively

Calculate the storage capacity of pictures in memory
The bitmap size calculation formula is; (length) × high × Bit depth) / 8/1024 KB

The size of this 256 color bitmap is calculated to be (5125128) / 8/1024=256KB, which is consistent with the actual size of the picture
Open bitmap with notepad + +
Original drawing:

32 color.bmp

16 color.bmp

256 color bitmap

16 color bitmap:

Monochrome bitmap:

0 ~ 1 bytes are file type, 0x4d42 is fixed BM
2 ~ 5 four bytes are the file size, 0x184e, i.e. 6222
6 ~ 9 bytes are reserved fields, all 0
a~d four bytes are the offset bytes from the file header to the actual bitmap data
12 ~ 15 bytes represent the picture width, and 0xdc is 220
16 ~ 19 bytes represent the picture height, 0xdc is 220
1a~1b two bytes, constant 0x1
1c~1d two bytes represent the bits occupied by pixels. Here, 0x1 is two colors, 16 colors are 0x4, 16 colors, and 256 colors are 0x8, 256 colors
1e~21 four bytes indicate whether the picture is compressed, and 0x0 indicates not compressed
Four of 22 ~ 25 represent the image size, and 0x1810 is 6160
26 ~ 29 four bytes represent horizontal resolution
2a~2d four bytes represent the vertical resolution
23 ~ 31 four bytes represent the number of color indexes actually used
32 ~ 35 four bytes represent the number of important color indexes
It can be found that the file header occupies a total of 40 bytes, which is hexadecimal.
For different pictures, the file size, length, width and pixel ratio are different.

2, Singular value decomposition (SVD) is used to extract (reduce) the eigenvalues of a picture;

import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib as mpl
from pprint import pprint


def restore1(sigma, u, v, K):  # Singular value, left eigenvector, right eigenvector
    m = len(u)
    n = len(v[0])
    a = np.zeros((m, n))
    for k in range(K):
        uk = u[:, k].reshape(m, 1)
        vk = v[k].reshape(1, n)
        a += sigma[k] * np.dot(uk, vk)
    a[a < 0] = 0
    a[a > 255] = 255
    # a = a.clip(0, 255)
    return np.rint(a).astype('uint8')


def restore2(sigma, u, v, K):  # Singular value, left eigenvector, right eigenvector
    m = len(u)
    n = len(v[0])
    a = np.zeros((m, n))
    for k in range(K+1):
        for i in range(m):
            a[i] += sigma[k] * u[i][k] * v[k]
    a[a < 0] = 0
    a[a > 255] = 255
    return np.rint(a).astype('uint8')


if __name__ == "__main__":
    A = Image.open("./Original drawing.png", 'r')
    print(A)
    output_path = r'./SVD_Output'
    if not os.path.exists(output_path):
        os.mkdir(output_path)
    a = np.array(A)
    print(a.shape)
    K = 50
    u_r, sigma_r, v_r = np.linalg.svd(a[:, :, 0])
    u_g, sigma_g, v_g = np.linalg.svd(a[:, :, 1])
    u_b, sigma_b, v_b = np.linalg.svd(a[:, :, 2])
    plt.figure(figsize=(11, 9), facecolor='w')
    mpl.rcParams['font.sans-serif'] = ['simHei']
    mpl.rcParams['axes.unicode_minus'] = False
    for k in range(1, K+1):
        print(k)
        R = restore1(sigma_r, u_r, v_r, k)
        G = restore1(sigma_g, u_g, v_g, k)
        B = restore1(sigma_b, u_b, v_b, k)
        I = np.stack((R, G, B), axis=2)
        Image.fromarray(I).save('%s\\svd_%d.png' % (output_path, k))
        if k <= 12:
            plt.subplot(3, 4, k)
            plt.imshow(I)
            plt.axis('off')
            plt.title('Number of singular values:%d' % k)
    plt.suptitle('SVD Image decomposition', fontsize=20)
    plt.tight_layout()
    # plt.subplots_adjust(top=0.9)
    plt.show()

3, The number of coins in the image is detected by image opening and closing operation (corrosion expansion)

import cv2
import numpy as np


def stackImages(scale, imgArray):
    """
        Press multiple images into the same window for display
        :param scale:float Type, output image display percentage, control zoom scale, 0.5=The image resolution is reduced by half
        :param imgArray:Tuple nested list, image matrix to be arranged
        :return:Output image
    """
    rows = len(imgArray)
    cols = len(imgArray[0])
    rowsAvailable = isinstance(imgArray[0], list)
    width = imgArray[0][0].shape[1]
    height = imgArray[0][0].shape[0]
    if rowsAvailable:
        for x in range(0, rows):
            for y in range(0, cols):
                if imgArray[x][y].shape[:2] == imgArray[0][0].shape[:2]:
                    imgArray[x][y] = cv2.resize(imgArray[x][y], (0, 0), None, scale, scale)
                else:
                    imgArray[x][y] = cv2.resize(imgArray[x][y], (imgArray[0][0].shape[1], imgArray[0][0].shape[0]),
                                                None, scale, scale)
                if len(imgArray[x][y].shape) == 2: imgArray[x][y] = cv2.cvtColor(imgArray[x][y], cv2.COLOR_GRAY2BGR)
        imageBlank = np.zeros((height, width, 3), np.uint8)
        hor = [imageBlank] * rows
        hor_con = [imageBlank] * rows
        for x in range(0, rows):
            hor[x] = np.hstack(imgArray[x])
        ver = np.vstack(hor)
    else:
        for x in range(0, rows):
            if imgArray[x].shape[:2] == imgArray[0].shape[:2]:
                imgArray[x] = cv2.resize(imgArray[x], (0, 0), None, scale, scale)
            else:
                imgArray[x] = cv2.resize(imgArray[x], (imgArray[0].shape[1], imgArray[0].shape[0]), None, scale, scale)
            if len(imgArray[x].shape) == 2: imgArray[x] = cv2.cvtColor(imgArray[x], cv2.COLOR_GRAY2BGR)
        hor = np.hstack(imgArray)
        ver = hor
    return ver


# Read picture
src = cv2.imread("coin.png")
img = src.copy()

# Grayscale
img_1 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Binarization
ret, img_2 = cv2.threshold(img_1, 127, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)

# corrosion
kernel = np.ones((20, 20), int)
img_3 = cv2.erode(img_2, kernel, iterations=1)

# expand
kernel = np.ones((3, 3), int)
img_4 = cv2.dilate(img_3, kernel, iterations=1)

# Find the coin center
contours, hierarchy = cv2.findContours(img_4, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)[-2:]

# Identification coin
cv2.drawContours(img, contours, -1, (0, 0, 255), 5)

# display picture
cv2.putText(img, "count:{}".format(len(contours)), (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 3)
cv2.putText(src, "src", (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 3)
cv2.putText(img_1, "gray", (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 3)
cv2.putText(img_2, "thresh", (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 3)
cv2.putText(img_3, "erode", (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 3)
cv2.putText(img_4, "dilate", (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (255, 0, 0), 3)
imgStack = stackImages(1, ([src, img_1, img_2], [img_3, img_4, img]))
cv2.imshow("imgStack", imgStack)
cv2.waitKey(0)

4, Using image gradient, opening and closing, contour operation, etc., locate and extract the barcode in the picture, and then call the barcode library to obtain the barcode characters

import cv2
import numpy as np
import imutils
from pyzbar import pyzbar


def stackImages(scale, imgArray):
    """
        Press multiple images into the same window for display
        :param scale:float Type, output image display percentage, control zoom scale, 0.5=The image resolution is reduced by half
        :param imgArray:Tuple nested list, image matrix to be arranged
        :return:Output image
    """
    rows = len(imgArray)
    cols = len(imgArray[0])
    rowsAvailable = isinstance(imgArray[0], list)
    width = imgArray[0][0].shape[1]
    height = imgArray[0][0].shape[0]
    if rowsAvailable:
        for x in range(0, rows):
            for y in range(0, cols):
                if imgArray[x][y].shape[:2] == imgArray[0][0].shape[:2]:
                    imgArray[x][y] = cv2.resize(imgArray[x][y], (0, 0), None, scale, scale)
                else:
                    imgArray[x][y] = cv2.resize(imgArray[x][y], (imgArray[0][0].shape[1], imgArray[0][0].shape[0]),
                                                None, scale, scale)
                if len(imgArray[x][y].shape) == 2: imgArray[x][y] = cv2.cvtColor(imgArray[x][y], cv2.COLOR_GRAY2BGR)
        imageBlank = np.zeros((height, width, 3), np.uint8)
        hor = [imageBlank] * rows
        hor_con = [imageBlank] * rows
        for x in range(0, rows):
            hor[x] = np.hstack(imgArray[x])
        ver = np.vstack(hor)
    else:
        for x in range(0, rows):
            if imgArray[x].shape[:2] == imgArray[0].shape[:2]:
                imgArray[x] = cv2.resize(imgArray[x], (0, 0), None, scale, scale)
            else:
                imgArray[x] = cv2.resize(imgArray[x], (imgArray[0].shape[1], imgArray[0].shape[0]), None, scale, scale)
            if len(imgArray[x].shape) == 2: imgArray[x] = cv2.cvtColor(imgArray[x], cv2.COLOR_GRAY2BGR)
        hor = np.hstack(imgArray)
        ver = hor
    return ver


# Read picture
src = cv2.imread("TXM.jpg")
img = src.copy()

# Grayscale
img_1 = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Gaussian filtering
img_2 = cv2.GaussianBlur(img_1, (5, 5), 1)

# Sobel operator
sobel_x = cv2.Sobel(img_2, cv2.CV_64F, 1, 0, ksize=3)
sobel_y = cv2.Sobel(img_2, cv2.CV_64F, 0, 1, ksize=3)
sobel_x = cv2.convertScaleAbs(sobel_x)
sobel_y = cv2.convertScaleAbs(sobel_y)
img_3 = cv2.addWeighted(sobel_x, 0.5, sobel_y, 0.5, 0)

# Mean square wave
img_4 = cv2.blur(img_3, (5, 5))

# Binarization
ret, img_5 = cv2.threshold(img_4, 127, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

# Closed operation
kernel = np.ones((100, 100), int)
img_6 = cv2.morphologyEx(img_5, cv2.MORPH_CLOSE, kernel)

# Open operation
kernel = np.ones((200, 200), int)
img_7 = cv2.morphologyEx(img_6, cv2.MORPH_OPEN, kernel)

# Draw barcode area
contours = cv2.findContours(img_7, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
contours = imutils.grab_contours(contours)
c = sorted(contours, key=cv2.contourArea, reverse=True)[0]
rect = cv2.minAreaRect(c)
box = cv2.cv.BoxPoints(rect) if imutils.is_cv2() else cv2.boxPoints(rect)
box = np.int0(box)
cv2.drawContours(img, [box], -1, (0, 255, 0), 20)

# Display picture information
cv2.putText(img, "results", (200, 200), cv2.FONT_HERSHEY_SIMPLEX, 10.0, (255, 0, 0), 30)
cv2.putText(img_1, "gray", (200, 200), cv2.FONT_HERSHEY_SIMPLEX, 10.0, (255, 0, 0), 30)
cv2.putText(img_2, "GaussianBlur", (200, 200), cv2.FONT_HERSHEY_SIMPLEX, 10.0, (255, 0, 0), 30)
cv2.putText(img_3, "Sobel", (200, 200), cv2.FONT_HERSHEY_SIMPLEX, 10.0, (255, 0, 0), 30)
cv2.putText(img_4, "blur", (200, 200), cv2.FONT_HERSHEY_SIMPLEX, 10.0, (255, 0, 0), 30)
cv2.putText(img_5, "threshold", (200, 200), cv2.FONT_HERSHEY_SIMPLEX, 10.0, (255, 0, 0), 30)
cv2.putText(img_6, "close", (200, 200), cv2.FONT_HERSHEY_SIMPLEX, 10.0, (255, 0, 0), 30)
cv2.putText(img_7, "open", (200, 200), cv2.FONT_HERSHEY_SIMPLEX, 10.0, (255, 0, 0), 30)

# Output barcode
barcodes = pyzbar.decode(src)
for barcode in barcodes:
    barcodeData = barcode.data.decode("utf-8")
    cv2.putText(img, barcodeData, (200, 600), cv2.FONT_HERSHEY_SIMPLEX, 5.0, (0, 255, 0), 30)

# Show all pictures
imgStack = stackImages(0.4, ([img_1, img_2, img_3, img_4], [img_5, img_6, img_7, img]))
cv2.imshow("imgStack", imgStack)

cv2.waitKey(0)

reference resources

https://blog.csdn.net/m0_51120713/article/details/121849819

Topics: OpenCV Computer Vision

Programmer Think