Download and use of cifar dataset

Posted by Sonu Kapoor on Tue, 07 Jan 2020 02:13:21 +0100

git

Download the cifar10 file and put it in the project path

Project structure

download

# Import the download helper from the cifar10 module placed in the project path.
from cifar10 import maybe_download_and_extract

# Fetch and unpack the dataset. Presumably this is a no-op when the data is
# already present (suggested by the "maybe" in the name) -- confirm in cifar10.py.
maybe_download_and_extract()

Default download path

Read and display manually

import numpy as np
import pylab

filename = '/tmp/cifar10_data/cifar-10-batches-bin/test_batch.bin'

# Layout of one record in the binary batch file: 1 label byte followed by a
# 32x32 image with 3 channels, stored channel-first (one full 32x32 plane per
# channel).
num_records = 10000  # the test batch has 10000 records
record_bytes = 1 + 32 * 32 * 3

# The context manager closes the file even if the read raises.
with open(filename, "rb") as bytestream:
    buf = bytestream.read(num_records * record_bytes)

# Fail early with a clear message instead of an opaque reshape error when the
# file is truncated or is not a CIFAR-10 binary batch.
if len(buf) != num_records * record_bytes:
    raise ValueError(
        "expected %d bytes from %s, got %d"
        % (num_records * record_bytes, filename, len(buf))
    )

# One row per record: column 0 is the label, the rest is the pixel data.
data = np.frombuffer(buf, dtype=np.uint8).reshape(num_records, record_bytes)
labels = data[:, 0]

# The pixels are channel-first on disk, so reshape to (N, 3, 32, 32) first and
# only then move the channel axis last. Reshaping straight to (N, 32, 32, 3)
# would interleave the colour planes and scramble every image.
images = data[:, 1:].reshape(num_records, 3, 32, 32).transpose(0, 2, 3, 1)

img = images[0]  # first image, already (height, width, channel)

print(labels[0])
pylab.imshow(img)
pylab.show()

Use native functions

The built-in reading function preprocesses each image: it crops the 32×32 image to 24×24 and then standardizes it (subtracting the mean pixel value and dividing by the pixel standard deviation). The advantage is that all inputs fall within a well-behaved data distribution, which makes feature classification easier and speeds up the convergence of the gradient descent algorithm.

# Put it in cifar directory
import cifar10_input
import tensorflow as tf
import pylab

# Fetch data: build the graph ops that yield one batch of test images/labels.
batch_size = 12
data_dir = '/tmp/cifar10_data/cifar-10-batches-bin'
images_test, labels_test = cifar10_input.inputs(
    eval_data=True, data_dir=data_dir, batch_size=batch_size)

# Expected output: (12, 24, 24, 3) (12,)
print(images_test.shape, labels_test.shape)

# Use the session as a context manager so it is closed on exit, and run the
# input queue threads under a Coordinator so they are stopped and joined
# instead of being left running in the background.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # Pull a single batch out of the input pipeline as numpy arrays.
        image_batch, label_batch = sess.run([images_test, labels_test])
    finally:
        coord.request_stop()
        coord.join(threads)

print("__\n", image_batch[0])
print("__\n", label_batch[0])

# The batch comes back preprocessed by cifar10_input, so the picture shown
# here is not expected to look like the original image.
pylab.imshow(image_batch[0])
pylab.show()

As you can see, the image looks distorted after preprocessing.

If you want to see the original image data, you can either read the file manually or modify the read function.

Modify the image size and skip the preprocessing

Read and display with OpenCV. Note that the data must first be converted to the uint8 type (`np.uint8`).

# Put it in cifar directory
import cifar10_input
import tensorflow as tf
import pylab
import cv2 as cv
import numpy as np

# Fetch data: build the graph ops that yield one batch of test images/labels.
batch_size = 12
data_dir = '/tmp/cifar10_data/cifar-10-batches-bin'
images_test, labels_test = cifar10_input.inputs(
    eval_data=True, data_dir=data_dir, batch_size=batch_size)

# (12, 24, 24, 3) (12,) with the stock reader.
# NOTE(review): the spatial size will differ if cifar10_input was modified to
# skip cropping -- confirm against the modified module.
print(images_test.shape, labels_test.shape)

# Use the session as a context manager so it is closed on exit, and run the
# input queue threads under a Coordinator so they are stopped and joined
# instead of being left running in the background.
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # Pull a single batch out of the input pipeline as numpy arrays.
        image_batch, label_batch = sess.run([images_test, labels_test])
    finally:
        coord.request_stop()
        coord.join(threads)

print("__\n", image_batch[0])
print("__\n", label_batch[0])

# Both display calls below need 8-bit pixels, so cast the first image.
img = np.asarray(image_batch[0], np.uint8)
print(img.shape)
print(img)

# OpenCV's imshow interprets 3-channel images as BGR, while the array here is
# in RGB order (the order pylab.imshow expects), so swap the channels for the
# OpenCV window only; otherwise red and blue appear exchanged.
cv.imshow('img', cv.cvtColor(img, cv.COLOR_RGB2BGR))
pylab.imshow(img)
pylab.show()
# Keep the OpenCV window open until a key is pressed.
cv.waitKey(0)

Topics: Session git github OpenCV

Programmer Think

Download and use of cifar dataset

Hot Topics