Redes convolucionales para imágenes de lenguaje de signos
(SPANISH) Resolución de la competición de Kaggle de lenguaje de signos
Introducción
Se resolverá la competición de Kaggle sobre imágenes de lenguaje de signos (clasificación multiclase), disponible en este enlace.
El dataset de lenguaje de signos surgió como una evolución de MNIST y Fashion MNIST, siguiendo la misma filosofía y formato: imágenes de 28x28 píxeles en escala de grises, con 26 etiquetas (0-25) correspondientes a las letras A-Z (no hay casos para 9=J ni 25=Z, ya que esas letras requieren movimiento). El dataset de entrenamiento contiene 27,455 muestras y el de pruebas 7,172.
import csv
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from os import getcwd
# You will need to write code that will read the file passed
# into this function. The first line contains the column headers
# so you should ignore it
# Each successive line contains 785 comma-separated values between 0 and 255
# The first value is the label
# The rest are the pixel values for that picture
# The function will return 2 np.array types. One with all the labels
# One with all the images
#
# Tips:
# If you read a full line (as 'row') then row[0] has the label
# and row[1:785] has the 784 pixel values
# Take a look at np.array_split to turn the 784 pixels into 28x28
# You are reading in strings, but need the values to be floats
# Check out np.array().astype for a conversion
def get_data(filename):
    """Load a Sign-Language-MNIST style CSV into image and label arrays.

    The first line of the file is treated as a header and skipped. Every
    remaining line must hold 785 comma-separated numbers: the class label
    followed by the 784 pixel values of one 28x28 image.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        A tuple ``(images, labels)``: ``images`` is a float array of shape
        ``(n, 28, 28)`` and ``labels`` is an int array of shape ``(n,)``,
        where ``n`` is the number of data rows.

    Raises:
        ValueError: If the rows cannot be parsed as numbers or do not hold
            exactly 784 pixel values.
    """
    # ndmin=2 keeps the table two-dimensional even when the file holds a
    # single data row; without it loadtxt returns a 1-D array and the
    # column slicing below raises IndexError.
    table = np.loadtxt(filename, delimiter=',', skiprows=1, ndmin=2)

    # Column 0 is the label, the remaining columns are the flattened image.
    labels = table[:, 0].astype('int')
    # astype() copies, so the returned images do not keep the full table
    # (labels included) alive after this function returns.
    images = table[:, 1:].astype('float').reshape(table.shape[0], 28, 28)
    return images, labels
# Both CSV files are looked up in ../tmp2 relative to the current working
# directory.
path_sign_mnist_train = f"{getcwd()}/../tmp2/sign_mnist_train.csv"
path_sign_mnist_test = f"{getcwd()}/../tmp2/sign_mnist_test.csv"

# get_data returns an (images, labels) pair for each split; the training
# file is read first, then the test file.
(training_images, training_labels), (testing_images, testing_labels) = (
    get_data(path_sign_mnist_train),
    get_data(path_sign_mnist_test),
)
# Sanity check: print the shape of every loaded array, one per line.
# Expected output:
#   (27455, 28, 28)
#   (27455,)
#   (7172, 28, 28)
#   (7172,)
print(
    *(
        array.shape
        for array in (
            training_images,
            training_labels,
            testing_images,
            testing_labels,
        )
    ),
    sep="\n",
)
# Append a trailing single-channel axis to both image arrays, so an array of
# shape (n, 28, 28) becomes (n, 28, 28, 1).
training_images = training_images[..., np.newaxis]
testing_images = testing_images[..., np.newaxis]
# Training pipeline: rescale the pixel values by 1/255 and apply random
# augmentation (rotation, shifts, shear, zoom, horizontal flip).
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Validation pipeline: same rescaling, but no augmentation.
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)
# Sanity check after adding the channel axis, one shape per line.
# Expected output:
#   (27455, 28, 28, 1)
#   (7172, 28, 28, 1)
print(training_images.shape, testing_images.shape, sep="\n")
# Small CNN built layer by layer: two Conv2D + MaxPooling2D stages followed
# by a dense classifier (the exercise allows at most two of each).
model = tf.keras.models.Sequential()

# Convolutional feature extractor on 28x28 single-channel inputs.
model.add(
    tf.keras.layers.Conv2D(
        128, (3, 3), activation='relu', input_shape=(28, 28, 1)
    )
)
model.add(tf.keras.layers.MaxPooling2D(2, 2))
model.add(tf.keras.layers.Conv2D(32, (3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(2, 2))

# Classifier head: flatten, one 512-unit hidden layer, 25-way softmax
# (one output unit per class index 0-24).
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(512, activation='relu'))
model.add(tf.keras.layers.Dense(25, activation='softmax'))

# Categorical cross-entropy expects one-hot encoded labels.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
# Train the model on augmented batches and validate on the rescaled test set.
from tensorflow.keras.utils import to_categorical

# One-hot encode the labels. The width is pinned to the size of the model's
# output layer instead of being inferred from each split separately, so the
# encoding cannot silently disagree with the network if one split happens
# to lack the highest class index.
num_classes = model.output_shape[-1]
training_labels1 = to_categorical(training_labels, num_classes=num_classes)
testing_labels1 = to_categorical(testing_labels, num_classes=num_classes)

train_generator = train_datagen.flow(
    training_images, training_labels1, batch_size=500
)
validation_generator = validation_datagen.flow(
    testing_images, testing_labels1, batch_size=500
)

# Model.fit accepts these iterators directly; fit_generator is deprecated
# and has been removed from recent Keras releases.
# steps_per_epoch / validation_steps are deliberately left unset so that
# each epoch is exactly one pass over the data. The previous fixed values
# (100 and 30) asked for more batches than the iterators hold per pass,
# which makes recent TensorFlow versions stop with "input ran out of data".
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=3,
    verbose=2,
)
# The network was trained and validated on pixels rescaled by 1/255 (both
# data generators use rescale=1./255), so the raw 0-255 test images must be
# scaled the same way here; evaluating on unscaled pixels reports a
# loss/accuracy that does not reflect the trained model.
model.evaluate(testing_images / 255.0, testing_labels1, verbose=0)
# Plot the chart for accuracy and loss on both training and validation
%matplotlib inline
import matplotlib.pyplot as plt
acc = history.history['accuracy' ]
val_acc = history.history['val_accuracy' ]
loss = history.history['loss' ]
val_loss = history.history['val_loss' ]
epochs = range(len(acc))
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()
plt.figure()
plt.plot(epochs, loss, 'r', label='Training Loss')
plt.plot(epochs, val_loss, 'b', label='Validation Loss')
plt.title('Training and validation loss')
plt.legend()
plt.show()