Contents
  • Advanced networks
  • Preparation of data
  • Discriminator, Generator & combined architectures (GAN)
  • Data reload
  • Generations (images, latent points)
  • Images evolution
  • Performances display
  • Training function
  • Testing all together (RUN)
  • Standalone input
  • Our Generator
  • Conclusions

Advanced networks¶

This project uses neural networks with a GAN architecture on a bank of images from the anime series "Naruto", which depicts the series' various characters, in order to generate an original character of our own. In a GAN, a generator network learns to produce images that a discriminator network cannot tell apart from the real ones, and the two networks are trained in competition.

In [1]:
import numpy as np
from numpy import expand_dims
from numpy import zeros
from numpy import ones
from numpy import vstack
from numpy.random import randn
from numpy.random import randint

from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Reshape
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Conv2DTranspose
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import Dropout
from matplotlib import pyplot as plt
from skimage import io
from skimage.transform import resize

import time
import os
from os import listdir
from os.path import isfile, join
#from google.colab.patches import cv2_imshow # for image display
import cv2
2022-03-29 16:24:26.620501: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-03-29 16:24:26.620548: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
In [4]:
# On your laptops: create the output folders on the first run
#os.mkdir('images_long')
#os.mkdir('models_long')
In [75]:
print('Files in this folder:', os.listdir('images_long'))
Files in this folder: ['generated_plot_e040.png', 'generated_plot_e100.png', 'generated_plot_e080.png', 'generated_plot_e090.png', 'generated_plot_e010.png', 'generated_plot_e050.png', 'generated_plot_e060.png', '.ipynb_checkpoints', 'generated_plot_e030.png', 'generated_plot_e020.png', 'generated_plot_e070.png']
In [76]:
print('Files in this folder:', os.listdir('models_long'))
Files in this folder: ['generator_model_060.h5', 'generator_model_040.h5', 'generator_model_080.h5', 'generator_model_020.h5', 'generator_model_010.h5', 'generator_model_070.h5', 'generator_model_090.h5', 'generator_model_050.h5', 'generator_model_100.h5', 'generator_model_030.h5']

Preparation of data¶

In [7]:
#load the data from inside the jupyter notebook
images = []          # path of each image file
img = []             # image arrays
original_shape = []  # original shape of each image
folder_path = 'Naruto/images-small'
for root, dirs, files in os.walk(folder_path):
    for file in files:
        path = join(root, file)  # name and path of the image
        if isfile(path):
            # appending keeps files from every subfolder (indexing per folder would overwrite earlier entries)
            images.append(path)
            img.append(io.imread(path))           # image
            original_shape.append(img[-1].shape)  # shape
In [8]:
#print the image and shape of the first 10 samples
for i in range(0,10):
    plt.imshow(img[i])
    plt.show()
    print(img[i].shape)
[10 sample images displayed; printed shapes:]
(109, 110, 3)
(82, 82, 3)
(63, 63, 3)
(63, 63, 3)
(72, 72, 3)
(72, 72, 3)
(63, 64, 3)
(63, 63, 3)
(72, 81, 3)
(109, 119, 3)
In [9]:
#since the image sizes vary widely, we compute the average along each axis before choosing a resize target
x,y = 0,0
for i in range(len(img)):
    x += original_shape[i][0]
    y += original_shape[i][1]

x = x/len(img)
y = y/len(img)
print (x, y)
80.4988358556461 80.47002328288708
In [10]:
#resize everything to 56x56 (56 = 7 * 2^3, which matches the generator's three 2x upsampling stages from its 7x7 base)

new_shapes = {}
data = np.zeros(shape=(len(img),56,56,3), dtype=np.uint8)
for i in range(len(img)):
    im = img[i]
    res = cv2.resize(img[i], (56, 56), interpolation = cv2.INTER_AREA)
    new_shapes[i] = res.shape
    data[i] = res
In [11]:
#verify the average size of the new dataset
x,y = 0,0
for i in range(len(img)):
    x += new_shapes[i][0]
    y += new_shapes[i][1]

x = x/len(img)
y = y/len(img)
print (x, y)
56.0 56.0
In [12]:
#print the resized data and their shapes to confirm
for i in range(0,30):
    plt.imshow(data[i])
    plt.show()
    print(data[i].shape)
[30 resized images displayed, each with shape (56, 56, 3)]

Discriminator, Generator & combined architectures (GAN)¶

In [12]:
# define the standalone discriminator model
def define_discriminator(in_shape=(56,56,3)):
    model = Sequential()
    model.add(Conv2D(64, (3,3), padding='same', input_shape=in_shape))
    model.add(LeakyReLU(alpha=0.2))
    # downsample
    model.add(Conv2D(128, (3,3), strides=(2,2), padding='same'))
    model.add(LeakyReLU(alpha=0.2))
    # downsample
    model.add(Conv2D(128, (3,3), strides=(2,2), padding='same'))
    model.add(LeakyReLU(alpha=0.2))    
    # downsample
    model.add(Conv2D(256, (3,3), strides=(2,2), padding='same'))
    model.add(LeakyReLU(alpha=0.2))
    # classifier
    model.add(Flatten())
    model.add(Dropout(0.4))
    model.add(Dense(1, activation='sigmoid'))
    # compile model
    opt = Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model

###############################################################################################

# define the standalone generator model
def define_generator(latent_dim):
    model = Sequential()
    # foundation for 7x7 image
    n_nodes = 256 * 7 * 7
    model.add(Dense(n_nodes, input_dim=latent_dim))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Reshape((7, 7, 256)))
    # upsample to 14x14
    model.add(Conv2DTranspose(128, (4,4), strides=(2,2), padding='same'))
    model.add(LeakyReLU(alpha=0.2))
    # upsample to 28x28
    model.add(Conv2DTranspose(128, (4,4), strides=(2,2), padding='same'))
    model.add(LeakyReLU(alpha=0.2))
    # upsample to 56x56
    model.add(Conv2DTranspose(128, (4,4), strides=(2,2), padding='same'))
    model.add(LeakyReLU(alpha=0.2))
    model.add(Conv2D(3, (3,3), activation='tanh', padding='same'))
    return model

###############################################################################################

# define the combined generator and discriminator model, for updating the generator
def define_gan(g_model, d_model):
    # make weights in the discriminator not trainable
    d_model.trainable = False
    # connect them
    model = Sequential()
    # add generator
    model.add(g_model)
    # add the discriminator
    model.add(d_model)
    # compile model
    opt = Adam(learning_rate=0.0002, beta_1=0.5)
    model.compile(loss='binary_crossentropy', optimizer=opt)
    return model
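As a quick sanity check (a sketch added here, not part of the original notebook), the three builders can be instantiated and their output shapes compared against the model summaries printed further below:

g = define_generator(100)
d = define_discriminator()
gan = define_gan(g, d)
print(g.output_shape)   # expected: (None, 56, 56, 3)
print(d.output_shape)   # expected: (None, 1)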

Data reload¶

In [13]:
# load and prepare the Naruto training images (the `data` array built above)
def load_real_samples():
    trainX = data
    # scale pixel values from [0,255] to [-1,1] to match the generator's tanh output
    X = (trainX - 127.5) / 127.5
    return X
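A minimal check of the rescaling (a sketch, assuming the cells above have already run):

X = load_real_samples()
print(X.shape)           # (n_images, 56, 56, 3)
print(X.min(), X.max())  # values should lie in [-1, 1]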

Generations (images, latent points)¶

In [14]:
# select real samples
def generate_real_samples(dataset, n_samples):
    # choose random instances
    ix = randint(0, dataset.shape[0], n_samples)
    # retrieve selected images
    X = dataset[ix]
    # generate 'real' class labels (1)
    y = ones((n_samples, 1))
    return X, y
 
# generate points in latent space as input for the generator
def generate_latent_points(latent_dim, n_samples):
    # generate points in the latent space
    x_input = randn(latent_dim * n_samples)
    # reshape into a batch of inputs for the network
    x_input = x_input.reshape(n_samples, latent_dim)
    return x_input
 
# use the generator to generate n fake examples, with class labels
def generate_fake_samples(g_model, latent_dim, n_samples):
    # generate points in latent space
    x_input = generate_latent_points(latent_dim, n_samples)
    # predict outputs
    X = g_model.predict(x_input)
    # create 'fake' class labels (0)
    y = zeros((n_samples, 1))
    return X, y
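A quick usage sketch of these helpers (assuming `dataset` and `g_model` have been created as in the RUN section below):

X_real, y_real = generate_real_samples(dataset, 4)
X_fake, y_fake = generate_fake_samples(g_model, 100, 4)
print(X_real.shape, y_real.ravel())  # (4, 56, 56, 3), labels [1. 1. 1. 1.]
print(X_fake.shape, y_fake.ravel())  # (4, 56, 56, 3), labels [0. 0. 0. 0.]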

Images evolution¶

In [15]:
# create and save a plot of generated images
def save_plot(examples, epoch, n=10):
    # plot images
    examples = (examples + 1) / 2.0
    for i in range(n * n):
        # define subplot
        plt.subplot(n, n, 1 + i)
        # turn off axis
        plt.axis('off')
        # plot raw pixel data
        plt.imshow(examples[i])
    # save plot to file
    filename = 'images_long/generated_plot_e%03d.png' % (epoch+1)
    plt.savefig(filename)
    plt.close()

Performances display¶

In [16]:
# evaluate the discriminator, plot generated images, save generator model
def summarize_performance(epoch, g_model, d_model, dataset, latent_dim, n_samples=100):
    # prepare real samples
    X_real, y_real = generate_real_samples(dataset, n_samples)
    # evaluate discriminator on real examples
    _, acc_real = d_model.evaluate(X_real, y_real, verbose=0)
    # prepare fake examples
    x_fake, y_fake = generate_fake_samples(g_model, latent_dim, n_samples)
    # evaluate discriminator on fake examples
    _, acc_fake = d_model.evaluate(x_fake, y_fake, verbose=0)
    # summarize discriminator performance
    print('>Accuracy real: %.0f%%, fake: %.0f%%' % (acc_real*100, acc_fake*100))
    # save plot
    save_plot(x_fake, epoch)
    # save the generator model to file
    filename = 'models_long/generator_model_%03d.h5' % (epoch + 1)
    g_model.save(filename)

Training function¶

In [38]:
# train the generator and discriminator
def train(g_model, d_model, gan_model, dataset, latent_dim, n_epochs=100, n_batch=128):
    bat_per_epo = int(dataset.shape[0] / n_batch)
    half_batch = int(n_batch / 2)
    # manually enumerate epochs
    for i in range(n_epochs):
        # enumerate batches over the training set
        for j in range(bat_per_epo):
            # get randomly selected 'real' samples
            X_real, y_real = generate_real_samples(dataset, half_batch)
            # generate 'fake' examples
            X_fake, y_fake = generate_fake_samples(g_model, latent_dim, half_batch)
            # create training set for the discriminator
            X, y = vstack((X_real, X_fake)), vstack((y_real, y_fake))
            # update discriminator model weights
            d_loss, _ = d_model.train_on_batch(X, y)
            # prepare points in latent space as input for the generator
            X_gan = generate_latent_points(latent_dim, n_batch)
            # create inverted labels for the fake samples
            y_gan = ones((n_batch, 1))
            # update the generator via the discriminator's error
            g_loss = gan_model.train_on_batch(X_gan, y_gan)
            # summarize loss on this batch
            # print('>%d, %d/%d, d=%.3f, g=%.3f' % (i+1, j+1, bat_per_epo, d_loss, g_loss))
        # evaluate the model performance, sometimes
        if (i+1) % 10 == 0:
            summarize_performance(i, g_model, d_model, dataset, latent_dim)
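Before launching the full run, a tiny smoke test (a sketch, assuming the models and dataset from the next cells exist) confirms the loop executes end to end; with a single epoch no checkpoint is written, since summarize_performance only fires every 10 epochs:

# smoke test: one epoch, small batch
train(g_model, d_model, gan_model, dataset, latent_dim, n_epochs=1, n_batch=32)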
In [18]:
# create the discriminator
d_model = define_discriminator()
d_model.summary()
# create the generator
g_model = define_generator(100)
g_model.summary()
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 conv2d (Conv2D)             (None, 56, 56, 64)        1792      
                                                                 
 leaky_re_lu (LeakyReLU)     (None, 56, 56, 64)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 28, 28, 128)       73856     
                                                                 
 leaky_re_lu_1 (LeakyReLU)   (None, 28, 28, 128)       0         
                                                                 
 conv2d_2 (Conv2D)           (None, 14, 14, 128)       147584    
                                                                 
 leaky_re_lu_2 (LeakyReLU)   (None, 14, 14, 128)       0         
                                                                 
 conv2d_3 (Conv2D)           (None, 7, 7, 256)         295168    
                                                                 
 leaky_re_lu_3 (LeakyReLU)   (None, 7, 7, 256)         0         
                                                                 
 flatten (Flatten)           (None, 12544)             0         
                                                                 
 dropout (Dropout)           (None, 12544)             0         
                                                                 
 dense (Dense)               (None, 1)                 12545     
                                                                 
=================================================================
Total params: 530,945
Trainable params: 530,945
Non-trainable params: 0
_________________________________________________________________
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 dense_1 (Dense)             (None, 12544)             1266944   
                                                                 
 leaky_re_lu_4 (LeakyReLU)   (None, 12544)             0         
                                                                 
 reshape (Reshape)           (None, 7, 7, 256)         0         
                                                                 
 conv2d_transpose (Conv2DTra  (None, 14, 14, 128)      524416    
 nspose)                                                         
                                                                 
 leaky_re_lu_5 (LeakyReLU)   (None, 14, 14, 128)       0         
                                                                 
 conv2d_transpose_1 (Conv2DT  (None, 28, 28, 128)      262272    
 ranspose)                                                       
                                                                 
 leaky_re_lu_6 (LeakyReLU)   (None, 28, 28, 128)       0         
                                                                 
 conv2d_transpose_2 (Conv2DT  (None, 56, 56, 128)      262272    
 ranspose)                                                       
                                                                 
 leaky_re_lu_7 (LeakyReLU)   (None, 56, 56, 128)       0         
                                                                 
 conv2d_4 (Conv2D)           (None, 56, 56, 3)         3459      
                                                                 
=================================================================
Total params: 2,319,363
Trainable params: 2,319,363
Non-trainable params: 0
_________________________________________________________________
2022-03-29 16:30:49.952561: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /opt/conda/lib/python3.9/site-packages/cv2/../../lib64:
2022-03-29 16:30:49.952606: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-03-29 16:30:49.952627: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (5f10b69f3f42): /proc/driver/nvidia/version does not exist
2022-03-29 16:30:49.952794: I tensorflow/core/platform/cpu_feature_guard.cc:151] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.

Testing all together (RUN)¶

In [39]:
# size of the latent space
latent_dim = 100
# create the discriminator
d_model = define_discriminator()
# create the generator
g_model = define_generator(latent_dim)
# create the gan
gan_model = define_gan(g_model, d_model)
# load image data
dataset = load_real_samples()

start = time.time()
# train model
train(g_model, d_model, gan_model, dataset, latent_dim)
print('Took %.1f seconds to execute the model' %(time.time()-start))

# Save the final model
g_model.save('NARUTO_generator_long.h5')
>Accuracy real: 58%, fake: 88%
>Accuracy real: 75%, fake: 94%
>Accuracy real: 87%, fake: 61%
>Accuracy real: 71%, fake: 53%
>Accuracy real: 81%, fake: 73%
>Accuracy real: 66%, fake: 95%
>Accuracy real: 75%, fake: 65%
>Accuracy real: 83%, fake: 61%
>Accuracy real: 74%, fake: 93%
>Accuracy real: 65%, fake: 98%
Took 10843.2 seconds to execute the model
(each model save was followed by: WARNING:tensorflow:Compiled the loaded model, but the compiled metrics have yet to be built. `model.compile_metrics` will be empty until you train or evaluate the model.)

Standalone input¶

Our Generator¶

In [147]:
# example of loading the generator model and generating images
from keras.models import load_model
from numpy.random import randn
from matplotlib import pyplot
 
# generate points in latent space as input for the generator
def generate_latent_points(latent_dim, n_samples):
    # generate points in the latent space
    x_input = randn(latent_dim * n_samples)
    # reshape into a batch of inputs for the network
    x_input = x_input.reshape(n_samples, latent_dim)
    return x_input
 
# plot the generated images
def create_plot(examples, n):
    # plot images in an n x n grid
    for i in range(n * n):
        # define subplot
        pyplot.subplot(n, n, 1 + i)
        # turn off axis
        pyplot.axis('off')
        # plot raw pixel data
        pyplot.imshow(examples[i])
    # show the grid once all subplots are drawn
    pyplot.show()
 
# load model
model = load_model('NARUTO_generator_long.h5')
# generate images
latent_points = generate_latent_points(100, 1)
# generate images
X = model.predict(latent_points)
# scale from [-1,1] to [0,1]
X = (X + 1) / 2.0
# plot the result
create_plot(X, 1)
WARNING:tensorflow:No training configuration found in the save file, so the model was *not* compiled. Compile it manually.
[generated image displayed]
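An instructive follow-up (a sketch, not part of the original run) is to interpolate between two latent points; a reasonably trained generator should morph smoothly from one output to the other:

import numpy as np
p0 = generate_latent_points(100, 1)
p1 = generate_latent_points(100, 1)
steps = np.linspace(0, 1, 4).reshape(-1, 1)
batch = p0 + steps * (p1 - p0)            # 4 points along the segment from p0 to p1
imgs = (model.predict(batch) + 1) / 2.0   # rescale from [-1,1] to [0,1]
create_plot(imgs, 2)                      # show as a 2x2 grid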

Conclusions¶

  • The procedure works: although the results are not yet satisfactory, the generated image has a recognizably human shape, even if not fully formed.
  • The result can be improved by increasing the number of epochs and batches; more than 3,000 images are available for training, which would raise the quality of the final generated image.
  • Due to processing-capacity constraints, the training parameters could not be increased further. Even so, the results are heading in the right direction.
  • Training for 100 epochs took about 3 hours.