Advanced networks¶
This project uses neural networks with a GAN architecture to learn from a bank of images from the animé series "Naruto", which depict the series' different characters, and then generate an original character of its own.
In [1]:
import numpy as np
from numpy import expand_dims
from numpy import zeros
from numpy import ones
from numpy import vstack
from numpy.random import randn
from numpy.random import randint
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Reshape
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Conv2DTranspose
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.layers import Dropout
from matplotlib import pyplot as plt
from skimage import io
from skimage.transform import resize
import time
import os
from os import listdir
from os.path import isfile, join
#from google.colab.patches import cv2_imshow # for image display
import cv2
In [4]:
# Run once on your machine to create the output folders
#os.mkdir('images_long')
#os.mkdir('models_long')
In [75]:
print('Files in this folder:', os.listdir('images_long'))
Files in this folder: ['generated_plot_e040.png', 'generated_plot_e100.png', 'generated_plot_e080.png', 'generated_plot_e090.png', 'generated_plot_e010.png', 'generated_plot_e050.png', 'generated_plot_e060.png', '.ipynb_checkpoints', 'generated_plot_e030.png', 'generated_plot_e020.png', 'generated_plot_e070.png']
In [76]:
print('Files in this folder:', os.listdir('models_long'))
Files in this folder: ['generator_model_060.h5', 'generator_model_040.h5', 'generator_model_080.h5', 'generator_model_020.h5', 'generator_model_010.h5', 'generator_model_070.h5', 'generator_model_090.h5', 'generator_model_050.h5', 'generator_model_100.h5', 'generator_model_030.h5']
Preparation of data¶
In [7]:
# load the image data from disk
images = {}          # path of each image
img = {}             # image arrays
original_shape = {}  # shape of each image
folder_path = 'Naruto/images-small'
index = 0            # running index, so images in nested folders are not overwritten
for root, dirs, files in os.walk(folder_path):
    for file in files:
        if not isfile(join(root, file)):
            continue
        images[index] = join(root, file)           # name and path of the image
        img[index] = io.imread(images[index])      # image array
        original_shape[index] = img[index].shape   # shape
        index += 1
In [8]:
# display the image and shape of the first 10 samples
for i in range(0,10):
plt.imshow(img[i])
plt.show()
print(img[i].shape)
(109, 110, 3)
(82, 82, 3)
(63, 63, 3)
(63, 63, 3)
(72, 72, 3)
(72, 72, 3)
(63, 64, 3)
(63, 63, 3)
(72, 81, 3)
(109, 119, 3)
In [9]:
# since the image sizes vary widely, compute the per-axis average to guide the resize
x,y = 0,0
for i in range(len(img)):
x += original_shape[i][0]
y += original_shape[i][1]
x = x/len(img)
y = y/len(img)
print (x, y)
80.4988358556461 80.47002328288708
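The same per-axis average can be computed in one vectorized step. A minimal sketch, assuming original_shape holds the (height, width, channels) tuples built above:

# vectorized equivalent of the averaging loop above (sketch, not original notebook code)
shapes = np.array([original_shape[i][:2] for i in range(len(img))])
mean_h, mean_w = shapes.mean(axis=0)  # per-axis mean over all images
print(mean_h, mean_w)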
In [10]:
# resize every image to 56x56 (56 = 7 * 2^3, matching the generator's three upsampling steps)
new_shapes = {}
data = np.zeros(shape=(len(img), 56, 56, 3), dtype=np.uint8)
for i in range(len(img)):
    res = cv2.resize(img[i], (56, 56), interpolation=cv2.INTER_AREA)
    new_shapes[i] = res.shape
    data[i] = res
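Two details are worth noting here: cv2.INTER_AREA is the interpolation OpenCV recommends for shrinking images, and cv2.resize is agnostic to channel order, so it can safely be applied to the RGB arrays returned by skimage's io.imread.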
In [11]:
# verify the average size of the resized dataset
x,y = 0,0
for i in range(len(img)):
x += new_shapes[i][0]
y += new_shapes[i][1]
x = x/len(img)
y = y/len(img)
print (x, y)
56.0 56.0
In [12]:
# display the resized images and their shapes to confirm
for i in range(0,30):
plt.imshow(data[i])
plt.show()
print(data[i].shape)
(56, 56, 3)
... (the same (56, 56, 3) shape is printed for all 30 samples)
Discriminator, Generator & combined architectures (GAN)¶
In [12]:
# define the standalone discriminator model
def define_discriminator(in_shape=(56,56,3)):
model = Sequential()
model.add(Conv2D(64, (3,3), padding='same', input_shape=in_shape))
model.add(LeakyReLU(alpha=0.2))
# downsample
model.add(Conv2D(128, (3,3), strides=(2,2), padding='same'))
model.add(LeakyReLU(alpha=0.2))
# downsample
model.add(Conv2D(128, (3,3), strides=(2,2), padding='same'))
model.add(LeakyReLU(alpha=0.2))
# downsample
model.add(Conv2D(256, (3,3), strides=(2,2), padding='same'))
model.add(LeakyReLU(alpha=0.2))
# classifier
model.add(Flatten())
model.add(Dropout(0.4))
model.add(Dense(1, activation='sigmoid'))
# compile model
opt = Adam(learning_rate=0.0002, beta_1=0.5)
model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
return model
###############################################################################################
# define the standalone generator model
def define_generator(latent_dim):
model = Sequential()
# foundation for 7x7 image
n_nodes = 256 * 7 * 7
model.add(Dense(n_nodes, input_dim=latent_dim))
model.add(LeakyReLU(alpha=0.2))
model.add(Reshape((7, 7, 256)))
# upsample to 14x14
model.add(Conv2DTranspose(128, (4,4), strides=(2,2), padding='same'))
model.add(LeakyReLU(alpha=0.2))
# upsample to 28x28
model.add(Conv2DTranspose(128, (4,4), strides=(2,2), padding='same'))
model.add(LeakyReLU(alpha=0.2))
# upsample to 56x56
model.add(Conv2DTranspose(128, (4,4), strides=(2,2), padding='same'))
model.add(LeakyReLU(alpha=0.2))
model.add(Conv2D(3, (3,3), activation='tanh', padding='same'))
return model
###############################################################################################
# define the combined generator and discriminator model, for updating the generator
def define_gan(g_model, d_model):
# make weights in the discriminator not trainable
d_model.trainable = False
# connect them
model = Sequential()
# add generator
model.add(g_model)
# add the discriminator
model.add(d_model)
# compile model
opt = Adam(learning_rate=0.0002, beta_1=0.5)
model.compile(loss='binary_crossentropy', optimizer=opt)
return model
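As a quick sanity check (a sketch, not part of the original run), the generator's output shape must match the discriminator's input shape, since the two are chained inside define_gan:

# build throwaway models and confirm the shapes line up (sketch only)
g = define_generator(100)
d = define_discriminator()
noise = randn(2 * 100).reshape(2, 100)  # two random latent vectors
fakes = g.predict(noise)
print(fakes.shape)             # (2, 56, 56, 3), matching the discriminator's in_shape
print(d.predict(fakes).shape)  # (2, 1): one real/fake probability per image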
Data reload¶
In [13]:
# load and prepare the Naruto training images
def load_real_samples():
    # use the resized image array built above
    trainX = data
    # scale pixel values from [0,255] to [-1,1] to match the generator's tanh output
    X = (trainX - 127.5) / 127.5
    return X
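Because the generator ends in a tanh activation, the real images are scaled into the same [-1, 1] range. A quick range check (a sketch under the same assumptions):

X = load_real_samples()
print(X.dtype, X.min(), X.max())  # float64; all values fall within [-1, 1]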
Generations (images, latent points)¶
In [14]:
# select real samples
def generate_real_samples(dataset, n_samples):
# choose random instances
ix = randint(0, dataset.shape[0], n_samples)
# retrieve selected images
X = dataset[ix]
# generate 'real' class labels (1)
y = ones((n_samples, 1))
return X, y
# generate points in latent space as input for the generator
def generate_latent_points(latent_dim, n_samples):
# generate points in the latent space
x_input = randn(latent_dim * n_samples)
# reshape into a batch of inputs for the network
x_input = x_input.reshape(n_samples, latent_dim)
return x_input
# use the generator to generate n fake examples, with class labels
def generate_fake_samples(g_model, latent_dim, n_samples):
# generate points in latent space
x_input = generate_latent_points(latent_dim, n_samples)
# predict outputs
X = g_model.predict(x_input)
# create 'fake' class labels (0)
y = zeros((n_samples, 1))
return X, y
Image evolution¶
In [15]:
# create and save a plot of generated images, rescaled from [-1,1] back to [0,1]
def save_plot(examples, epoch, n=10):
# plot images
examples = (examples + 1) / 2.0
for i in range(n * n):
# define subplot
plt.subplot(n, n, 1 + i)
# turn off axis
plt.axis('off')
# plot raw pixel data
plt.imshow(examples[i])
# save plot to file
filename = 'images_long/generated_plot_e%03d.png' % (epoch+1)
plt.savefig(filename)
plt.close()
Performance display¶
In [16]:
# evaluate the discriminator, plot generated images, save generator model
def summarize_performance(epoch, g_model, d_model, dataset, latent_dim, n_samples=100):
# prepare real samples
X_real, y_real = generate_real_samples(dataset, n_samples)
# evaluate discriminator on real examples
_, acc_real = d_model.evaluate(X_real, y_real, verbose=0)
# prepare fake examples
x_fake, y_fake = generate_fake_samples(g_model, latent_dim, n_samples)
# evaluate discriminator on fake examples
_, acc_fake = d_model.evaluate(x_fake, y_fake, verbose=0)
# summarize discriminator performance
print('>Accuracy real: %.0f%%, fake: %.0f%%' % (acc_real*100, acc_fake*100))
# save plot
save_plot(x_fake, epoch)
# save the generator model to file
filename = 'models_long/generator_model_%03d.h5' % (epoch + 1)
g_model.save(filename)
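Note that summarize_performance draws n_samples=100 fake images, which exactly fills the 10x10 grid that save_plot lays out with its default n=10.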
Training function¶
In [38]:
# train the generator and discriminator
def train(g_model, d_model, gan_model, dataset, latent_dim, n_epochs=100, n_batch=128):
bat_per_epo = int(dataset.shape[0] / n_batch)
half_batch = int(n_batch / 2)
# manually enumerate epochs
for i in range(n_epochs):
# enumerate batches over the training set
for j in range(bat_per_epo):
# get randomly selected 'real' samples
X_real, y_real = generate_real_samples(dataset, half_batch)
# generate 'fake' examples
X_fake, y_fake = generate_fake_samples(g_model, latent_dim, half_batch)
# create training set for the discriminator
X, y = vstack((X_real, X_fake)), vstack((y_real, y_fake))
# update discriminator model weights
d_loss, _ = d_model.train_on_batch(X, y)
# prepare points in latent space as input for the generator
X_gan = generate_latent_points(latent_dim, n_batch)
# create inverted labels for the fake samples
y_gan = ones((n_batch, 1))
# update the generator via the discriminator's error
g_loss = gan_model.train_on_batch(X_gan, y_gan)
# summarize loss on this batch
# print('>%d, %d/%d, d=%.3f, g=%.3f' % (i+1, j+1, bat_per_epo, d_loss, g_loss))
# evaluate the model performance, sometimes
if (i+1) % 10 == 0:
summarize_performance(i, g_model, d_model, dataset, latent_dim)
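A small detail worth noting: d_model.train_on_batch(X, y) returns a [loss, accuracy] pair because the discriminator was compiled with metrics=['accuracy'], which is why the code unpacks d_loss, _. The combined GAN model was compiled without metrics, so gan_model.train_on_batch returns the loss alone.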
In [18]:
# create the discriminator
d_model = define_discriminator()
d_model.summary()
# create the generator
g_model = define_generator(100)
g_model.summary()
Model: "sequential"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
conv2d (Conv2D) (None, 56, 56, 64) 1792
leaky_re_lu (LeakyReLU) (None, 56, 56, 64) 0
conv2d_1 (Conv2D) (None, 28, 28, 128) 73856
leaky_re_lu_1 (LeakyReLU) (None, 28, 28, 128) 0
conv2d_2 (Conv2D) (None, 14, 14, 128) 147584
leaky_re_lu_2 (LeakyReLU) (None, 14, 14, 128) 0
conv2d_3 (Conv2D) (None, 7, 7, 256) 295168
leaky_re_lu_3 (LeakyReLU) (None, 7, 7, 256) 0
flatten (Flatten) (None, 12544) 0
dropout (Dropout) (None, 12544) 0
dense (Dense) (None, 1) 12545
=================================================================
Total params: 530,945
Trainable params: 530,945
Non-trainable params: 0
_________________________________________________________________
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_1 (Dense) (None, 12544) 1266944
leaky_re_lu_4 (LeakyReLU) (None, 12544) 0
reshape (Reshape) (None, 7, 7, 256) 0
conv2d_transpose (Conv2DTranspose) (None, 14, 14, 128) 524416
leaky_re_lu_5 (LeakyReLU) (None, 14, 14, 128) 0
conv2d_transpose_1 (Conv2DTranspose) (None, 28, 28, 128) 262272
leaky_re_lu_6 (LeakyReLU) (None, 28, 28, 128) 0
conv2d_transpose_2 (Conv2DTranspose) (None, 56, 56, 128) 262272
leaky_re_lu_7 (LeakyReLU) (None, 56, 56, 128) 0
conv2d_4 (Conv2D) (None, 56, 56, 3) 3459
=================================================================
Total params: 2,319,363
Trainable params: 2,319,363
Non-trainable params: 0
_________________________________________________________________
Testing all together (RUN)¶
In [39]:
# size of the latent space
latent_dim = 100
# create the discriminator
d_model = define_discriminator()
# create the generator
g_model = define_generator(latent_dim)
# create the gan
gan_model = define_gan(g_model, d_model)
# load image data
dataset = load_real_samples()
start = time.time()
# train model
train(g_model, d_model, gan_model, dataset, latent_dim)
print('Took %.1f seconds to execute the model' %(time.time()-start))
# Save the final model
g_model.save('NARUTO_generator_long.h5')
>Accuracy real: 58%, fake: 88%
>Accuracy real: 75%, fake: 94%
>Accuracy real: 87%, fake: 61%
>Accuracy real: 71%, fake: 53%
>Accuracy real: 81%, fake: 73%
>Accuracy real: 66%, fake: 95%
>Accuracy real: 75%, fake: 65%
>Accuracy real: 83%, fake: 61%
>Accuracy real: 74%, fake: 93%
>Accuracy real: 65%, fake: 98%
Took 10843.2 seconds to execute the model
Standalone input¶
Our Generator¶
In [147]:
# example of loading the generator model and generating images
from tensorflow.keras.models import load_model
from numpy.random import randn
from matplotlib import pyplot
# generate points in latent space as input for the generator
def generate_latent_points(latent_dim, n_samples):
# generate points in the latent space
x_input = randn(latent_dim * n_samples)
# reshape into a batch of inputs for the network
x_input = x_input.reshape(n_samples, latent_dim)
return x_input
# plot the generated images
def create_plot(examples, n):
# plot images
for i in range(n * n):
# define subplot
pyplot.subplot(n, n, 1 + i)
# turn off axis
pyplot.axis('off')
# plot raw pixel data
pyplot.imshow(examples[i, :, :])
pyplot.show()
# load model
model = load_model('NARUTO_generator_long.h5')
# generate images
latent_points = generate_latent_points(100, 1)
# generate images
X = model.predict(latent_points)
# scale from [-1,1] to [0,1]
X = (X + 1) / 2.0
# plot the result
create_plot(X, 1)
WARNING:tensorflow:No training configuration found in the save file, so the model was *not* compiled. Compile it manually.
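Since create_plot already supports an n x n grid, the saved generator can just as easily produce a batch of characters. A minimal sketch reusing the names defined in the cell above:

# sample a 5x5 grid of generated characters (sketch reusing the cell above)
latent_points = generate_latent_points(100, 25)  # 25 latent vectors of size 100
X = model.predict(latent_points)
X = (X + 1) / 2.0  # rescale from [-1,1] to [0,1] for imshow
create_plot(X, 5)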
Conclusions¶
- The procedure works: although the results are not yet satisfactory, the generated image has a recognizably human shape, even if it is not fully formed.
- The result can be improved by increasing the number of epochs and the batch size; more than 3,000 images are available for training the model, which should raise the quality of the final generated image.
- Due to limited processing capacity, the training parameters could not be increased further. Even so, the results are clearly heading in the right direction.
- Training for 100 epochs took about 3 hours (10,843 seconds).