Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arabi #19

Open
wants to merge 6 commits into
base: devel
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions scenes/BahdanauAttentionTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#import packages
import tensorflow as tf

import numpy as np

path_w='weights/'

class BahdanauAttentionTest(tf.keras.Model):
    """Bahdanau (additive) attention layer rebuilt from pre-trained weights.

    The six weight arrays (kernel and bias for W1, W2 and V) are loaded from
    ``.npy`` files under ``path_w`` and plugged in as constant initializers,
    so the layer reproduces the trained model at inference time.
    """

    def __init__(self, units):
        super(BahdanauAttentionTest, self).__init__()
        const = tf.keras.initializers.Constant
        # Layer index 4 of the saved decoder holds the attention weights;
        # files 0..5 are (W1 kernel, W1 bias, W2 kernel, W2 bias, V kernel, V bias).
        loaded = [
            np.load(path_w + "decoder_layer_weights/layer_%s_%s_weights_%s.npy"
                    % (4, "bahdanau_attention", j))
            for j in range(6)
        ]
        w1, w2, w3, w4, w5, w6 = loaded
        self.W1 = tf.keras.layers.Dense(units, kernel_initializer=const(w1),
                                        bias_initializer=const(w2))
        self.W2 = tf.keras.layers.Dense(units, kernel_initializer=const(w3),
                                        bias_initializer=const(w4))
        self.V = tf.keras.layers.Dense(1, kernel_initializer=const(w5),
                                       bias_initializer=const(w6))

    def call(self, features, hidden):
        """Score the encoder features against the decoder hidden state.

        features: CNN encoder output — assumed (batch_size, 64, embedding_dim).
        hidden:   decoder hidden state — assumed (batch_size, hidden_size).
        Returns (context_vector, attention_weights).
        """
        # Add a time axis so `hidden` broadcasts against every feature
        # location: (batch_size, hidden_size) -> (batch_size, 1, hidden_size).
        query = tf.expand_dims(hidden, 1)

        # Additive score: shape (batch_size, 64, units).
        score = tf.nn.tanh(self.W1(features) + self.W2(query))

        # V projects the score to a scalar per location; softmax over axis=1
        # normalizes across the 64 locations -> (batch_size, 64, 1).
        attention_weights = tf.nn.softmax(self.V(score), axis=1)

        # Weighted sum over the location axis collapses to the context vector.
        context_vector = tf.reduce_sum(attention_weights * features, axis=1)

        return context_vector, attention_weights
22 changes: 22 additions & 0 deletions scenes/CNN_Encoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import tensorflow as tf


path_w='weights/'

class CNN_Encoder(tf.keras.Model):
    """Project pre-extracted image features through one fully connected layer.

    Feature extraction has already been done offline (and dumped with pickle),
    so this "encoder" is just a Dense projection to the embedding dimension
    followed by a ReLU.
    """

    def __init__(self, embedding_dim):
        super(CNN_Encoder, self).__init__()
        # Output shape after fc: (batch_size, 64, embedding_dim).
        self.fc = tf.keras.layers.Dense(embedding_dim)

    def call(self, x):
        # Dense projection, then ReLU activation.
        return tf.nn.relu(self.fc(x))




1,795 changes: 1,795 additions & 0 deletions scenes/Copy_of_Copy_of_Mobilenet_train_100k_ar_keras(1)(1).ipynb

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion scenes/README.md
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
# Scene description/ image captioning.
# Scene description/ image captioning.
Usage:

1. Download the weights folder and place it in the same directory as the scripts.

2. Run `mobilenet_inference.py`, which outputs both the greedy caption and the beam-search caption.
52 changes: 52 additions & 0 deletions scenes/RNN_DecoderTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# NOTE(review): this module uses tf, np and BahdanauAttentionTest below but
# never imported them — they are added here so the file runs standalone.
import tensorflow as tf
import numpy as np

import mobilenet_inference
from BahdanauAttentionTest import BahdanauAttentionTest

path_w="weights/"
class RNN_DecoderTest(tf.keras.Model):
    """GRU caption decoder with Bahdanau attention, rebuilt from saved weights.

    All layer weights are loaded from ``.npy`` files under ``path_w`` and used
    as constant initializers so the decoder reproduces the trained model.
    """

    def __init__(self, embedding_dim, units, vocab_size):
        super(RNN_DecoderTest, self).__init__()
        self.units = units

        const = tf.keras.initializers.Constant
        fname = path_w + "decoder_layer_weights/layer_%s_%s_weights_%s.npy"
        # Saved decoder layer order: 0=embedding, 1=gru, 2=dense_1, 3=dense_2.
        w_emb = np.load(fname % (0, "embedding", 0))
        gru_w = [np.load(fname % (1, "gru", j)) for j in range(3)]
        fc1_w = [np.load(fname % (2, "dense_1", j)) for j in range(2)]
        fc2_w = [np.load(fname % (3, "dense_2", j)) for j in range(2)]

        self.embedding = tf.keras.layers.Embedding(
            vocab_size, embedding_dim, embeddings_initializer=const(w_emb))
        self.gru = tf.keras.layers.GRU(
            self.units,
            return_sequences=True,
            return_state=True,
            kernel_initializer=const(gru_w[0]),
            recurrent_initializer=const(gru_w[1]),
            bias_initializer=const(gru_w[2]),
        )
        self.fc1 = tf.keras.layers.Dense(
            self.units,
            kernel_initializer=const(fc1_w[0]),
            bias_initializer=const(fc1_w[1]))
        self.fc2 = tf.keras.layers.Dense(
            vocab_size,
            kernel_initializer=const(fc2_w[0]),
            bias_initializer=const(fc2_w[1]))

        # Attention is a separate sub-model sharing the same unit count.
        self.attention = BahdanauAttentionTest(self.units)

    def call(self, x, features, hidden):
        """Run one decoding step.

        x:        current token ids — assumed (batch_size, 1).
        features: CNN encoder output — assumed (batch_size, 64, embedding_dim).
        hidden:   previous decoder state — assumed (batch_size, hidden_size).
        Returns (logits, new_state, attention_weights).
        """
        context_vector, attention_weights = self.attention(features, hidden)

        # (batch_size, 1) -> (batch_size, 1, embedding_dim).
        embedded = self.embedding(x)

        # Prepend the context vector along the feature axis:
        # (batch_size, 1, embedding_dim + hidden_size).
        gru_input = tf.concat(
            [tf.expand_dims(context_vector, 1), embedded], axis=-1)

        # Run the GRU over the single-step sequence.
        output, state = self.gru(gru_input)

        # Project to hidden size, flatten the time axis, then to vocab logits:
        # (batch_size * seq_len, vocab_size).
        projected = self.fc1(output)
        projected = tf.reshape(projected, (-1, projected.shape[2]))
        logits = self.fc2(projected)

        return logits, state, attention_weights

    def reset_state(self, batch_size):
        """Return an all-zero initial hidden state for a batch."""
        return tf.zeros((batch_size, self.units))
Loading