Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arabi #19

Open
wants to merge 6 commits into
base: devel
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions scenes/BahdanauAttentionTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#import packages
import tensorflow as tf

import numpy as np

path_w='weights/'

class BahdanauAttentionTest(tf.keras.Model):
    """Bahdanau (additive) attention layer rebuilt from pre-trained weights.

    The six weight arrays (kernel and bias for W1, W2 and V) are loaded from
    ``.npy`` files under ``path_w`` and plugged in as constant initializers,
    so the layer reproduces the trained model at inference time.
    """

    def __init__(self, units):
        super(BahdanauAttentionTest, self).__init__()
        const = tf.keras.initializers.Constant
        # Layer index 4 of the saved decoder holds the attention weights;
        # files 0..5 are (W1 kernel, W1 bias, W2 kernel, W2 bias, V kernel, V bias).
        loaded = [
            np.load(path_w + "decoder_layer_weights/layer_%s_%s_weights_%s.npy"
                    % (4, "bahdanau_attention", j))
            for j in range(6)
        ]
        w1, w2, w3, w4, w5, w6 = loaded
        self.W1 = tf.keras.layers.Dense(units, kernel_initializer=const(w1),
                                        bias_initializer=const(w2))
        self.W2 = tf.keras.layers.Dense(units, kernel_initializer=const(w3),
                                        bias_initializer=const(w4))
        self.V = tf.keras.layers.Dense(1, kernel_initializer=const(w5),
                                       bias_initializer=const(w6))

    def call(self, features, hidden):
        """Score the encoder features against the decoder hidden state.

        features: CNN encoder output — assumed (batch_size, 64, embedding_dim).
        hidden:   decoder hidden state — assumed (batch_size, hidden_size).
        Returns (context_vector, attention_weights).
        """
        # Add a time axis so `hidden` broadcasts against every feature
        # location: (batch_size, hidden_size) -> (batch_size, 1, hidden_size).
        query = tf.expand_dims(hidden, 1)

        # Additive score: shape (batch_size, 64, units).
        score = tf.nn.tanh(self.W1(features) + self.W2(query))

        # V projects the score to a scalar per location; softmax over axis=1
        # normalizes across the 64 locations -> (batch_size, 64, 1).
        attention_weights = tf.nn.softmax(self.V(score), axis=1)

        # Weighted sum over the location axis collapses to the context vector.
        context_vector = tf.reduce_sum(attention_weights * features, axis=1)

        return context_vector, attention_weights
22 changes: 22 additions & 0 deletions scenes/CNN_Encoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import tensorflow as tf


path_w='weights/'

class CNN_Encoder(tf.keras.Model):
    """Project pre-extracted image features through one fully connected layer.

    Feature extraction has already been done offline (and dumped with pickle),
    so this "encoder" is just a Dense projection to the embedding dimension
    followed by a ReLU.
    """

    def __init__(self, embedding_dim):
        super(CNN_Encoder, self).__init__()
        # Output shape after fc: (batch_size, 64, embedding_dim).
        self.fc = tf.keras.layers.Dense(embedding_dim)

    def call(self, x):
        # Dense projection, then ReLU activation.
        return tf.nn.relu(self.fc(x))




1,795 changes: 1,795 additions & 0 deletions scenes/Copy_of_Copy_of_Mobilenet_train_100k_ar_keras(1)(1).ipynb

Large diffs are not rendered by default.

7 changes: 6 additions & 1 deletion scenes/README.md
Original file line number Diff line number Diff line change
@@ -1 +1,6 @@
# Scene description/ image captioning.
# Scene description/ image captioning.
Usage:

1. Download the weights folder and place it in the same directory as the scripts.

2. Run `mobilenet_inference.py`, which outputs both the greedy caption and the beam-search caption.
52 changes: 52 additions & 0 deletions scenes/RNN_DecoderTest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# NOTE(review): this module uses tf, np and BahdanauAttentionTest below but
# never imported them — they are added here so the file runs standalone.
import tensorflow as tf
import numpy as np

import mobilenet_inference
from BahdanauAttentionTest import BahdanauAttentionTest

path_w="weights/"
class RNN_DecoderTest(tf.keras.Model):
    """GRU caption decoder with Bahdanau attention, rebuilt from saved weights.

    All layer weights are loaded from ``.npy`` files under ``path_w`` and used
    as constant initializers so the decoder reproduces the trained model.
    """

    def __init__(self, embedding_dim, units, vocab_size):
        super(RNN_DecoderTest, self).__init__()
        self.units = units

        const = tf.keras.initializers.Constant
        fname = path_w + "decoder_layer_weights/layer_%s_%s_weights_%s.npy"
        # Saved decoder layer order: 0=embedding, 1=gru, 2=dense_1, 3=dense_2.
        w_emb = np.load(fname % (0, "embedding", 0))
        gru_w = [np.load(fname % (1, "gru", j)) for j in range(3)]
        fc1_w = [np.load(fname % (2, "dense_1", j)) for j in range(2)]
        fc2_w = [np.load(fname % (3, "dense_2", j)) for j in range(2)]

        self.embedding = tf.keras.layers.Embedding(
            vocab_size, embedding_dim, embeddings_initializer=const(w_emb))
        self.gru = tf.keras.layers.GRU(
            self.units,
            return_sequences=True,
            return_state=True,
            kernel_initializer=const(gru_w[0]),
            recurrent_initializer=const(gru_w[1]),
            bias_initializer=const(gru_w[2]),
        )
        self.fc1 = tf.keras.layers.Dense(
            self.units,
            kernel_initializer=const(fc1_w[0]),
            bias_initializer=const(fc1_w[1]))
        self.fc2 = tf.keras.layers.Dense(
            vocab_size,
            kernel_initializer=const(fc2_w[0]),
            bias_initializer=const(fc2_w[1]))

        # Attention is a separate sub-model sharing the same unit count.
        self.attention = BahdanauAttentionTest(self.units)

    def call(self, x, features, hidden):
        """Run one decoding step.

        x:        current token ids — assumed (batch_size, 1).
        features: CNN encoder output — assumed (batch_size, 64, embedding_dim).
        hidden:   previous decoder state — assumed (batch_size, hidden_size).
        Returns (logits, new_state, attention_weights).
        """
        context_vector, attention_weights = self.attention(features, hidden)

        # (batch_size, 1) -> (batch_size, 1, embedding_dim).
        embedded = self.embedding(x)

        # Prepend the context vector along the feature axis:
        # (batch_size, 1, embedding_dim + hidden_size).
        gru_input = tf.concat(
            [tf.expand_dims(context_vector, 1), embedded], axis=-1)

        # Run the GRU over the single-step sequence.
        output, state = self.gru(gru_input)

        # Project to hidden size, flatten the time axis, then to vocab logits:
        # (batch_size * seq_len, vocab_size).
        projected = self.fc1(output)
        projected = tf.reshape(projected, (-1, projected.shape[2]))
        logits = self.fc2(projected)

        return logits, state, attention_weights

    def reset_state(self, batch_size):
        """Return an all-zero initial hidden state for a batch."""
        return tf.zeros((batch_size, self.units))
Loading