Recurrent input dropout
lvapeab committed Apr 27, 2017
1 parent 2cdc3fb commit 2be5279
Showing 2 changed files with 29 additions and 27 deletions.
30 changes: 16 additions & 14 deletions config.py
@@ -6,11 +6,10 @@ def load_parameters():
"""

# Input data params
-DATASET_NAME = 'APE' # Task name
-SRC_LAN = 'mt' # Language of the source text
-TRG_LAN = 'pe' # Language of the target text
-#DATA_ROOT_PATH = 'examples/%s/' % DATASET_NAME # Path where data is stored
-DATA_ROOT_PATH = '/media/HDD_2TB/DATASETS/%s/in-domain/joint_bpe/' % DATASET_NAME # Path where data is stored
+DATASET_NAME = 'EuTrans' # Task name
+SRC_LAN = 'en' # Language of the source text
+TRG_LAN = 'es' # Language of the target text
+DATA_ROOT_PATH = 'examples/%s/' % DATASET_NAME # Path where data is stored

# SRC_LAN or TRG_LAN will be added to the file names
TEXT_FILES = {'train': 'training.', # Data files
@@ -63,7 +62,7 @@ def load_parameters():
# Word representation params
TOKENIZATION_METHOD = 'tokenize_none' # Select which tokenization we'll apply.
# See Dataset class (from stager_keras_wrapper) for more info.
-DETOKENIZATION_METHOD = 'detokenize_bpe' # Select which de-tokenization method we'll apply
+DETOKENIZATION_METHOD = 'tokenize_none' # Select which de-tokenization method we'll apply

APPLY_DETOKENIZATION = False # Whether we apply a detokenization method

@@ -95,8 +94,8 @@ def load_parameters():
LOSS = 'categorical_crossentropy'
CLASSIFIER_ACTIVATION = 'softmax'

-OPTIMIZER = 'Adadelta' # Optimizer
-LR = 1. # Learning rate. Recommended values - Adam 0.001 - Adadelta 1.0
+OPTIMIZER = 'Adam' # Optimizer
+LR = 0.001 # Learning rate. Recommended values - Adam 0.001 - Adadelta 1.0
CLIP_C = 1. # During training, clip L2 norm of gradients to this value (0. means deactivated)
CLIP_V = 0. # During training, clip absolute value of gradients to this value (0. means deactivated)
SAMPLE_WEIGHTS = True # Select whether we use a weights matrix (mask) for the data outputs
@@ -118,33 +117,33 @@ def load_parameters():
EARLY_STOP = True # Turns on/off the early stop protocol
PATIENCE = 20 # We'll stop if the val STOP_METRIC does not improve after this
# number of evaluations
-STOP_METRIC = 'TER' # Metric for the stop
+STOP_METRIC = 'Bleu_4' # Metric for the stop

# Model parameters
MODEL_TYPE = 'GroundHogModel' # Model to train. See model_zoo() for the supported architectures
RNN_TYPE = 'LSTM' # RNN unit type ('LSTM' and 'GRU' supported)
INIT_FUNCTION = 'glorot_uniform' # Initialization function for matrices (see keras/initializations.py)

-SOURCE_TEXT_EMBEDDING_SIZE = 300 # Source language word embedding size.
+SOURCE_TEXT_EMBEDDING_SIZE = 420 # Source language word embedding size.
SRC_PRETRAINED_VECTORS = None # Path to pretrained vectors (e.g.: DATA_ROOT_PATH + '/DATA/word2vec.%s.npy' % SRC_LAN)
# Set to None if you don't want to use pretrained vectors.
# When using pretrained word embeddings, this parameter must match the word embeddings size
SRC_PRETRAINED_VECTORS_TRAINABLE = True # Whether to fine-tune the source word embedding vectors.

-TARGET_TEXT_EMBEDDING_SIZE = 300 # Target language word embedding size.
+TARGET_TEXT_EMBEDDING_SIZE = 420 # Target language word embedding size.
TRG_PRETRAINED_VECTORS = None # Path to pretrained vectors. (e.g. DATA_ROOT_PATH + '/DATA/word2vec.%s.npy' % TRG_LAN)
# Set to None if you don't want to use pretrained vectors.
# When using pretrained word embeddings, the size of the pretrained word embeddings must match the word embeddings size.
TRG_PRETRAINED_VECTORS_TRAINABLE = True # Whether to fine-tune the target word embedding vectors.

# Encoder configuration
-ENCODER_HIDDEN_SIZE = 256 # For models with RNN encoder
+ENCODER_HIDDEN_SIZE = 600 # For models with RNN encoder
BIDIRECTIONAL_ENCODER = True # Use bidirectional encoder
N_LAYERS_ENCODER = 1 # Stack this number of encoding layers
BIDIRECTIONAL_DEEP_ENCODER = True # Use bidirectional encoder in all encoding layers

# Decoder configuration
-DECODER_HIDDEN_SIZE = 256 # For models with RNN decoder
+DECODER_HIDDEN_SIZE = 600 # For models with RNN decoder
N_LAYERS_DECODER = 1 # Stack this number of decoding layers.
ADDITIONAL_OUTPUT_MERGE_MODE = 'sum' # Merge mode for the skip-connections
# Skip connections size
@@ -168,6 +167,9 @@ def load_parameters():
USE_DROPOUT = False # Use dropout
DROPOUT_P = 0.5 # Percentage of units to drop

+USE_RECURRENT_INPUT_DROPOUT = False # Use dropout in input cells of recurrent layers
+RECURRENT_INPUT_DROPOUT_P = 0.5 # Percentage of units to drop in input cells of recurrent layers
+
USE_RECURRENT_DROPOUT = False # Use dropout in recurrent layers # DANGEROUS!
RECURRENT_DROPOUT_P = 0.5 # Percentage of units to drop in recurrent layers

@@ -193,7 +195,7 @@ def load_parameters():

MODEL_NAME += EXTRA_NAME

-STORE_PATH = '/media/HDD_2TB/MODELS/%s/trained_models/%s/' % (DATASET_NAME, MODEL_NAME) # Models and evaluation results will be stored here
+STORE_PATH = 'trained_models/' + MODEL_NAME + '/' # Models and evaluation results will be stored here
DATASET_STORE_PATH = 'datasets/' # Dataset instance will be stored here

SAMPLING_SAVE_MODE = 'list' # 'list' or 'vqa'
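The new pair of flags makes input-side dropout on the recurrent layers independent of the (riskier) hidden-to-hidden dropout. A minimal usage sketch, assuming config.py is importable from the working directory; the 0.3 rate is just an example value:

    from config import load_parameters

    params = load_parameters()
    params['USE_RECURRENT_INPUT_DROPOUT'] = True  # drop units feeding the recurrent layers
    params['RECURRENT_INPUT_DROPOUT_P'] = 0.3     # drop 30% of the input units
    params['USE_RECURRENT_DROPOUT'] = False       # keep hidden-to-hidden dropout off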
26 changes: 13 additions & 13 deletions model_zoo.py
@@ -1,6 +1,6 @@
import logging
import os
import numpy as np

from keras.layers import *
from keras.models import model_from_json, Model
from keras.optimizers import Adam, RMSprop, Nadam, Adadelta, SGD, Adagrad, Adamax
@@ -358,18 +358,18 @@ def GroundHogModel(self, params):
Wa_regularizer=l2(params['WEIGHT_DECAY']),
Ua_regularizer=l2(params['WEIGHT_DECAY']),
ba_regularizer=l2(params['WEIGHT_DECAY']),
-dropout_W=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
-dropout_U=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
-dropout_V=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
-dropout_wa=params['DROPOUT_P'] if params['USE_DROPOUT'] else None,
-dropout_Wa=params['DROPOUT_P'] if params['USE_DROPOUT'] else None,
-dropout_Ua=params['DROPOUT_P'] if params['USE_DROPOUT'] else None,
+dropout_W=params['RECURRENT_INPUT_DROPOUT_P'] if params['USE_RECURRENT_INPUT_DROPOUT'] else None,
+dropout_U=params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
+dropout_V=params['RECURRENT_INPUT_DROPOUT_P'] if params['USE_RECURRENT_INPUT_DROPOUT'] else None,
+dropout_wa=params['DROPOUT_P'] if params['USE_DROPOUT'] else None,
+dropout_Wa=params['DROPOUT_P'] if params['USE_DROPOUT'] else None,
+dropout_Ua=params['DROPOUT_P'] if params['USE_DROPOUT'] else None,
init=params['INIT_FUNCTION'],
return_sequences=True,
return_extra_variables=True,
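The hunk above splits the two dropout roles: dropout_W and dropout_V (which act on the inputs of the attentional LSTM in the Keras fork this project builds on) are now gated by USE_RECURRENT_INPUT_DROPOUT, while dropout_U (the hidden-to-hidden connections) stays gated by USE_RECURRENT_DROPOUT. A minimal sketch of that mapping as a standalone helper (hypothetical, not part of the repository):

    def recurrent_dropout_kwargs(params):
        """Map the config flags to the dropout kwargs passed to the decoder RNN."""
        return {
            # input-side dropout, gated by the new flag
            'dropout_W': params['RECURRENT_INPUT_DROPOUT_P'] if params['USE_RECURRENT_INPUT_DROPOUT'] else None,
            'dropout_V': params['RECURRENT_INPUT_DROPOUT_P'] if params['USE_RECURRENT_INPUT_DROPOUT'] else None,
            # hidden-to-hidden dropout, gated by the original flag
            'dropout_U': params['RECURRENT_DROPOUT_P'] if params['USE_RECURRENT_DROPOUT'] else None,
        }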
