Commit efc05c1

Author: wzhouad
Commit message: fix bucket
1 parent: cdf702a

2 files changed: 11 additions (+), 5 deletions (−)

config.py

Lines changed: 5 additions & 3 deletions
@@ -83,7 +83,7 @@
 flags.DEFINE_integer("num_threads", 4, "Number of threads in input pipeline")
 flags.DEFINE_boolean("use_cudnn", True, "Whether to use cudnn (only for GPU)")
 flags.DEFINE_boolean("is_bucket", False, "Whether to use bucketing")
-flags.DEFINE_list("bucket_range", [0, 400, 40], "range of bucket")
+flags.DEFINE_list("bucket_range", [0, 401, 40], "range of bucket")
 
 flags.DEFINE_integer("batch_size", 64, "Batch size")
 flags.DEFINE_integer("num_steps", 60000, "Number of steps")
@@ -101,8 +101,10 @@
 # Extensions (Uncomment corresponding line in download.sh to download the required data)
 glove_char_file = os.path.join(
     home, "data", "glove", "glove.840B.300d-char.txt")
-flags.DEFINE_string("glove_char_file", glove_char_file, "Glove character embedding")
-flags.DEFINE_boolean("pretrained_char", False, "Whether to use pretrained char embedding")
+flags.DEFINE_string("glove_char_file", glove_char_file,
+                    "Glove character embedding")
+flags.DEFINE_boolean("pretrained_char", False,
+                     "Whether to use pretrained char embedding")
 
 fasttext_file = os.path.join(home, "data", "fasttext", "wiki-news-300d-1M.vec")
 flags.DEFINE_string("fasttext_file", fasttext_file, "Fasttext word embedding")

util.py

Lines changed: 6 additions & 2 deletions
@@ -46,8 +46,12 @@ def get_batch_dataset(record_file, parser, config):
         def key_func(context_idxs, ques_idxs, context_char_idxs, ques_char_idxs, y1, y2, qa_id):
             c_len = tf.reduce_sum(
                 tf.cast(tf.cast(context_idxs, tf.bool), tf.int32))
-            t = tf.clip_by_value(buckets, 0, c_len)
-            return tf.argmax(t)
+            buckets_min = buckets[:-1]
+            buckets_max = buckets[1:]
+            conditions_c = tf.logical_and(tf.less_equal(
+                buckets_min, c_len), tf.less(c_len, buckets_max))
+            bucket_id = tf.reduce_min(tf.where(conditions_c))
+            return bucket_id
 
         def reduce_func(key, elements):
             return elements.batch(config.batch_size)
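
The new key_func pairs consecutive bucket boundaries into [min, max) intervals and returns the index of the interval containing the context length. A rough NumPy sketch of the same logic (the function name, the standalone setting, and the arange call are illustrative assumptions, not part of the commit):

import numpy as np

def bucket_id_for_length(c_len, bucket_range=(0, 401, 40)):
    # Boundaries [0, 40, ..., 400]; consecutive pairs form [min, max) intervals.
    buckets = np.arange(*bucket_range)
    buckets_min, buckets_max = buckets[:-1], buckets[1:]
    # Mirrors conditions_c: True where buckets_min <= c_len < buckets_max.
    conditions = (buckets_min <= c_len) & (c_len < buckets_max)
    # Mirrors tf.reduce_min(tf.where(...)): index of the first matching interval.
    return int(np.where(conditions)[0].min())

print(bucket_id_for_length(37))   # 0 -> interval [0, 40)
print(bucket_id_for_length(399))  # 9 -> interval [360, 400)

The returned id serves as the grouping key, presumably consumed together with reduce_func by a grouped-batching transform such as tf.data's group_by_window, though that usage sits outside this diff.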
