
Commit 2f4e33f

Author: wzhouad
Commit message: initial conmmit
1 parent 0a05895 commit 2f4e33f

File tree

7 files changed: +837 additions, -0 deletions


config.py

Lines changed: 114 additions & 0 deletions
import os
import tensorflow as tf

from prepro import prepro
from main import train, test

flags = tf.flags

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Source data locations, as laid out by download.sh.
home = os.path.expanduser("~")
train_file = os.path.join(home, "data", "squad", "train-v1.1.json")
dev_file = os.path.join(home, "data", "squad", "dev-v1.1.json")
# SQuAD's test set is hidden, so the dev set doubles as the test split.
test_file = os.path.join(home, "data", "squad", "dev-v1.1.json")
glove_file = os.path.join(home, "data", "glove", "glove.840B.300d.txt")

# Preprocessing outputs and training artifacts.
target_dir = "data"
log_dir = "log/event"
save_dir = "log/model"
train_record_file = os.path.join(target_dir, "train.tfrecords")
dev_record_file = os.path.join(target_dir, "dev.tfrecords")
test_record_file = os.path.join(target_dir, "test.tfrecords")
word_emb_file = os.path.join(target_dir, "word_emb.json")
char_emb_file = os.path.join(target_dir, "char_emb.json")
train_eval = os.path.join(target_dir, "train_eval.json")
dev_eval = os.path.join(target_dir, "dev_eval.json")
test_eval = os.path.join(target_dir, "test_eval.json")
test_meta = os.path.join(target_dir, "test_meta.json")

if not os.path.exists(target_dir):
    os.makedirs(target_dir)
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
if not os.path.exists(save_dir):
    os.makedirs(save_dir)

flags.DEFINE_string("mode", "train", "Running mode: prepro/train/test/debug")

flags.DEFINE_string("target_dir", target_dir, "Target directory for output data")
flags.DEFINE_string("log_dir", log_dir, "Directory for tf event files")
flags.DEFINE_string("save_dir", save_dir, "Directory for saving model")
flags.DEFINE_string("train_file", train_file, "Train source file")
flags.DEFINE_string("dev_file", dev_file, "Dev source file")
flags.DEFINE_string("test_file", test_file, "Test source file")
flags.DEFINE_string("glove_file", glove_file, "GloVe source file")

flags.DEFINE_string("train_record_file", train_record_file,
                    "Out file for train data")
flags.DEFINE_string("dev_record_file", dev_record_file,
                    "Out file for dev data")
flags.DEFINE_string("test_record_file", test_record_file,
                    "Out file for test data")
flags.DEFINE_string("word_emb_file", word_emb_file,
                    "Out file for word embedding")
flags.DEFINE_string("char_emb_file", char_emb_file,
                    "Out file for char embedding")
flags.DEFINE_string("train_eval_file", train_eval, "Out file for train eval")
flags.DEFINE_string("dev_eval_file", dev_eval, "Out file for dev eval")
flags.DEFINE_string("test_eval_file", test_eval, "Out file for test eval")
flags.DEFINE_string("test_meta", test_meta, "Out file for test meta")


flags.DEFINE_integer("glove_size", int(2.2e6), "Corpus size for GloVe")
flags.DEFINE_integer("glove_dim", 300, "Embedding dimension for GloVe")
flags.DEFINE_integer("char_dim", 8, "Embedding dimension for char")

flags.DEFINE_integer("para_limit", 400, "Length limit for paragraph")
flags.DEFINE_integer("ques_limit", 30, "Length limit for question")
flags.DEFINE_integer("char_limit", 16, "Length limit for characters per word")
flags.DEFINE_integer("word_count_limit", -1, "Min count for word")
flags.DEFINE_integer("char_count_limit", -1, "Min count for char")

flags.DEFINE_integer("min_after_deque", 10000, "Min examples after dequeue")
flags.DEFINE_integer("num_threads", 4, "Number of threads in file queue")
flags.DEFINE_integer("capacity", 12000, "Capacity of tfrecord queue")

flags.DEFINE_integer("batch_size", 60, "Batch size")
flags.DEFINE_integer("num_steps", 50000, "Number of steps")
flags.DEFINE_integer("checkpoint", 1000,
                     "Checkpoint interval to save and evaluate the model")
flags.DEFINE_integer("period", 100, "Period to save batch loss")
flags.DEFINE_integer("val_num_batches", 250,
                     "Number of batches to evaluate the model")
flags.DEFINE_float("init_lr", 0.5, "Initial learning rate for Adadelta")
flags.DEFINE_float("keep_prob", 0.7, "Dropout keep prob between layers")
flags.DEFINE_float("emb_keep_prob", 0.9,
                   "Dropout keep prob for embedding layer")
flags.DEFINE_float("grad_clip", 5.0, "Global norm gradient clipping threshold")
flags.DEFINE_integer("hidden", 75, "Hidden size")
flags.DEFINE_integer("char_hidden", 75, "GRU dimension for char")
flags.DEFINE_integer("patience", 2, "Patience for learning rate decay")


def main(_):
    config = flags.FLAGS
    if config.mode == "train":
        train(config)
    elif config.mode == "prepro":
        prepro(config)
    elif config.mode == "test":
        test(config)
    elif config.mode == "debug":
        # Shrink every interval so a full train/eval cycle finishes in seconds.
        config.num_steps = 2
        config.val_num_batches = 1
        config.checkpoint = 1
        config.period = 1
        train(config)
    else:
        print("Unknown mode")
        exit(0)


if __name__ == "__main__":
    tf.app.run()
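
Taken together with main.py below, the mode flag makes config.py the single entry point for the pipeline. A minimal usage sketch, inferred from the dispatch in main() (the commit itself ships no run instructions):

# Assumes download.sh has already fetched SQuAD and GloVe.
python config.py --mode prepro   # write tfrecords and embedding JSONs to data/
python config.py --mode train    # train; events in log/event, checkpoints in log/model
python config.py --mode test     # score the latest checkpoint (Exact Match / F1)
python config.py --mode debug    # 2-step run to smoke-test the whole loop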

download.sh

Lines changed: 17 additions & 0 deletions
#!/usr/bin/env bash

# Download SQuAD
SQUAD_DIR=~/data/squad
mkdir -p $SQUAD_DIR
wget https://rajpurkar.github.io/SQuAD-explorer/dataset/train-v1.1.json -O $SQUAD_DIR/train-v1.1.json
wget https://rajpurkar.github.io/SQuAD-explorer/dataset/dev-v1.1.json -O $SQUAD_DIR/dev-v1.1.json


# Download GloVe
GLOVE_DIR=~/data/glove
mkdir -p $GLOVE_DIR
wget http://nlp.stanford.edu/data/glove.840B.300d.zip -O $GLOVE_DIR/glove.840B.300d.zip
unzip $GLOVE_DIR/glove.840B.300d.zip -d $GLOVE_DIR

# Download NLTK language models
python -m nltk.downloader 'punkt'
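
The script takes no options; a hedged note on running it (tool availability is an assumption the script does not check):

# Requires wget, unzip, and an NLTK-equipped python on PATH;
# the GloVe archive alone is a multi-gigabyte download.
bash download.sh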

func.py

Lines changed: 87 additions & 0 deletions
import tensorflow as tf
from tensorflow.python.ops.nn import bidirectional_dynamic_rnn
from tensorflow.python.ops.rnn_cell import GRUCell

INF = 1e30


def stacked_gru(inputs, batch, hidden, num_layers, seq_len, keep_prob=1.0,
                is_train=None, concat_layers=True, dropout_output=False,
                dtype=tf.float32, scope="StackedGRU"):
    """Multi-layer bidirectional GRU; optionally concatenates all layers' outputs."""
    with tf.variable_scope(scope):
        outputs = [inputs]
        for layer in range(num_layers):
            with tf.variable_scope("Layer_{}".format(layer)):
                cell_fw = GRUCell(hidden)
                cell_bw = GRUCell(hidden)
                d_inputs = dropout(
                    outputs[-1], keep_prob=keep_prob, is_train=is_train)
                (out_fw, out_bw), _ = bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, d_inputs, sequence_length=seq_len, dtype=dtype)
                outputs.append(tf.concat([out_fw, out_bw], axis=2))
        if concat_layers:
            res = tf.concat(outputs[1:], axis=2)
        else:
            res = outputs[-1]
        if dropout_output:
            res = dropout(res, keep_prob=keep_prob, is_train=is_train)
        return res


def dropout(args, keep_prob, is_train, mode="recurrent"):
    # Variational dropout: share one noise mask across the time dimension
    # ("recurrent") or across the embedding dimension ("embedding").
    if keep_prob < 1.0:
        noise_shape = None
        shape = args.get_shape().as_list()
        if mode == "embedding":
            noise_shape = [shape[0], 1]
        if mode == "recurrent":
            noise_shape = [shape[0], 1, shape[-1]]
        args = tf.cond(is_train, lambda: tf.nn.dropout(
            args, keep_prob, noise_shape=noise_shape), lambda: args)
    return args


def softmax_mask(val, mask):
    # Push masked-out positions toward -INF so softmax gives them ~zero weight.
    return -INF * (1 - tf.cast(mask, tf.float32)) + val


def pointer(inputs, state, hidden, mask, scope="pointer", reuse=False):
    """One pointer-network step: attend over inputs given the current state.

    Returns the attention-pooled vector and the masked logits over positions.
    """
    with tf.variable_scope(scope):
        u = tf.concat([tf.tile(tf.expand_dims(state, axis=1), [
            1, tf.shape(inputs)[1], 1]), inputs], axis=2)
        s0 = tf.nn.tanh(tf.layers.dense(
            u, hidden, use_bias=False, name="s0", reuse=reuse))
        s = tf.layers.dense(s0, 1, use_bias=False, name="s", reuse=reuse)
        s1 = softmax_mask(tf.squeeze(s, [2]), mask)
        a = tf.expand_dims(tf.nn.softmax(s1), axis=2)
        res = tf.reduce_sum(a * inputs, axis=1)
        return res, s1


def summ(memory, hidden, mask, scope="summ"):
    """Attention-pooled summary vector of a sequence."""
    with tf.variable_scope(scope):
        s0 = tf.nn.tanh(tf.layers.dense(memory, hidden))
        s = tf.layers.dense(s0, 1, use_bias=False)
        s1 = softmax_mask(tf.squeeze(s, [2]), mask)
        a = tf.expand_dims(tf.nn.softmax(s1), axis=2)
        res = tf.reduce_sum(a * memory, axis=1)
        return res


def dot_attention(inputs, memory, mask, hidden, keep_prob=1.0, is_train=None,
                  scope="dot_attention"):
    """Scaled dot-product attention from inputs to memory, with an output gate."""
    with tf.variable_scope(scope):
        d_inputs = dropout(inputs, keep_prob=keep_prob, is_train=is_train)
        d_memory = dropout(memory, keep_prob=keep_prob, is_train=is_train)

        JX = tf.shape(inputs)[1]
        inputs_ = tf.layers.dense(d_inputs, hidden)
        memory_ = tf.layers.dense(d_memory, hidden)

        outputs = tf.matmul(inputs_, tf.transpose(
            memory_, [0, 2, 1])) / hidden ** 0.5
        mask = tf.tile(tf.expand_dims(mask, axis=1), [1, JX, 1])
        logits = tf.nn.softmax(softmax_mask(outputs, mask))
        outputs = tf.matmul(logits, memory)
        res = tf.concat([inputs, outputs], axis=2)

        # Gate the concatenated features before returning them.
        dim = res.get_shape().as_list()[-1]
        gate = tf.nn.sigmoid(tf.layers.dense(res, dim, use_bias=False))
        return res * gate
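
None of the following is in the commit; it is a throwaway shape check, assuming TensorFlow 1.x and made-up sizes, that may help when reading the functions above:

import tensorflow as tf
from func import stacked_gru, dot_attention, summ, pointer

batch, c_len, q_len, d, hidden = 2, 7, 5, 12, 6
c = tf.random_normal([batch, c_len, d])          # stand-in "passage" encoding
q = tf.random_normal([batch, q_len, d])          # stand-in "question" encoding
c_mask = tf.sequence_mask([7, 4], maxlen=c_len)  # per-example true lengths
q_mask = tf.sequence_mask([5, 3], maxlen=q_len)

enc = stacked_gru(c, batch, hidden, num_layers=1, seq_len=[7, 4])  # [2, 7, 2*hidden]
att = dot_attention(c, q, q_mask, hidden)        # [2, 7, 2*d]: inputs ++ attended memory, gated
init = summ(q, hidden, q_mask)                   # [2, d]: pooled question summary
pooled, logits = pointer(att, init, hidden, c_mask)  # logits: [2, 7] masked position scores

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run([tf.shape(att), tf.shape(logits)]))  # [2 7 24], [2 7]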

main.py

Lines changed: 152 additions & 0 deletions
import tensorflow as tf
import json
import numpy as np
from tqdm import tqdm
import os

from model import Model
from util import create_batch, convert_tokens, evaluate


def train(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.train_eval_file, "r") as fh:
        train_eval_file = json.load(fh)
    with open(config.dev_eval_file, "r") as fh:
        dev_eval_file = json.load(fh)

    print("Building model...")
    train_batch = create_batch(config.train_record_file, config)
    dev_batch = create_batch(config.dev_record_file, config)
    # The train and dev models share weights via variable-scope reuse.
    with tf.variable_scope("model"):
        model_train = Model(config, train_batch, word_mat, char_mat)
        tf.get_variable_scope().reuse_variables()
        model_dev = Model(config, dev_batch, word_mat,
                          char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    loss_save = 100.0
    patience = 0
    lr = config.init_lr

    with tf.Session(config=sess_config) as sess:
        writer = tf.summary.FileWriter(config.log_dir)
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        saver = tf.train.Saver()
        sess.run(tf.assign(model_train.is_train,
                           tf.constant(True, dtype=tf.bool)))
        sess.run(tf.assign(model_train.lr, tf.constant(lr, dtype=tf.float32)))

        for _ in tqdm(range(1, config.num_steps + 1)):
            global_step = sess.run(model_train.global_step) + 1
            loss, _ = sess.run([model_train.loss, model_train.train_op])
            if global_step % config.period == 0:
                loss_sum = tf.Summary(value=[tf.Summary.Value(
                    tag="model/loss", simple_value=loss), ])
                writer.add_summary(loss_sum, global_step)
            if global_step % config.checkpoint == 0:
                # Switch to eval mode (disables dropout) for both splits.
                sess.run(tf.assign(model_train.is_train,
                                   tf.constant(False, dtype=tf.bool)))
                _, summ = evaluate_batch(
                    model_train, config.val_num_batches, train_eval_file, sess, "train")
                for s in summ:
                    writer.add_summary(s, global_step)

                metrics, summ = evaluate_batch(
                    model_dev, config.val_num_batches, dev_eval_file, sess, "dev")
                sess.run(tf.assign(model_train.is_train,
                                   tf.constant(True, dtype=tf.bool)))

                # Halve the learning rate after `patience` consecutive
                # checkpoints without a new best dev loss.
                dev_loss = metrics["loss"]
                if dev_loss < loss_save:
                    loss_save = dev_loss
                    patience = 0
                else:
                    patience += 1
                if patience >= config.patience:
                    lr /= 2.0
                    loss_save = dev_loss
                    patience = 0
                sess.run(tf.assign(model_train.lr,
                                   tf.constant(lr, dtype=tf.float32)))
                for s in summ:
                    writer.add_summary(s, global_step)
                writer.flush()
                filename = os.path.join(
                    config.save_dir, "model_{}.ckpt".format(global_step))
                saver.save(sess, filename)
        coord.request_stop()
        coord.join(threads)


def test(config):
    with open(config.word_emb_file, "r") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "r") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.test_eval_file, "r") as fh:
        eval_file = json.load(fh)
    with open(config.test_meta, "r") as fh:
        meta = json.load(fh)

    total = meta["total"]

    print("Loading model...")
    test_batch = create_batch(config.test_record_file, config, test=True)
    with tf.variable_scope("model"):
        model = Model(config, test_batch, word_mat, char_mat, trainable=False)

    sess_config = tf.ConfigProto(allow_soft_placement=True)
    sess_config.gpu_options.allow_growth = True

    with tf.Session(config=sess_config) as sess:
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess.run(init_op)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        saver = tf.train.Saver()
        saver.restore(sess, tf.train.latest_checkpoint(config.save_dir))
        sess.run(tf.assign(model.is_train, tf.constant(False, dtype=tf.bool)))
        losses = []
        answer_dict = {}
        for _ in tqdm(range(total // config.batch_size)):
            qa_id, loss, yp1, yp2 = sess.run(
                [model.qa_id, model.loss, model.yp1, model.yp2])
            answer_dict.update(convert_tokens(
                eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist()))
            losses.append(loss)
        coord.request_stop()
        coord.join(threads)
        loss = np.mean(losses)
        metrics = evaluate(eval_file, answer_dict)
        print("Exact Match: {}, F1: {}".format(
            metrics['exact_match'], metrics['f1']))


def evaluate_batch(model, num_batches, eval_file, sess, data_type):
    answer_dict = {}
    losses = []
    for _ in tqdm(range(1, num_batches + 1)):
        qa_id, loss, yp1, yp2 = sess.run(
            [model.qa_id, model.loss, model.yp1, model.yp2])
        answer_dict.update(convert_tokens(
            eval_file, qa_id.tolist(), yp1.tolist(), yp2.tolist()))
        losses.append(loss)
    loss = np.mean(losses)
    metrics = evaluate(eval_file, answer_dict)
    metrics["loss"] = loss
    loss_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/loss".format(data_type), simple_value=metrics["loss"]), ])
    f1_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/f1".format(data_type), simple_value=metrics["f1"]), ])
    em_sum = tf.Summary(value=[tf.Summary.Value(
        tag="{}/em".format(data_type), simple_value=metrics["exact_match"]), ])
    return metrics, [loss_sum, f1_sum, em_sum]
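
One detail worth pulling out of train(): the learning-rate policy. Restated as a self-contained sketch (the helper name and tuple layout are illustrative, not from the commit), each checkpoint either improves on the best dev loss or burns one unit of patience; exhausting patience halves the rate and rebaselines:

def lr_schedule_step(dev_loss, best_loss, patience, lr, patience_limit=2):
    """Mirror of the checkpoint-time LR update in train() above."""
    if dev_loss < best_loss:
        return dev_loss, 0, lr          # new best: reset patience
    patience += 1
    if patience >= patience_limit:
        # train() also rebaselines best_loss to the current dev loss here.
        return dev_loss, 0, lr / 2.0
    return best_loss, patience, lr

With the default patience of 2, two consecutive checkpoints without a dev-loss improvement halve Adadelta's learning rate.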
