#!/bin/bash
# If this file has Windows line endings, fix them first: sed -i -e 's/\r$//' run_full_experiment.sh
# Then run: sh run_full_experiment.sh
# Hyperparameter search spaces (tested values ---> chosen default).
# Note: some values set below differ from these defaults (e.g. NUM_PRE_TRAIN_EPOCHS=40, NUM_TRAIN_EPOCHS=10).
# UNLABELED_SIZE (6,000-35,000) ---> 20000
# NUM_PIVOTS (100, 200, 300, 400, 500) ---> 100
# NUM_PRE_TRAIN_EPOCHS (20, 40, 60) ---> 20
# UNFROZEN_BERT_LAYERS (1, 2, 3, 5, 8, 12) ---> [2, 8]
# PIVOT_PROB (0.1, 0.3, 0.5, 0.8) ---> 0.5
# NON_PIVOT_PROB (0.1, 0.3, 0.5, 0.8) ---> 0.1
# PRE_TRAINED_EPOCH ---> the pretraining checkpoint epoch loaded in Step 3 (set to NUM_PRE_TRAIN_EPOCHS below)
# CNN_OUT_CHANNELS (16, 32, 64) ---> 32
# CNN_FILTER_SIZE (7, 9, 11) ---> 9
# BATCH_SIZE (32, 64) ---> 32
# NUM_TRAIN_EPOCHS ---> 20
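# To grid-search any of these, wrap Steps 2-3 below in a loop over the tested
# values. A minimal sketch (hypothetical, not part of the original pipeline):
# for PIVOT_PROB in 0.1 0.3 0.5 0.8; do <run Steps 2-3>; done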
DATA_DIR=stancedata
MODEL_DIR=stancemodels
DOBASELINE=False
# Before running, delete the model dir for the domain pair you are about to train.
# Stance domains: feminist, atheism, hillary
#for MODEL in feminist_to_atheism atheism_to_feminist feminist_to_hillary hillary_to_feminist atheism_to_hillary hillary_to_atheism
for MODEL in feminist_to_hillary
do
SRC_DOMAIN="${MODEL%_to_*}" # split model name according to '_to_' and take the prefix
TRG_DOMAIN="${MODEL#*_to_}" # split model name according to '_to_' and take the suffix
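# e.g. MODEL=feminist_to_hillary gives SRC_DOMAIN=feminist and TRG_DOMAIN=hillary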
MODELS_DIR=${MODEL_DIR}/${MODEL}
# Step 1 - Select pivot features
# Pivot selection params
NUM_PIVOTS=100
PIV_MN_ST=20
LOG_NAME="log/pivot.log"
mkdir -p log # ensure the log directory exists (in case pivot_selection.py does not create it)
if [ "$DOBASELINE" = False ]
then
python utils/pivot_selection.py \
--pivot_num=${NUM_PIVOTS} \
--pivot_min_st=${PIV_MN_ST} \
--src=${DATA_DIR}/${SRC_DOMAIN} \
--dest=${DATA_DIR}/${TRG_DOMAIN} \
--log_name=${LOG_NAME}
fi
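# Step 2 reads the selected pivots from ${DATA_DIR}/pivots/${MODEL}/ (see PIVOTS_PATH
# below); pivot_selection.py is assumed to write them there.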
# Step 2 - Run pivot-based finetuning on a pre-trained BERT
# Finetuning params
PIVOT_PROB=0.5
NON_PIVOT_PROB=0.1
NUM_PRE_TRAIN_EPOCHS=40
SAVE_FREQ=10
UNFROZEN_BERT_LAYERS=8
mkdir -p ${MODELS_DIR}
OUTPUT_DIR_NAME=${MODELS_DIR}
PIVOTS_PATH=${DATA_DIR}/pivots/${MODEL}/${NUM_PIVOTS}_bi
if [ "$DOBASELINE" = True ]
then
INIT_OUTPUT_EMBEDS=False
else
INIT_OUTPUT_EMBEDS=True
fi
python perl_pretrain.py \
--src_domain=${DATA_DIR}/${SRC_DOMAIN} \
--trg_domain=${DATA_DIR}/${TRG_DOMAIN} \
--pivot_path=${PIVOTS_PATH} \
--output_dir=${OUTPUT_DIR_NAME} \
--num_train_epochs=${NUM_PRE_TRAIN_EPOCHS} \
--save_every_num_epochs=${SAVE_FREQ} \
--pivot_prob=${PIVOT_PROB} \
--non_pivot_prob=${NON_PIVOT_PROB} \
--num_of_unfrozen_bert_layers=${UNFROZEN_BERT_LAYERS} \
--init_output_embeds=${INIT_OUTPUT_EMBEDS} \
--do_baseline=${DOBASELINE} \
--train_output_embeds
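# Optional sanity check (a sketch; assumes perl_pretrain.py names its checkpoints
# pytorch_model<EPOCH>.bin inside ${OUTPUT_DIR_NAME}, as the cp command in Step 3 implies):
if [ ! -f ${MODELS_DIR}/pytorch_model${NUM_PRE_TRAIN_EPOCHS}.bin ]
then
echo "Missing checkpoint: ${MODELS_DIR}/pytorch_model${NUM_PRE_TRAIN_EPOCHS}.bin" >&2
exit 1
fi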
# Step 3 - Train a classifier on source domain labeled data then predict and evaluate on target domain.
# Supervised task params
PRE_TRAINED_EPOCH=${NUM_PRE_TRAIN_EPOCHS}
CNN_OUT_CHANNELS=32
BATCH_SIZE=32
CNN_FILTER_SIZE=9
NUM_TRAIN_EPOCHS=10
TEMP_DIR=${MODELS_DIR}/temp
mkdir -p ${TEMP_DIR}/
mkdir -p 5-fold-hyper-tune/${MODEL}/
for FOLD_NUM in 1 2 3 4 5
do
cp ${MODELS_DIR}/pytorch_model${PRE_TRAINED_EPOCH}.bin ${TEMP_DIR}
python supervised_task_learning.py \
--in_domain_data_dir=${DATA_DIR}/${SRC_DOMAIN}/ \
--cross_domain_data_dir=${DATA_DIR}/${TRG_DOMAIN}/ \
--do_train \
--output_dir=${TEMP_DIR}/ \
--load_model \
--model_name=pytorch_model${PRE_TRAINED_EPOCH}.bin \
--cnn_window_size=${CNN_FILTER_SIZE} \
--cnn_out_channels=${CNN_OUT_CHANNELS} \
--learning_rate=5e-5 \
--train_batch_size=${BATCH_SIZE} \
--num_train_epochs=${NUM_TRAIN_EPOCHS} \
--save_according_to=acc \
--use_fold \
--fold_num=${FOLD_NUM} \
--write_log_for_each_epoch
COPY_FROM_PATH=${TEMP_DIR}/pytorch_model${PRE_TRAINED_EPOCH}.bin-final_eval_results.txt
COPY_TO_PATH=5-fold-hyper-tune/${MODEL}/ep-${PRE_TRAINED_EPOCH}_ch-${CNN_OUT_CHANNELS}_batch-${BATCH_SIZE}_filt-${CNN_FILTER_SIZE}_fold-${FOLD_NUM}.txt
cp ${COPY_FROM_PATH} ${COPY_TO_PATH}
rm ${TEMP_DIR}/*
done
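# Hypothetical post-processing sketch: list the per-fold result files written above
# for manual comparison. The format of *-final_eval_results.txt is not specified here,
# so any automated parsing (e.g. grep/awk for the accuracy line) must be adapted to it.
ls 5-fold-hyper-tune/${MODEL}/ep-${PRE_TRAINED_EPOCH}_ch-${CNN_OUT_CHANNELS}_batch-${BATCH_SIZE}_filt-${CNN_FILTER_SIZE}_fold-*.txt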
done