-
Notifications
You must be signed in to change notification settings - Fork 2
/
run.sh
201 lines (170 loc) · 6.86 KB
/
run.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
#!/bin/bash
# BUGFIX: shebang was '#!bin/bash' (missing leading slash) and would fail when
# the script is executed directly rather than via 'bash run.sh'.
#
# Entry point for the cot-eval pipeline: picks a model, generates CoT reasoning
# traces, evaluates with lm-eval-harness, and uploads results.
# NOTE: env vars are now passed via docker run --env-file
#set -a # automatically export all variables
#source config.env
#set +a
set -e # exit on error

# The Hugging Face token is mandatory: it is used for hub login, trace upload,
# and result upload further down this script.
if [[ -z "${HUGGINGFACEHUB_API_TOKEN}" ]]; then
  echo "HUGGINGFACEHUB_API_TOKEN not found. Please set it in .env file." >&2
  exit 1
fi
# vLLM tuning knobs, overridable via the environment.
# GPU memory fraction handed to vLLM (default 0.9 when unset or empty).
gpu_memory_utilization="${GPU_MEMORY_UTILIZATION:-0.9}"
# CPU swap space in GiB for vLLM (default 4 when unset or empty).
swap_space="${VLLM_SWAP_SPACE:-4}"
# Local cache layout: every intermediate artifact lives under COTEVAL_CACHE_DIR
# (default ./cot-eval-cache when unset or empty).
COTEVAL_CACHE_DIR="${COTEVAL_CACHE_DIR:-./cot-eval-cache}"
LOTMP_NEXTMODELINFO="$COTEVAL_CACHE_DIR/next_model.json"               # info about which model to evaluate next
LOTMP_CONFIGKEYSINFO="$COTEVAL_CACHE_DIR/config_keys.txt"              # names of cot-eval configs that will be used
LOTMP_CONFIGSFOLDER="$COTEVAL_CACHE_DIR/cot_eval_configs"              # folder with cot-eval configs that will be used
LOTMP_ELEU_CONFIGSFOLDER="$COTEVAL_CACHE_DIR/eleuther/tasks/logikon"   # folder with lm-eval-harness tasks
LOTMP_ELEU_CONFIGSINFO="$COTEVAL_CACHE_DIR/lm_eval_harness_tasks.json" # names of lm-eval-harness tasks that will be used
LOTMP_ELEU_OUTPUTDIR="$COTEVAL_CACHE_DIR/eleuther/output"              # folder with lm-eval-harness output
LOTMP_DEFAULT="$COTEVAL_CACHE_DIR/TMP"                                 # folder with other temporary files
# cp pre-built eleuther tasks and templates to cache dir.
# Paths are quoted (SC2086) so a cache dir containing spaces cannot break or
# word-split the commands; the source glob stays unquoted on purpose.
mkdir -p "$LOTMP_DEFAULT"
mkdir -p "$LOTMP_ELEU_CONFIGSFOLDER"
cp -r ./eleuther/tasks/logikon/* "$LOTMP_ELEU_CONFIGSFOLDER"
##############################
# login to huggingface hub (token validated at the top of the script)
huggingface-cli login --token "$HUGGINGFACEHUB_API_TOKEN"
##############################
# lookup model to-be evaluated
# If NEXT_MODEL_PATH is set, evaluate exactly that model/revision/precision;
# otherwise ask the requests repo for the next pending model and read its
# coordinates from the JSON file written by the lookup script.
if [[ -z "${NEXT_MODEL_PATH}" ]]; then
  python scripts/lookup_pending_model.py \
    --keys_file "$LOTMP_NEXTMODELINFO" \
    --max_params "$MAX_MODEL_PARAMS" \
    --requests_repo "$REQUESTS_REPO" \
    --tmp_dir "$LOTMP_DEFAULT"
  # jq reads the file directly (no useless 'cat | jq').
  model=$(jq -r .model "$LOTMP_NEXTMODELINFO")
  revision=$(jq -r .revision "$LOTMP_NEXTMODELINFO")
  precision=$(jq -r .precision "$LOTMP_NEXTMODELINFO")
else
  model="${NEXT_MODEL_PATH}"
  revision="${NEXT_MODEL_REVISION}"
  precision="${NEXT_MODEL_PRECISION}"
fi
echo "Model to evaluate: $model : $revision. Precision: $precision"
# set lm-eval-harness model_args
# Assemble the comma-separated vLLM model_args string consumed by lm-eval.
lm_eval_model_args="pretrained=${model},revision=${revision},dtype=${precision}"
lm_eval_model_args+=",tensor_parallel_size=${NUM_GPUS}"
lm_eval_model_args+=",gpu_memory_utilization=${gpu_memory_utilization}"
lm_eval_model_args+=",trust_remote_code=$TRUST_REMOTE_CODE"
if [[ -n "${MAX_LENGTH}" ]]; then
  # Propagate the context limit both to lm-eval and to the CoT config creator.
  lm_eval_model_args+=",max_length=$MAX_LENGTH"
  cot_config_extra_args="--max_model_len $MAX_LENGTH"
else
  echo "No MAX_LENGTH specified in config."
  cot_config_extra_args=""
fi
echo "lm-eval model_args: $lm_eval_model_args"
##############################
# create CoT configs
# a 'config' defines how reasoning traces are generated for a given task
# All scalar arguments are quoted; $cot_config_extra_args is deliberately
# UNQUOTED because it must word-split into '--max_model_len <N>' (or nothing).
# shellcheck disable=SC2086
python scripts/create_cot_configs.py $cot_config_extra_args \
  --model "$model" \
  --revision "$revision" \
  --precision "${precision}" \
  --chains "$CHAINS" \
  --model_kwargs "$MODELKWARGS" \
  --tasks "$TASKS" \
  --output_dir "$LOTMP_CONFIGSFOLDER" \
  --template_path "./src/cot_eval/configs/template.yaml" \
  --keys_file "$LOTMP_CONFIGKEYSINFO" \
  --num_gpus "$NUM_GPUS" \
  --gpu_memory_utilization "$gpu_memory_utilization" \
  --swap_space "$swap_space"
configkeys=$(<"$LOTMP_CONFIGKEYSINFO") # format is "config1,config2,config3"
echo "Created configs: $configkeys and stored in $LOTMP_CONFIGSFOLDER"
##############################
# generate reasoning traces
# run cot_eval to create reasoning traces for every config (model and task)
# reasoning traces are uploaded to huggingface hub
# Split the comma-separated key list safely: 'read -a' does not glob-expand,
# unlike the previous unquoted (${configkeys//,/ }) expansion.
IFS=',' read -r -a arr_configkeys <<< "$configkeys"
for config in "${arr_configkeys[@]}"
do
  cot-eval \
    --config "${LOTMP_CONFIGSFOLDER}/${config}.yaml" \
    --upload_dataset "$TRACES_REPO" \
    --hftoken "$HUGGINGFACEHUB_API_TOKEN"
done
##############################
# create lm-eval-harness tasks
# a 'harness task' defines how to evaluate a given model on a given task,
# specifically whether to include the model's reasoning traces or not
python scripts/create_lm_eval_harness_tasks.py \
  --model "$model" \
  --configs "$configkeys" \
  --output_dir "$LOTMP_ELEU_CONFIGSFOLDER" \
  --configs_dir "$LOTMP_CONFIGSFOLDER" \
  --traces_dataset_path "$TRACES_REPO" \
  --keys_file "$LOTMP_ELEU_CONFIGSINFO"
# jq reads the keys file directly (no useless 'cat | jq').
harness_tasks_base=$(jq -r .base "$LOTMP_ELEU_CONFIGSINFO") # format is "task1,task2,task3"
harness_tasks_cot=$(jq -r .cot "$LOTMP_ELEU_CONFIGSINFO")   # format is "task1,task2,task3"
echo "Created lm-eval-harness tasks base: $harness_tasks_base" # no cot
echo "Created lm-eval-harness tasks cot: $harness_tasks_cot"
timestamp=$(date +"%y-%m-%d-%T")
##############################
# ORIG evaluation
# run lm-eval originial BASE (unperturbed) for each task
if [ "$DO_BASEEVAL" = true ] ; then
  # Build "<task>_base" names from the comma-separated TASKS list and join
  # them with commas again. 'read -a' avoids glob expansion of task names.
  IFS=',' read -r -a arrTASKS <<< "$TASKS"
  basetasks=$(printf "%s_base," "${arrTASKS[@]}")
  basetasks=${basetasks%,} # strip trailing comma
  output_path="$LOTMP_ELEU_OUTPUTDIR/${model}/orig/results_${timestamp}"
  if [ -f "$output_path" ]; then
    # BUGFIX: message previously interpolated the undefined variable $FILE.
    echo "Outputfile $output_path exists. Skipping eval of $basetasks."
  else
    lm-eval --model vllm \
      --model_args "$lm_eval_model_args" \
      --tasks "$basetasks" \
      --num_fewshot 0 \
      --batch_size auto \
      --output_path "$output_path" \
      --include_path "$LOTMP_ELEU_CONFIGSFOLDER"
  fi
fi
##############################
# BASE and COT evaluation
# run lm evaluation harness for each of the tasks
# without reasoning traces
lm-eval --model vllm \
  --model_args "$lm_eval_model_args" \
  --tasks "${harness_tasks_base}" \
  --num_fewshot 0 \
  --batch_size auto \
  --output_path "$LOTMP_ELEU_OUTPUTDIR/${model}/base/${timestamp}" \
  --include_path "$LOTMP_ELEU_CONFIGSFOLDER"
# with reasoning traces
# Split the comma-separated cot task list without glob expansion.
IFS=',' read -r -a arrHT <<< "$harness_tasks_cot"
ht_batch_size=5 # cot tasks evaluated per lm-eval invocation
# batched processing of harness_tasks_cot
for ((i = 0; i < ${#arrHT[@]}; i += ht_batch_size)); do
  ht_batch=("${arrHT[@]:i:ht_batch_size}")
  # re-join the batch into a comma-separated task list
  ht_batch_s=$(printf ",%s" "${ht_batch[@]}")
  ht_batch_s=${ht_batch_s:1}
  echo "Evaluating cot tasks: $ht_batch_s"
  lm-eval --model vllm \
    --model_args "$lm_eval_model_args" \
    --tasks "${ht_batch_s}" \
    --num_fewshot 0 \
    --batch_size auto \
    --output_path "$LOTMP_ELEU_OUTPUTDIR/${model}/cot/${timestamp}_idx${i}" \
    --include_path "$LOTMP_ELEU_CONFIGSFOLDER"
done
##############################
# collect and upload results
# Pushes harness output to the results/leaderboard repos and optionally opens
# pull requests (controlled by CREATE_PULLREQUESTS).
python scripts/upload_results.py \
  --model "$model" \
  --revision "$revision" \
  --precision "$precision" \
  --tasks "$TASKS" \
  --timestamp "$timestamp" \
  --output_dir "$LOTMP_ELEU_OUTPUTDIR" \
  --tmp_dir "$LOTMP_DEFAULT" \
  --results_repo "$RESULTS_REPO" \
  --requests_repo "$REQUESTS_REPO" \
  --leaderboard_results_repo "$LEADERBOARD_RESULTS_REPO" \
  --create_pr "$CREATE_PULLREQUESTS"