# sts_b-adan.yaml
# @package _group_
# Run-wide settings: fp16 flags and loss-scale parameters, plus logging
# format and interval. All keys below belong to the `common` group.
common:
  fp16: true
  fp16_init_scale: 4
  threshold_loss_scale: 1
  fp16_scale_window: 128
  log_format: json
  log_interval: 200
# Task selection and its input settings.
task:
  _name: sentence_prediction
  # `???` gives no default here; presumably this is the OmegaConf
  # missing-value marker and must be overridden at launch (confirm).
  data: ???
  init_token: 0
  separator_token: 2
  # One output; the `criterion` section sets `regression_target: true`.
  num_classes: 1
  max_positions: 512
# Checkpoint loading/saving options.
checkpoint:
  # `???` gives no default here; presumably the checkpoint path must be
  # supplied as an override at launch (confirm).
  restore_file: ???
  # The three reset_* flags are all enabled for this run.
  reset_optimizer: true
  reset_dataloader: true
  reset_meters: true
  no_epoch_checkpoints: true
# Distributed-training options; world size is set to 1 in this config.
distributed_training:
  find_unused_parameters: true
  distributed_world_size: 1
# Loss selection; `regression_target` is enabled and `task.num_classes` is 1.
criterion:
  _name: sentence_prediction
  regression_target: true
# Batching limits: a sentence count (`batch_size`) and a token count
# (`max_tokens`) are both specified.
dataset:
  batch_size: 16
  required_batch_size_multiple: 1
  max_tokens: 4400
# Optimizer selection (`adan`) and its hyperparameters.
optimizer:
  _name: adan
  weight_decay: 0.01
  # Three-element tuple written as text; quoted so it is unambiguously a
  # string scalar (the value itself is unchanged).
  adan_betas: '(0.98,0.99,0.99)'
  adan_eps: 1e-8
# Learning-rate schedule: `cosine` with 214 warmup updates.
lr_scheduler:
  _name: cosine
  warmup_updates: 214
# Training-loop limits and step settings.
optimization:
  clip_norm: 0.5
  # `lr` is given as a one-element list.
  lr: [4e-05]
  # Both an update limit and an epoch limit are specified.
  max_update: 3598
  max_epoch: 10
# Model selection (`roberta`) with both dropout rates set to 0.1.
model:
  _name: roberta
  dropout: 0.1
  attention_dropout: 0.1