-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathexperiments.sh
executable file
·98 lines (80 loc) · 6.27 KB
/
experiments.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# Commands for the experiments in the paper. These will write to stdout, and are meant to be run individually.
# Most experiments should run in seconds, though some can take minutes (especially with the sampling algorithm).
###############
# Section 5.2 #
###############
# Baseline comparison (Table 1 and Figure 2).
# Each environment is run with four inference methods (-i), in this order:
# spec (specified reward), deviation, reachability, rlsp (RLSP).
# Only the deviation runs pass -w 0.5, and only the rlsp runs pass -s 0.
# Every command spans two lines joined by a trailing backslash; copy both lines to run one experiment.
# Room (-p default, -T 7):
python src/run.py -e room -p default -c additive -i spec \
  -d true_reward,final_reward -T 7 -x 20
python src/run.py -e room -p default -c additive -i deviation \
  -d true_reward,final_reward -T 7 -x 20 -w 0.5
python src/run.py -e room -p default -c additive -i reachability \
  -d true_reward,final_reward -T 7 -x 20
python src/run.py -e room -p default -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 7 -x 20
# Train (-p default, -T 8):
python src/run.py -e train -p default -c additive -i spec \
  -d true_reward,final_reward -T 8 -x 20
python src/run.py -e train -p default -c additive -i deviation \
  -d true_reward,final_reward -T 8 -x 20 -w 0.5
python src/run.py -e train -p default -c additive -i reachability \
  -d true_reward,final_reward -T 8 -x 20
python src/run.py -e train -p default -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 8 -x 20
# Apples (-p default, -T 11):
python src/run.py -e apples -p default -c additive -i spec \
  -d true_reward,final_reward -T 11 -x 20
python src/run.py -e apples -p default -c additive -i deviation \
  -d true_reward,final_reward -T 11 -x 20 -w 0.5
python src/run.py -e apples -p default -c additive -i reachability \
  -d true_reward,final_reward -T 11 -x 20
python src/run.py -e apples -p default -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 11 -x 20
# Batteries, easy variant (-p easy, -T 11):
python src/run.py -e batteries -p easy -c additive -i spec \
  -d true_reward,final_reward -T 11 -x 20
python src/run.py -e batteries -p easy -c additive -i deviation \
  -d true_reward,final_reward -T 11 -x 20 -w 0.5
python src/run.py -e batteries -p easy -c additive -i reachability \
  -d true_reward,final_reward -T 11 -x 20
python src/run.py -e batteries -p easy -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 11 -x 20
# Batteries, hard variant (-p default, -T 11):
python src/run.py -e batteries -p default -c additive -i spec \
  -d true_reward,final_reward -T 11 -x 20
python src/run.py -e batteries -p default -c additive -i deviation \
  -d true_reward,final_reward -T 11 -x 20 -w 0.5
python src/run.py -e batteries -p default -c additive -i reachability \
  -d true_reward,final_reward -T 11 -x 20
python src/run.py -e batteries -p default -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 11 -x 20
# Far away vase (room environment with -p bad, -T 5):
python src/run.py -e room -p bad -c additive -i spec \
  -d true_reward,final_reward -T 5 -x 20
python src/run.py -e room -p bad -c additive -i deviation \
  -d true_reward,final_reward -T 5 -x 20 -w 0.5
python src/run.py -e room -p bad -c additive -i reachability \
  -d true_reward,final_reward -T 5 -x 20
python src/run.py -e room -p bad -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 5 -x 20
###############
# Section 5.3 #
###############
# Knowing s_{-T} vs. using a uniform distribution over s_{-T}.
# Commands come in pairs, one pair per environment:
#   1st: the RLSP command for the case where s_{-T} is known;
#   2nd: the same command with -u True appended, for the uniform distribution over s_{-T}.
# Every command spans two lines joined by a trailing backslash; copy both lines to run one experiment.
python src/run.py -e room -p default -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 7 -x 20
python src/run.py -e room -p default -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 7 -x 20 -u True
python src/run.py -e train -p default -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 8 -x 20
python src/run.py -e train -p default -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 8 -x 20 -u True
python src/run.py -e apples -p default -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 11 -x 20
python src/run.py -e apples -p default -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 11 -x 20 -u True
python src/run.py -e batteries -p easy -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 11 -x 20
python src/run.py -e batteries -p easy -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 11 -x 20 -u True
python src/run.py -e batteries -p default -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 11 -x 20
python src/run.py -e batteries -p default -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 11 -x 20 -u True
python src/run.py -e room -p bad -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 5 -x 20
python src/run.py -e room -p bad -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 5 -x 20 -u True
###############
# Section 5.4 #
###############
# Robustness to the choice of Alice's planning horizon T.
# Reuse any of the RLSP commands and change the value passed to -T. For example, with -T 20:
python src/run.py -e room -p default -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 20 -x 20
python src/run.py -e apples -p default -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 20 -x 20
# Several values of T can also be run at once, with the results collected in an output file;
# see src/run.py for details.
##############
# Appendix C #
##############
# MCMC sampling
# These mirror the RLSP commands with -i sampling in place of -i rlsp.
# Note that the commands below also pass -s 0,1,2,3,4 rather than -s 0.
# Every command spans two lines joined by a trailing backslash; copy both lines to run one experiment.
python src/run.py -e room -p default -c additive -i sampling \
  -d true_reward,final_reward -s 0,1,2,3,4 -T 7 -x 20
python src/run.py -e train -p default -c additive -i sampling \
  -d true_reward,final_reward -s 0,1,2,3,4 -T 8 -x 20
python src/run.py -e apples -p default -c additive -i sampling \
  -d true_reward,final_reward -s 0,1,2,3,4 -T 11 -x 20
python src/run.py -e batteries -p easy -c additive -i sampling \
  -d true_reward,final_reward -s 0,1,2,3,4 -T 11 -x 20
python src/run.py -e batteries -p default -c additive -i sampling \
  -d true_reward,final_reward -s 0,1,2,3,4 -T 11 -x 20
python src/run.py -e room -p bad -c additive -i sampling \
  -d true_reward,final_reward -s 0,1,2,3,4 -T 5 -x 20
##############
# Appendix D #
##############
# -c selects the method: "additive" for the Additive method, "bayesian" for the Bayesian method.
# -k controls the standard deviation (0.5 by default); the commands below use -k 1.
# The Apples environment has no specified reward, so the -c option has no effect on it.
python src/run.py -e room -p default -c additive -i rlsp \
  -d true_reward,final_reward -s 0 -T 7 -x 20 -k 1
python src/run.py -e room -p default -c bayesian -i rlsp \
  -d true_reward,final_reward -s 0 -T 7 -x 20 -k 1