-
Notifications
You must be signed in to change notification settings - Fork 0
/
main_q.c
82 lines (60 loc) · 1.38 KB
/
main_q.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include "stategrid.h"
/*
 * Translate an action code into its lower-case direction name.
 *
 * a: one of ACTION_N, ACTION_S, ACTION_E or ACTION_W.
 *
 * Returns "north", "south", "east" or "west" for the matching code and
 * "unknown" for any other value. The result points to a string literal:
 * the caller must neither modify nor free it.
 */
char* format_action (int a)
{
	if (a == ACTION_N) {
		return "north";
	}
	if (a == ACTION_S) {
		return "south";
	}
	if (a == ACTION_E) {
		return "east";
	}
	if (a == ACTION_W) {
		return "west";
	}

	/* Any code outside the four known directions. */
	return "unknown";
}
/*
 * Run a single episode on the grid: starting from sg->start, repeatedly
 * pick an action with state_grid_get_best_action, apply it with
 * state_grid_take_action, and feed the observed transition to
 * state_grid_update_cost, until the agent's state equals sg->end.
 *
 * sg:          grid to run on (must not be NULL).
 * interactive: when non-zero, the grid is displayed before every step and
 *              the loop waits for one character on stdin.
 *
 * Returns the sum of the rewards (ag.r) collected during the episode.
 */
static double run_episode (state_grid_t* sg, int interactive)
{
	agent_t ag;
	double total = 0;
	int s;
	int a, aprime;

	/*
	 * NOTE(review): the agent state starts at 0 rather than at sg->start,
	 * so the loop below is skipped entirely when sg->end is 0. Kept as in
	 * the original; confirm this is intended.
	 */
	ag.s = ag.r = 0;
	s = sg->start;

	while (ag.s != sg->end) {
		if (interactive) {
			state_grid_display (sg, s);
			/* Pause until a character (or EOF) arrives on stdin. */
			(void) getchar ();
		}

		a = state_grid_get_best_action (sg, s);
		state_grid_take_action (sg, &ag, s, a);

		/* The action chosen for the new state is part of the update. */
		aprime = state_grid_get_best_action (sg, ag.s);
		state_grid_update_cost (sg, s, a, ag.r, ag.s, aprime);

		s = ag.s;
		total += ag.r;
	}

	return total;
}

/*
 * Program entry point: build a state grid, run a fixed number of training
 * episodes on it, then replay one more episode step by step (displaying
 * the grid and waiting for a key press before each move) and print the
 * reward collected during that replay.
 *
 * argc, argv: unused.
 *
 * Returns 0 on success, EXIT_FAILURE if the grid could not be created.
 */
int main (int argc, char** argv)
{
	/* Number of training episodes run before the path is displayed. */
	enum { TRAINING_EPISODES = 4000 };

	state_grid_t* sg;
	double reward;
	int episode;

	(void) argc;
	(void) argv;

	fprintf (stdout, "Q-LEARNING\n");

	/* Seed the C library PRNG; srand takes an unsigned int. */
	srand ((unsigned int) time (NULL));

	/*
	 * NOTE(review): the meaning of these arguments is defined in
	 * stategrid.h (presumably grid dimensions followed by learning
	 * parameters) - confirm against the header.
	 */
	sg = state_grid_new (12, 4, 0.1, 1, 0.1);
	if (sg == NULL) {
		fprintf (stderr, "error: could not create the state grid\n");
		return EXIT_FAILURE;
	}
	state_grid_init (sg);

	/* Training phase: the per-episode reward is not needed here. */
	for (episode = 0; episode < TRAINING_EPISODES; episode++) {
		(void) run_episode (sg, 0);
	}

	/*
	 * Display phase: same loop as training (costs are still updated),
	 * but shown step by step.
	 */
	fprintf (stdout, "path:\n");
	reward = run_episode (sg, 1);
	fprintf (stdout, "reward : %.2f\n", reward);

	state_grid_free (sg);
	return 0;
}