-
Notifications
You must be signed in to change notification settings - Fork 40
/
Copy pathlogger.py
52 lines (45 loc) · 2.45 KB
/
logger.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import logging
import os
import time
import numpy as np
from torch.utils.tensorboard import SummaryWriter
class Logger():
def __init__(self, logdir, run_name):
self.log_name = logdir + '/' + run_name
self.tf_writer = None
self.start_time = time.time()
self.n_eps = 0
if not os.path.exists(self.log_name):
os.makedirs(self.log_name)
self.writer = SummaryWriter(self.log_name)
logging.basicConfig(
level=logging.DEBUG,
format='%(asctime)s %(message)s',
handlers=[
logging.StreamHandler(),
logging.FileHandler(self.log_name + '/logger.log'),
],
datefmt='%Y/%m/%d %I:%M:%S %p'
)
def log_episode(self, steps, reward, option_lengths, ep_steps, epsilon):
self.n_eps += 1
logging.info(f"> ep {self.n_eps} done. total_steps={steps} | reward={reward} | episode_steps={ep_steps} "\
f"| hours={(time.time()-self.start_time) / 60 / 60:.3f} | epsilon={epsilon:.3f}")
self.writer.add_scalar(tag="episodic_rewards", scalar_value=reward, global_step=self.n_eps)
self.writer.add_scalar(tag='episode_lengths', scalar_value=ep_steps, global_step=self.n_eps)
# Keep track of options statistics
for option, lens in option_lengths.items():
# Need better statistics for this one, point average is terrible in this case
self.writer.add_scalar(tag=f"option_{option}_avg_length", scalar_value=np.mean(lens) if len(lens)>0 else 0, global_step=self.n_eps)
self.writer.add_scalar(tag=f"option_{option}_active", scalar_value=sum(lens)/ep_steps, global_step=self.n_eps)
def log_data(self, step, actor_loss, critic_loss, entropy, epsilon):
if actor_loss:
self.writer.add_scalar(tag="actor_loss", scalar_value=actor_loss.item(), global_step=step)
if critic_loss:
self.writer.add_scalar(tag="critic_loss", scalar_value=critic_loss.item(), global_step=step)
self.writer.add_scalar(tag="policy_entropy", scalar_value=entropy, global_step=step)
self.writer.add_scalar(tag="epsilon",scalar_value=epsilon, global_step=step)
if __name__=="__main__":
logger = Logger(logdir='runs/', run_name='test_model-test_env')
steps = 200 ; reward = 5 ; option_lengths = {opt: np.random.randint(0,5,size=(5)) for opt in range(5)} ; ep_steps = 50
logger.log_episode(steps, reward, option_lengths, ep_steps)