-
Notifications
You must be signed in to change notification settings - Fork 0
/
replay_buffer.py
44 lines (35 loc) · 1.75 KB
/
replay_buffer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from collections import namedtuple, deque
import random
import numpy as np
class ReplayBuffer:
def __init__(self, buffer_size, seed):
"""Initialize a ReplayBuffer object.
Params
======
buffer_size (int): maximum size of buffer
"""
random.seed(seed)
self.memory = deque(maxlen=buffer_size) # internal memory (deque)
self.experience = namedtuple(
"Experience",
field_names=["state", "full_state", "action", "reward", "next_state", "full_next_state", "done"])
def add(self, state, full_state, action, reward, next_state, full_next_state, done):
"""Add a new experience to memory."""
e = self.experience(state, full_state, action, reward, next_state, full_next_state, done)
self.memory.append(e)
def sample(self, n):
"""Randomly sample a batch of experiences from memory."""
experiences = random.sample(self.memory, k=n)
states = np.array([e.state for e in experiences if e is not None])
full_states = np.array([e.full_state for e in experiences if e is not None])
actions = np.array([e.action for e in experiences if e is not None])
rewards = np.array([e.reward for e in experiences if e is not None])
next_states = np.array([e.next_state for e in experiences if e is not None])
full_next_states = np.array([e.full_next_state for e in experiences if e is not None])
dones = np.array([e.done for e in experiences if e is not None])
return states, full_states, actions, rewards, next_states, full_next_states, dones
def __len__(self):
"""Return the current size of internal memory."""
return len(self.memory)
def size(self):
return len(self.memory)