diff --git a/README.md b/README.md
index 07d7fb7..dd950fb 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,72 @@ memory systems. See the [paper](todo/update/the/paper) for more information.
 1. Running in a virtual environment (e.g., conda, virtualenv, etc.) is highly recommended so that you don't mess up with the system python.
 1. This env is added to the PyPI server. Just run: `pip install room-env`
 
+## RoomEnv-v1
+
+```python
+import gym
+import room_env
+import random
+
+env = gym.make("RoomEnv-v1")
+observation, info = env.reset()
+rewards = 0
+while True:
+
+    # One difference from the original AAAI-2023 paper: the reward here
+    # is either +1 or -1, instead of +1 or 0.
+    observation, reward, done, truncated, info = env.step(random.randint(0, 2))
+    rewards += reward
+    if done:
+        break
+```
+
+Every time an agent takes an action, the environment gives you three memory systems
+(i.e., episodic, semantic, and short-term) as an `observation`. The goal of the agent is
+to learn a memory management policy. The actions are:
+
+- 0: Put the short-term memory into the episodic memory system.
+- 1: Put it into the semantic memory system.
+- 2: Just forget it.
+
+The memory systems will be managed according to your actions, and they will eventually be
+used to answer questions. You don't have to worry about the question answering; it's done
+by the environment. The better you manage your memory systems, the higher the chances
+that your agent answers the questions correctly!
+
+The default parameters for the environment are:
+
+```python
+{
+    "des_size": "l",
+    "seed": 42,
+    "policies": {"encoding": "argmax",
+                 "memory_management": "RL",
+                 "question_answer": "episodic_semantic"},
+    "capacity": {"episodic": 16, "semantic": 16, "short": 1},
+    "question_prob": 1.0,
+    "observation_params": "perfect",
+    "allow_random_human": False,
+    "allow_random_question": False,
+    "total_episode_rewards": 128,
+    "pretrain_semantic": False,
+    "check_resources": True,
+    "varying_rewards": False,
+}
+```
+
+If you want to create an env with a different set of parameters, you can do so. For example:
+
+```python
+env_params = {"seed": 0,
+              "capacity": {"episodic": 8, "semantic": 16, "short": 1},
+              "pretrain_semantic": True}
+env = gym.make("RoomEnv-v1", **env_params)
+```
+
+Take a look at [this repo](https://github.com/tae898/explicit-memory) for an actual
+interaction with this environment to learn a policy.
+
 ## Data collection
 
 Data is collected from querying ConceptNet APIs. For simplicity, we only collect triples
@@ -29,6 +95,9 @@ python collect_data.py
 ## [The RoomDes](room_env/des.py)
 
+The DES is part of RoomEnv. You don't have to understand how it works to use the
+environment, but if you are still curious, you can read on below.
+
 You can run the RoomDes by
 
 ```python
@@ -56,36 +125,6 @@ with `debug=True` it'll print events (i.e., state changes) to the console.
            'previous': 'lap'}}}}
 ```
-## RoomEnv-v1
-
-```python
-import gym
-import room_env
-
-env = gym.make("RoomEnv-v1")
-observation, info = env.reset()
-while True:
-    observation, reward, done, truncated, info = env.step(0)
-    if done:
-        break
-```
-
-Every time when an agent takes an action, the environment will give you three memory
-systems (i.e., episodic, semantic, and short-term), as an `observation`. The goal of the
-agent is to learn a memory management policy. The actions are:
-
-- 0: Put the short-term memory into the epiosdic memory system.
-- 1: Put it into the semantic.
-- 2: Just forget it.
-
-The memory systems will be managed according to your actions, and they will eventually
-used to answer questions. You don't have to worry about the question answering. It's done
-by the environment. The better you manage your memory systems, the higher chances that
-your agent can answer more questions correctly!
-
-Take a look at [this repo](https://github.com/tae898/explicit-memory) for an actual
-interaction with this environment to learn a policy.
-
 ## Contributing
 
 Contributions are what make the open source community such an amazing place to be learn,
diff --git a/room-env-v1.ipynb b/room-env-v1.ipynb
index f24d7e3..dc63044 100644
--- a/room-env-v1.ipynb
+++ b/room-env-v1.ipynb
@@ -27,30 +27,30 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{2: {'episodic': {'mean': -45.5, 'std': 5.92},\n",
-      "     'pre_sem': {'mean': -42.3, 'std': 5.021},\n",
-      "     'random': {'mean': -46.5, 'std': 5.608},\n",
-      "     'semantic': {'mean': -37.9, 'std': 6.236}},\n",
-      " 4: {'episodic': {'mean': -38.5, 'std': 5.886},\n",
-      "     'pre_sem': {'mean': -34.7, 'std': 5.728},\n",
-      "     'random': {'mean': -36.1, 'std': 10.885},\n",
-      "     'semantic': {'mean': -27.1, 'std': 5.186}},\n",
-      " 8: {'episodic': {'mean': -27.9, 'std': 6.963},\n",
-      "     'pre_sem': {'mean': -20.9, 'std': 7.162},\n",
-      "     'random': {'mean': -26.3, 'std': 7.128},\n",
-      "     'semantic': {'mean': -1.5, 'std': 5.201}},\n",
-      " 16: {'episodic': {'mean': -10.1, 'std': 7.382},\n",
-      "      'pre_sem': {'mean': -0.9, 'std': 9.534},\n",
-      "      'random': {'mean': 2.3, 'std': 8.603},\n",
-      "      'semantic': {'mean': 18.5, 'std': 6.859}},\n",
-      " 32: {'episodic': {'mean': 25.5, 'std': 7.527},\n",
-      "      'pre_sem': {'mean': 44.3, 'std': 4.981},\n",
-      "      'random': {'mean': 19.9, 'std': 6.58},\n",
-      "      'semantic': {'mean': 30.1, 'std': 6.789}},\n",
-      " 64: {'episodic': {'mean': 64.3, 'std': 4.518},\n",
-      "      'pre_sem': {'mean': 53.1, 'std': 5.485},\n",
-      "      'random': {'mean': 28.5, 'std': 11.147},\n",
-      "      'semantic': {'mean': 30.1, 'std': 8.443}}}}\n"
+      "{2: {'episodic': {'mean': -97.2, 'std': 7.111},\n",
+      "     'pre_sem': {'mean': -88.8, 'std': 8.01},\n",
+      "     'random': {'mean': -97.8, 'std': 5.618},\n",
+      "     'semantic': {'mean': -79.6, 'std': 7.526}},\n",
+      " 4: {'episodic': {'mean': -84.2, 'std': 7.718},\n",
+      "     'pre_sem': {'mean': -75.0, 'std': 7.169},\n",
+      "     'random': {'mean': -79.0, 'std': 6.527},\n",
+      "     'semantic': {'mean': -61.6, 'std': 8.188}},\n",
+      " 8: {'episodic': {'mean': -62.0, 'std': 8.944},\n",
+      "     'pre_sem': {'mean': -47.2, 'std': 8.256},\n",
+      "     'random': {'mean': -51.4, 'std': 6.873},\n",
+      "     'semantic': {'mean': -11.4, 'std': 12.233}},\n",
+      " 16: {'episodic': {'mean': -19.8, 'std': 11.294},\n",
+      "      'pre_sem': {'mean': -5.2, 'std': 9.558},\n",
+      "      'random': {'mean': 0.6, 'std': 9.8},\n",
+      "      'semantic': {'mean': 36.6, 'std': 10.2}},\n",
+      " 32: {'episodic': {'mean': 50.4, 'std': 8.429},\n",
+      "      'pre_sem': {'mean': 87.6, 'std': 7.736},\n",
+      "      'random': {'mean': 35.8, 'std': 14.323},\n",
+      "      'semantic': {'mean': 54.4, 'std': 5.851}},\n",
+      " 64: {'episodic': {'mean': 128.0, 'std': 0.0},\n",
+      "      'pre_sem': {'mean': 107.0, 'std': 5.459},\n",
+      "      'random': {'mean': 54.0, 'std': 14.886},\n",
+      "      'semantic': {'mean': 55.2, 'std': 6.21}}}}\n"
     ]
    }
   ],
@@ -68,7 +68,7 @@
     "    env=\"RoomEnv-v1\",\n",
     "    des_size=\"l\",\n",
     "    seeds=[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],\n",
-    "    question_prob=0.5,\n",
+    "    question_prob=1.0,\n",
     "    policies={\n",
     "        \"memory_management\": \"rl\",\n",
    "        \"question_answer\": \"episodic_semantic\",\n",
diff --git a/room_env/envs/room1.py b/room_env/envs/room1.py
index 55d50f1..2109668 100644
--- a/room_env/envs/room1.py
+++ b/room_env/envs/room1.py
@@ -67,7 +67,7 @@ def __init__(
             "encoding": "argmax",
         },
         capacity: dict = {"episodic": 16, "semantic": 16, "short": 1},
-        question_prob: int = 0.5,
+        question_prob: int = 1.0,
         observation_params: str = "perfect",
         allow_random_human: bool = False,
         allow_random_question: bool = False,
diff --git a/room_env/utils.py b/room_env/utils.py
index 187b29d..7037f0c 100644
--- a/room_env/utils.py
+++ b/room_env/utils.py
@@ -312,16 +312,24 @@ def run_des_seeds(
     if forget_short == "random":
         pretrain_semantic = False
-        capacity_ = {"episodic": capacity // 2, "semantic": capacity // 2}
+        capacity_ = {
+            "episodic": capacity // 2,
+            "semantic": capacity // 2,
+            "short": 1,
+        }
     elif forget_short == "episodic":
         pretrain_semantic = False
-        capacity_ = {"episodic": capacity, "semantic": 0}
+        capacity_ = {"episodic": capacity, "semantic": 0, "short": 1}
     elif forget_short == "semantic":
         pretrain_semantic = False
-        capacity_ = {"episodic": 0, "semantic": capacity}
+        capacity_ = {"episodic": 0, "semantic": capacity, "short": 1}
     elif forget_short == "pre_sem":
         pretrain_semantic = True
-        capacity_ = {"episodic": capacity // 2, "semantic": capacity // 2}
+        capacity_ = {
+            "episodic": capacity // 2,
+            "semantic": capacity // 2,
+            "short": 1,
+        }
     else:
         raise ValueError
@@ -559,16 +567,24 @@ def get_handcrafted(
     if forget_short == "random":
         pretrain_semantic = False
-        capacity_ = {"episodic": capacity // 2, "semantic": capacity // 2}
+        capacity_ = {
+            "episodic": capacity // 2,
+            "semantic": capacity // 2,
+            "short": 1,
+        }
     elif forget_short == "episodic":
         pretrain_semantic = False
-        capacity_ = {"episodic": capacity, "semantic": 0}
+        capacity_ = {"episodic": capacity, "semantic": 0, "short": 1}
     elif forget_short == "semantic":
         pretrain_semantic = False
-        capacity_ = {"episodic": 0, "semantic": capacity}
+        capacity_ = {"episodic": 0, "semantic": capacity, "short": 1}
     elif forget_short == "pre_sem":
         pretrain_semantic = True
-        capacity_ = {"episodic": capacity // 2, "semantic": capacity // 2}
+        capacity_ = {
+            "episodic": capacity // 2,
+            "semantic": capacity // 2,
+            "short": 1,
+        }
     else:
         raise ValueError
diff --git a/setup.cfg b/setup.cfg
index 394e726..4e50fc8 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,6 @@
 [metadata]
 name = room_env
-version = 0.2.3
+version = 1.0.0
 author = Taewoon Kim
 author_email = tae898@gmail.com
 description = The Room environment
diff --git a/test/test_room_env_v1.py b/test/test_room_env_v1.py
index 32f628d..d60e7f3 100644
--- a/test/test_room_env_v1.py
+++ b/test/test_room_env_v1.py
@@ -37,7 +37,7 @@ def test_all(self) -> None:
                 done,
                 truncated,
                 info,
-            ) = env.step(0)
+            ) = env.step(random.randint(0, 2))
             if done:
                 break
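
For reference, here is a minimal sketch (not part of the diff above) of how one might average the random-action baseline's episode return over several seeds under the new defaults (`question_prob=1.0`, a capacity dict that includes `"short": 1`). It only uses the `gym.make` / `reset` / `step` API shown in the README section; the helper name `random_baseline` is purely illustrative.

```python
import random
import statistics

import gym
import room_env  # importing room_env registers "RoomEnv-v1" with gym


def random_baseline(seed: int) -> int:
    """Run one episode, choosing memory-management actions uniformly at random."""
    env = gym.make(
        "RoomEnv-v1",
        seed=seed,
        capacity={"episodic": 16, "semantic": 16, "short": 1},
    )
    observation, info = env.reset()
    rewards = 0
    while True:
        # Actions: 0 = move to episodic, 1 = move to semantic, 2 = forget.
        observation, reward, done, truncated, info = env.step(random.randint(0, 2))
        rewards += reward
        if done:
            break
    return rewards


returns = [random_baseline(seed) for seed in range(10)]
print(f"mean: {statistics.mean(returns)}, std: {statistics.pstdev(returns)}")
```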