Prepare tf2 [WIP] #580

Closed
wants to merge 28 commits

Commits (28)
cb48a12
Update docker image
araffin Nov 23, 2019
e180849
Deactivate tests for tf2
araffin Nov 23, 2019
5dababd
Update travis docker image
araffin Nov 23, 2019
446fca4
Fix Dockerfile build for gpu
araffin Nov 23, 2019
e4c82e6
Revert "Fix Dockerfile build for gpu"
araffin Nov 23, 2019
d50f8a6
Update docker image version
araffin Nov 23, 2019
1d9f01c
Fix travis CI
araffin Nov 23, 2019
9a9f0da
Merge branch 'master' into prepare-tf2
araffin Nov 24, 2019
05c5717
Fix action scaling for warmup exploration (SAC/DDPG/TD3) (#584)
Antymon Nov 26, 2019
cd9a6a8
Merge branch 'master' into prepare-tf2
araffin Nov 26, 2019
b461adb
Refactor and clarify doc for load_results (#583)
jbulow Nov 27, 2019
73deeb3
Merge branch 'master' into prepare-tf2
araffin Nov 27, 2019
04c35e1
PPO cpp project and C++ export (#585)
Antymon Nov 28, 2019
a74fcb3
Merge branch 'master' into prepare-tf2
araffin Nov 28, 2019
e923424
Remove unused import
araffin Nov 28, 2019
6039b89
Fix - sample type inconsistency in (Multi)Categorical Probability Dis…
seheevic Dec 2, 2019
e30f10b
Merge branch 'master' into prepare-tf2
araffin Dec 3, 2019
aaf9f41
Update custom env doc to match gym API (#597)
jkterry1 Dec 4, 2019
9c85f2e
Merge branch 'master' into prepare-tf2
araffin Dec 4, 2019
79646cf
Minor PEP8 fixes in DQN.py (#600)
jkterry1 Dec 5, 2019
bef46d3
Doc update: notebooks links + RL tips (#536)
araffin Dec 11, 2019
ea93850
Correct typos (#614)
araffin Dec 12, 2019
5aae63b
Merge branch 'master' into prepare-tf2
araffin Dec 15, 2019
ba51e25
Gym Env Checker (#615)
araffin Dec 16, 2019
a905760
Merge branch 'master' into prepare-tf2
araffin Dec 16, 2019
99dcdba
Update VecNormalize normalization (#609)
shwang Dec 18, 2019
98e9ee9
Release v2.9.0 (#629)
araffin Dec 19, 2019
6297d1d
Merge branch 'master' into prepare-tf2
araffin Dec 29, 2019

Files changed
10 changes: 10 additions & 0 deletions .github/ISSUE_TEMPLATE/issue-template.md
@@ -10,6 +10,16 @@ If you have any questions, feel free to create an issue with the tag [question].
If you wish to suggest an enhancement or feature request, add the tag [feature request].
If you are submitting a bug report, please fill in the following details.

If your issue is related to a custom gym environment, please check it first using:

```python
from stable_baselines.common.env_checker import check_env

env = CustomEnv(arg1, ...)
# It will check your custom environment and output additional warnings if needed
check_env(env)
```

**Describe the bug**
A clear and concise description of what the bug is.

1 change: 1 addition & 0 deletions .gitignore
@@ -12,6 +12,7 @@
__pycache__/
_build/
*.npz
*.zip

# Setuptools distribution and build folders.
/dist/
53 changes: 30 additions & 23 deletions .travis.yml
@@ -4,7 +4,7 @@ python:

env:
global:
- DOCKER_IMAGE=stablebaselines/stable-baselines-cpu:v2.9.0
- DOCKER_IMAGE=stablebaselines/stable-baselines-cpu:v3.0.0

notifications:
email: false
@@ -21,29 +21,36 @@ script:
jobs:
include:
# Big test suite. Run in parallel to decrease wall-clock time, and to avoid OOM error from leaks
- stage: Test
name: "Unit Tests a-h"
env: TEST_GLOB="[a-h]*"

- name: "Unit Tests i-l"
env: TEST_GLOB="[i-l]*"

- name: "Unit Tests m-sa"
env: TEST_GLOB="{[m-r]*,sa*}"

- name: "Unit Tests sb-z"
env: TEST_GLOB="{s[b-z]*,[t-z]*}"

- name: "Unit Tests determinism"
env: TEST_GLOB="0deterministic.py"
# TODO: reactivate for tf2
# - stage: Test
# name: "Unit Tests a-h"
# env: TEST_GLOB="[a-h]*"
#
# - name: "Unit Tests i-l"
# env: TEST_GLOB="[i-l]*"
#
# - name: "Unit Tests m-sa"
# env: TEST_GLOB="{[m-r]*,sa*}"
#
# - name: "Unit Tests sb-z"
# env: TEST_GLOB="{s[b-z]*,[t-z]*}"
#
# - name: "Unit Tests determinism"
# env: TEST_GLOB="0deterministic.py"

- name: "Sphinx Documentation"
script:
- 'docker run -it --rm --mount src=$(pwd),target=/root/code/stable-baselines,type=bind ${DOCKER_IMAGE} bash -c "cd /root/code/stable-baselines/ && pip install .[docs] && pushd docs/ && make clean && make html"'

- name: "Type Checking"
script:
- 'docker run --rm --mount src=$(pwd),target=/root/code/stable-baselines,type=bind ${DOCKER_IMAGE} bash -c "cd /root/code/stable-baselines/ && pytype"'
- stage: Test
name: "Unit Tests"
env: TEST_GLOB="*"

# TODO: reactivate for tf2
# - name: "Sphinx Documentation"
# script:
# - 'docker run -it --rm --mount src=$(pwd),target=/root/code/stable-baselines,type=bind ${DOCKER_IMAGE} bash -c "cd /root/code/stable-baselines/ && pip install .[docs] && pushd docs/ && make clean && make html"'

# TODO: reactivate for tf2
# - name: "Type Checking"
# script:
# - 'docker run --rm --mount src=$(pwd),target=/root/code/stable-baselines,type=bind ${DOCKER_IMAGE} bash -c "cd /root/code/stable-baselines/ && pytype"'

- stage: Codacy Trigger
if: type != pull_request
4 changes: 2 additions & 2 deletions Dockerfile
@@ -34,9 +34,9 @@ RUN \
cd $CODE_DIR && \
pip install --upgrade pip && \
if [[ $USE_GPU == "True" ]]; then \
TENSORFLOW_PACKAGE="tensorflow-gpu==1.8.0"; \
TENSORFLOW_PACKAGE="tensorflow-gpu"; \
else \
TENSORFLOW_PACKAGE="tensorflow==1.8.0"; \
TENSORFLOW_PACKAGE="tensorflow"; \
fi; \
pip install ${TENSORFLOW_PACKAGE} && \
pip install -e .[mpi,tests] && \
6 changes: 4 additions & 2 deletions README.md
@@ -65,7 +65,7 @@ Documentation: https://stable-baselines.readthedocs.io/en/master/guide/rl_zoo.ht

## Installation

**Note:** Stabe-Baselines supports Tensorflow versions from 1.8.0 to 1.14.0. Support for Tensorflow 2 API is planned.
**Note:** Stabe-Baselines requires Tensorflow >= 2.x.x For older version, please look at branch Stable-Baselines < 3.x.x.

### Prerequisites
Baselines requires python3 (>=3.5) with the development headers. You'll also need system packages CMake, OpenMPI and zlib. Those can be installed as follows
@@ -113,7 +113,9 @@ from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2

env = gym.make('CartPole-v1')
env = DummyVecEnv([lambda: env]) # The algorithms require a vectorized environment to run
# Optional: PPO2 requires a vectorized environment to run
# the env is now wrapped automatically when passing it to the constructor
# env = DummyVecEnv([lambda: env])

model = PPO2(MlpPolicy, env, verbose=1)
model.learn(total_timesteps=10000)
Binary file added docs/_static/img/mistake.png
7 changes: 7 additions & 0 deletions docs/common/env_checker.rst
@@ -0,0 +1,7 @@
.. _env_checker:

Gym Environment Checker
========================

.. automodule:: stable_baselines.common.env_checker
:members:
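
For readers skimming the diff, a minimal usage sketch of the checker documented above. It relies only on the `check_env` call already shown in the issue-template change; `CartPole-v1` stands in for a custom environment:

```python
import gym

from stable_baselines.common.env_checker import check_env

# Any gym.Env instance can be checked; CartPole-v1 stands in for a custom env.
env = gym.make('CartPole-v1')

# Raises an error if the env does not follow the gym interface,
# and prints additional warnings when something looks suspicious.
check_env(env)
```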
7 changes: 7 additions & 0 deletions docs/common/evaluation.rst
@@ -0,0 +1,7 @@
.. _eval:

Evaluation Helper
=================

.. automodule:: stable_baselines.common.evaluation
:members:
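
A minimal sketch of how this helper can be used, mirroring the `evaluate_policy` call added to the DQN Lunar Lander example later in this diff; the model and environment below are stand-ins:

```python
import gym

from stable_baselines import DQN
from stable_baselines.common.evaluation import evaluate_policy

env = gym.make('CartPole-v1')
# Train briefly just so there is something to evaluate
model = DQN('MlpPolicy', env).learn(total_timesteps=1000)

# Run 10 evaluation episodes and report the mean reward
mean_reward, n_steps = evaluate_policy(model, env, n_eval_episodes=10)
print(mean_reward)
```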
4 changes: 2 additions & 2 deletions docs/common/schedules.rst
@@ -3,8 +3,8 @@
Schedules
=========

Schedules are used as hyperparameter for most of the algortihms,
in order to change value of a parameter over time (usuallly the learning rate).
Schedules are used as hyperparameter for most of the algorithms,
in order to change value of a parameter over time (usually the learning rate).


.. automodule:: stable_baselines.common.schedules
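
To make the corrected description concrete, a small sketch using `LinearSchedule`. The class name, constructor arguments and `value(step)` accessor are assumptions based on `stable_baselines.common.schedules`; check the module reference above if they differ:

```python
from stable_baselines.common.schedules import LinearSchedule

# Linearly anneal a hyperparameter (e.g. the exploration rate)
# from 1.0 down to 0.05 over 10000 timesteps.
schedule = LinearSchedule(schedule_timesteps=10000, initial_p=1.0, final_p=0.05)

print(schedule.value(0))      # 1.0 at the start
print(schedule.value(5000))   # ~0.525 halfway through
print(schedule.value(10000))  # 0.05 at the end
```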
11 changes: 11 additions & 0 deletions docs/conf.py
@@ -16,6 +16,14 @@
import sys
from unittest.mock import MagicMock

# We CANNOT enable 'sphinxcontrib.spelling' because ReadTheDocs.org does not support
# PyEnchant.
try:
import sphinxcontrib.spelling
enable_spell_check = True
except ImportError:
enable_spell_check = False

# source code directory, relative to this file, for sphinx-autobuild
sys.path.insert(0, os.path.abspath('..'))

@@ -69,6 +77,9 @@ def __getattr__(cls, name):
'sphinx.ext.viewcode',
]

if enable_spell_check:
extensions.append('sphinxcontrib.spelling')

# Add any paths that contain templates here, relative to this directory.
templates_path = ['_templates']

29 changes: 24 additions & 5 deletions docs/guide/custom_env.rst
@@ -34,23 +34,42 @@ That is to say, your environment must implement the following methods (and inher

def step(self, action):
...
return observation, reward, done, info
def reset(self):
...
def render(self, mode='human', close=False):
return observation # reward, done, info can't be included
def render(self, mode='human'):
...
def close (self):
...


Then you can define and train a RL agent with:

.. code-block:: python

# Instantiate and wrap the env
env = DummyVecEnv([lambda: CustomEnv(arg1, ...)])
# Instantiate the env
env = CustomEnv(arg1, ...)
# Define and Train the agent
model = A2C(CnnPolicy, env).learn(total_timesteps=1000)
model = A2C('CnnPolicy', env).learn(total_timesteps=1000)


To check that your environment follows the gym interface, please use:

.. code-block:: python

from stable_baselines.common.env_checker import check_env

env = CustomEnv(arg1, ...)
# It will check your custom environment and output additional warnings if needed
check_env(env)



We have created a `colab notebook <https://colab.research.google.com/github/araffin/rl-tutorial-jnrr19/blob/master/5_custom_gym_env.ipynb>`_ for
a concrete example of creating a custom environment.

You can find a `complete guide online <https://github.com/openai/gym/blob/master/docs/creating-environments.md>`_
You can also find a `complete guide online <https://github.com/openai/gym/blob/master/docs/creating-environments.md>`_
on creating a custom Gym environment.


52 changes: 32 additions & 20 deletions docs/guide/examples.rst
@@ -7,6 +7,8 @@ Try it online with Colab Notebooks!
All the following examples can be executed online using Google colab |colab|
notebooks:

- `Full Tutorial <https://github.com/araffin/rl-tutorial-jnrr19>`_
- `All Notebooks <https://github.com/Stable-Baselines-Team/rl-colab-notebooks>`_
- `Getting Started`_
- `Training, Saving, Loading`_
- `Multiprocessing`_
@@ -16,14 +18,14 @@ notebooks:
- `Hindsight Experience Replay`_
- `RL Baselines zoo`_

.. _Getting Started: https://colab.research.google.com/drive/1_1H5bjWKYBVKbbs-Kj83dsfuZieDNcFU
.. _Training, Saving, Loading: https://colab.research.google.com/drive/16QritJF5kgT3mtnODepld1fo5tFnFCoc
.. _Multiprocessing: https://colab.research.google.com/drive/1ZzNFMUUi923foaVsYb4YjPy4mjKtnOxb
.. _Monitor Training and Plotting: https://colab.research.google.com/drive/1L_IMo6v0a0ALK8nefZm6PqPSy0vZIWBT
.. _Atari Games: https://colab.research.google.com/drive/1iYK11yDzOOqnrXi1Sfjm1iekZr4cxLaN
.. _Breakout: https://colab.research.google.com/drive/14NwwEHwN4hdNgGzzySjxQhEVDff-zr7O
.. _Hindsight Experience Replay: https://colab.research.google.com/drive/1VDD0uLi8wjUXIqAdLKiK15XaEe0z2FOc
.. _RL Baselines zoo: https://colab.research.google.com/drive/1cPGK3XrCqEs3QLqiijsfib9OFht3kObX
.. _Getting Started: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/master/stable_baselines_getting_started.ipynb
.. _Training, Saving, Loading: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/master/saving_loading_dqn.ipynb
.. _Multiprocessing: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/master/multiprocessing_rl.ipynb
.. _Monitor Training and Plotting: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/master/monitor_training.ipynb
.. _Atari Games: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/master/atari_games.ipynb
.. _Breakout: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/master/breakout.ipynb
.. _Hindsight Experience Replay: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/master/stable_baselines_her.ipynb
.. _RL Baselines zoo: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/master/rl-baselines-zoo.ipynb

.. |colab| image:: ../_static/img/colab.svg

@@ -34,7 +36,7 @@ In the following example, we will train, save and load a DQN model on the Lunar

.. image:: ../_static/img/try_it.png
:scale: 30 %
:target: https://colab.research.google.com/drive/16QritJF5kgT3mtnODepld1fo5tFnFCoc
:target: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/master/saving_loading_dqn.ipynb


.. figure:: https://cdn-images-1.medium.com/max/960/1*f4VZPKOI0PYNWiwt0la0Rg.gif
@@ -56,6 +58,8 @@ In the following example, we will train, save and load a DQN model on the Lunar
import gym

from stable_baselines import DQN
from stable_baselines.common.evaluation import evaluate_policy


# Create environment
env = gym.make('LunarLander-v2')
@@ -71,6 +75,9 @@ In the following example, we will train, save and load a DQN model on the Lunar
# Load the trained agent
model = DQN.load("dqn_lunar")

# Evaluate the agent
mean_reward, n_steps = evaluate_policy(model, model.get_env(), n_eval_episodes=10)

# Enjoy trained agent
obs = env.reset()
for i in range(1000):
@@ -84,7 +91,7 @@ Multiprocessing: Unleashing the Power of Vectorized Environments

.. image:: ../_static/img/try_it.png
:scale: 30 %
:target: https://colab.research.google.com/drive/1ZzNFMUUi923foaVsYb4YjPy4mjKtnOxb
:target: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/master/multiprocessing_rl.ipynb

.. figure:: https://cdn-images-1.medium.com/max/960/1*h4WTQNVIsvMXJTCpXm_TAw.gif

@@ -98,7 +105,7 @@ Multiprocessing: Unleashing the Power of Vectorized Environments

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import SubprocVecEnv
from stable_baselines.common import set_global_seeds
from stable_baselines.common import set_global_seeds, make_vec_env
from stable_baselines import ACKTR

def make_env(env_id, rank, seed=0):
@@ -123,6 +130,10 @@ Multiprocessing: Unleashing the Power of Vectorized Environments
# Create the vectorized environment
env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)])

# Stable Baselines provides you with make_vec_env() helper
# which does exactly the previous steps for you:
# env = make_vec_env(env_id, n_envs=num_cpu, seed=0)

model = ACKTR(MlpPolicy, env, verbose=1)
model.learn(total_timesteps=25000)

@@ -144,7 +155,7 @@ If your callback returns False, training is aborted early.

.. image:: ../_static/img/try_it.png
:scale: 30 %
:target: https://colab.research.google.com/drive/1L_IMo6v0a0ALK8nefZm6PqPSy0vZIWBT
:target: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/master/monitor_training.ipynb

.. figure:: ../_static/img/learning_curve.png

@@ -231,7 +242,7 @@ and multiprocessing for you.

.. image:: ../_static/img/try_it.png
:scale: 30 %
:target: https://colab.research.google.com/drive/1iYK11yDzOOqnrXi1Sfjm1iekZr4cxLaN
:target: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/master/atari_games.ipynb


.. code-block:: python
@@ -340,8 +351,6 @@ A2C policy gradient updates on the model.
import gym
import numpy as np

from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import A2C

def mutate(params):
@@ -365,9 +374,8 @@

# Create env
env = gym.make('CartPole-v1')
env = DummyVecEnv([lambda: env])
# Create policy with a small network
model = A2C(MlpPolicy, env, ent_coef=0.0, learning_rate=0.1,
model = A2C('MlpPolicy', env, ent_coef=0.0, learning_rate=0.1,
policy_kwargs={'net_arch': [8, ]})

# Use traditional actor-critic policy gradient updates to
@@ -451,7 +459,7 @@ For this example, we are using `Highway-Env <https://github.com/eleurent/highway

.. image:: ../_static/img/try_it.png
:scale: 30 %
:target: https://colab.research.google.com/drive/1VDD0uLi8wjUXIqAdLKiK15XaEe0z2FOc
:target: https://colab.research.google.com/github/Stable-Baselines-Team/rl-colab-notebooks/blob/master/stable_baselines_her.ipynb


.. figure:: https://raw.githubusercontent.com/eleurent/highway-env/gh-media/docs/media/parking-env.gif
@@ -546,6 +554,9 @@ You can also move from learning on one environment to another for `continual lea
obs, rewards, dones, info = env.step(action)
env.render()

# Close the processes
env.close()

# The number of environments must be identical when changing environments
env = make_atari_env('SpaceInvadersNoFrameskip-v4', num_env=8, seed=0)

@@ -558,6 +569,7 @@ You can also move from learning on one environment to another for `continual lea
action, _states = model.predict(obs)
obs, rewards, dones, info = env.step(action)
env.render()
env.close()


Record a Video
@@ -591,6 +603,7 @@ Record a mp4 video (here using a random agent).
for _ in range(video_length + 1):
action = [env.action_space.sample()]
obs, _, _, _ = env.step(action)
# Save the video
env.close()


@@ -606,10 +619,9 @@ Bonus: Make a GIF of a Trained Agent
import imageio
import numpy as np

from stable_baselines.common.policies import MlpPolicy
from stable_baselines import A2C

model = A2C(MlpPolicy, "LunarLander-v2").learn(100000)
model = A2C("MlpPolicy", "LunarLander-v2").learn(100000)

images = []
obs = model.env.reset()