Add notebooks/agents.md

e10101 · Jan 9, 2022 · da71ac5 · da71ac5
1 parent e0a3fed
commit da71ac5
Showing 3 changed files with 27 additions and 8 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,17 +1,13 @@
 #FROM jupyter/tensorflow-notebook:12460db878e3
 FROM tensorflow/tensorflow:2.7.0-jupyter
-COPY requirements.txt ./
-RUN pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
 
 USER root
 
-RUN apt-get update
+RUN apt-get update && apt-get upgrade --yes
 RUN apt-get install -y python-opengl
 RUN apt-get install -y xvfb
 RUN apt-get install -y ffmpeg
 RUN apt-get install -y freeglut3-dev
 
-RUN pip install pyvirtualdisplay
-RUN pip install dm-reverb
-RUN pip install jupyterlab
-RUN pip install imageio imageio-ffmpeg
+COPY requirements.txt ./
+RUN pip install -r requirements.txt -i https://pypi.tuna.tsinghua.edu.cn/simple
diff --git a/notebooks/agents.md b/notebooks/agents.md
@@ -0,0 +1,18 @@
+# Agents in `tf-agents`
+
+## Agents
+
+| Agent (Algorithm)                                            | Description                                           | TFA Module         | Action Space         | Release | Inventor | Related Agents (or Algorithms)                               | On-policy / Off-policy |
+| ------------------------------------------------------------ | ----------------------------------------------------- | ------------------ | -------------------- | ------- | -------- | ------------------------------------------------------------ | ---------------------- |
+| BehavioralCloningAgent                                       | Behavioral Cloning                                    | behavioral_cloning |                      |         |          |                                                              |                        |
+| CategoricalDqnAgent                                          | Categorical DQN (C51)                                 | categorical_dqn    |                      |         |          |                                                              |                        |
+| CqlSacAgent                                                  | CQL-SAC                                               | cql                |                      |         |          |                                                              |                        |
+| DdpgAgent                                                    | Deep Deterministic Policy Gradient (DDPG)             | ddpg               |                      |         |          |                                                              |                        |
+| [DqnAgent](https://www.tensorflow.org/agents/api_docs/python/tf_agents/agents/DqnAgent) | Deep Q Network                                        | dqn                | Discrete             | 2013    | DeepMind |                                                              | Off-policy             |
+| [PPOAgent](https://www.tensorflow.org/agents/api_docs/python/tf_agents/agents/PPOAgent) | Proximal Policy Optimization                          | ppo                | Discrete, Continuous | 2017    | OpenAI   | ACER (Actor-Critic with Experience Replay), TRPO (Trust Region Policy Optimization) | On-policy              |
+| [PPOClipAgent](https://www.tensorflow.org/agents/api_docs/python/tf_agents/agents/PPOClipAgent) | PPO with clipped probability ratios                   | ppo                |                      |         |          |                                                              |                        |
+| [PPOKLPenaltyAgent](https://www.tensorflow.org/agents/api_docs/python/tf_agents/agents/PPOKLPenaltyAgent) | PPO with KL penalty loss                              | ppo                |                      |         |          |                                                              |                        |
+| ReinforceAgent                                               | REINFORCE                                             | reinforce          |                      |         |          |                                                              |                        |
+| SacAgent                                                     | Soft Actor Critic                                     | sac                |                      |         |          |                                                              |                        |
+| Td3Agent                                                     | Twin Delayed Deep Deterministic policy gradient (TD3) | td3                |                      |         |          |                                                              |                        |
+
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,8 @@
 tf-agents
 tensorflow-probability
-pyglet
+pyglet
+pyvirtualdisplay
+dm-reverb
+jupyterlab
+imageio
+imageio-ffmpeg