From 81d21a28bd354019977e4bc2e8ca75a3e4e867cf Mon Sep 17 00:00:00 2001 From: Yuhui Date: Mon, 8 Jul 2024 13:42:02 +0100 Subject: [PATCH 1/5] Modify readme --- Agent/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Agent/README.md b/Agent/README.md index 1522a500..2fba80ef 100644 --- a/Agent/README.md +++ b/Agent/README.md @@ -12,3 +12,5 @@ make html ``` There are also several helpful [tutorials](tutorials/) to help you get started with running and customizing Agent. + +## ROS interface \ No newline at end of file From 62fc761792480164c7aa005aaa83644c043f5527 Mon Sep 17 00:00:00 2001 From: Yuhui Date: Mon, 8 Jul 2024 14:08:24 +0100 Subject: [PATCH 2/5] Add ROS interface --- Agent/configs/task/ros_task.yaml | 16 + .../templates/default/direct_prompt.jinja | 3 + .../templates/default/trajectory.jinja | 2 +- Agent/src/agent/tasks/ros_api.py | 67 ++++ Agent/src/agent/tasks/ros_task.py | 51 +++ README.md | 346 ++---------------- 6 files changed, 167 insertions(+), 318 deletions(-) create mode 100644 Agent/configs/task/ros_task.yaml create mode 100644 Agent/src/agent/prompts/templates/default/direct_prompt.jinja create mode 100644 Agent/src/agent/tasks/ros_api.py create mode 100644 Agent/src/agent/tasks/ros_task.py diff --git a/Agent/configs/task/ros_task.yaml b/Agent/configs/task/ros_task.yaml new file mode 100644 index 00000000..58a6f40b --- /dev/null +++ b/Agent/configs/task/ros_task.yaml @@ -0,0 +1,16 @@ +# @package _global_ +agent: + prompt_builder: + template_paths: + - default + + +task: + _target_: agent.tasks.ros_task.ROSTask + name: simplebot_test_env + description: + subtask: null + version: v0.1 + +max_env_steps: 1 +max_episodes: 100 \ No newline at end of file diff --git a/Agent/src/agent/prompts/templates/default/direct_prompt.jinja b/Agent/src/agent/prompts/templates/default/direct_prompt.jinja new file mode 100644 index 00000000..77ff8ef2 --- /dev/null +++ b/Agent/src/agent/prompts/templates/default/direct_prompt.jinja @@ -0,0 +1,3 @@ +Now please answer the question. 
+Answer in the format +Answer: diff --git a/Agent/src/agent/prompts/templates/default/trajectory.jinja b/Agent/src/agent/prompts/templates/default/trajectory.jinja index 02485bbd..19a80b5b 100644 --- a/Agent/src/agent/prompts/templates/default/trajectory.jinja +++ b/Agent/src/agent/prompts/templates/default/trajectory.jinja @@ -3,4 +3,4 @@ Here is what happened so far: {%- if action %} Action: {{action}} {%- endif %} -Observation: {{memory.retrieve({memory.mem_keys.OBSERVATION: 1.0})}} +Current: {{memory.retrieve({memory.mem_keys.OBSERVATION: 1.0})}} diff --git a/Agent/src/agent/tasks/ros_api.py b/Agent/src/agent/tasks/ros_api.py new file mode 100644 index 00000000..02b5171f --- /dev/null +++ b/Agent/src/agent/tasks/ros_api.py @@ -0,0 +1,67 @@ +import requests + + +class RosApi: + + # Defined on ros side + default_timeout = 3 * 60 # default timeout is 3 minutes + + def __init__(self, timeout=None): + self.timeout = self.default_timeout if timeout is None else timeout + self._response = None + + @staticmethod + def bad_response(obs): + return { + "success": False, + "done": True, + "reward": 0, + "obs": obs, + } + + def send_action(self, action): + url = "http://localhost:5000/llmreq" + self._response = None # reset response + try: + data = {"action": action} + resp = requests.post(url, json=data, timeout=self.timeout) + response = resp.json() + except requests.exceptions.Timeout: + response = self.bad_response("Request timeout.") + except requests.exceptions.RequestException as e: + response = self.bad_response(f"Request exception: {e}") + self._response = response + + def get_env_observation(self): + url = "http://localhost:5000/rosenv" + self._response = None # reset response + try: + data = {"": ""} + resp = requests.post(url, json=data, timeout=self.timeout) + response = resp.json() + except requests.exceptions.Timeout: + response = self.bad_response("Request timeout.") + except requests.exceptions.RequestException as e: + response = self.bad_response(f"Request exception: {e}") + self._response = response + + return response + + def get_feedback(self): + url = "http://localhost:5000/rosfdb" + self._response = None # reset response + try: + data = {"": ""} + resp = requests.post(url, json=data, timeout=self.timeout) + response = resp.json() + except requests.exceptions.Timeout: + response = self.bad_response("Request timeout.") + except requests.exceptions.RequestException as e: + response = self.bad_response(f"Request exception: {e}") + self._response = response + + return response + + def receive_response(self): + assert self._response is not None, "did not receive a response" + return self._response diff --git a/Agent/src/agent/tasks/ros_task.py b/Agent/src/agent/tasks/ros_task.py new file mode 100644 index 00000000..a0c24d4d --- /dev/null +++ b/Agent/src/agent/tasks/ros_task.py @@ -0,0 +1,51 @@ +from agent.tasks import Task +from .ros_api import RosApi +from typing import Any, Dict +import warnings + + +class ROSTask(Task): + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.ros_api = RosApi() + print(self.ros_api) + self.response = {"reward": 0.0, + "done": False, + "obs": ""} + self.possible_actions = [] + + def answer_parser(self, raw_response: str): + return raw_response + + def is_complete(self): + return self.response['done'] + + def reset(self, next_subtask: str | None = None) -> Dict[str, str]: + """Reset the environment and return the initial observation.""" + + if next_subtask is not None: + warnings.warn("ros_task does not support subtasks, ignoring 
subtask") + + response= self.ros_api.get_env_observation() + + return { + "_text_obs": response, + "_available_actions": self.possible_actions + } + + def get_observation(self): + obs = self.ros_api.get_env_observation() + fdb = self.ros_api.get_feedback() + return { + "_available_actions": obs, + "_available_actions": fdb + } + + def step(self, action): + print(action) + self.ros_api.send_action(action) + self.response = self.ros_api.receive_response() + print(self.response['obs']) + return {}, self.response["reward"], self.response["done"] + diff --git a/README.md b/README.md index 873fde76..b5b0f6fc 100644 --- a/README.md +++ b/README.md @@ -1,317 +1,29 @@ -# Bayesian Optimisation & Reinforcement Learning Research - -This directory contains official implementations for Bayesian optimization & Reinforcement Learning works developed by -Huawei, Noah's Ark Lab. - -- Bayesian Optimisation Research - - [HEBO: Heteroscedastic Evolutionary Bayesian Optimisation](./HEBO) - - [MCBO: Framework and Benchmarks for Combinatorial and Mixed-variable Bayesian Optimization](./MCBO) - - [NAP: End-to-End Meta-Bayesian Optimisation with Transformer Neural Processes](./NAP) - - [RDUCB: High Dimensional Bayesian Optimisation with Random Decompositions](./RDUCB) - - [AntBO: Antibody Design with Combinatorial Bayesian Optimisation](./AntBO) - - [BOiLS: Bayesian Optimisation for Logic Synthesis](./BOiLS) - - [T-LBO: BO in a latent space shaped by metric learning](./T-LBO) - - [Bayesian Optimisation with Compositional Optimisers](./CompBO) - - [AIRBO: Efficient Robust Bayesian Optimisation for Arbitrary Uncertain Inputs](./AIRBO) -- Reinforcement Learning Research - - [Enhancing Reinforcement Learning Agents with Local Guides](RLLG) - - [Sauté RL and Simmer RL: Safe Reinforcement Learning Using Safety State Augmentation ](./SIMMER) - - [Model-Based Offline Reinforcement Learning with Pessimism-Modulated Dynamics Belief](./PMDB) -- Robotics Research - - [ROS-LLM: A ROS framework for embodied AI with task feedback and structured reasoning](./ROSLLM) - -Further instructions are provided in the README files associated to each project. - -# Bayesian Optimisation Research - -## [HEBO](./HEBO) - -drawing - -Bayesian optimization library developed by Huawei Noahs Ark Decision Making and Reasoning (DMnR) lab. The -winning submission to -the [NeurIPS 2020 Black-Box Optimisation Challenge](https://bbochallenge.com/leaderboard). - -## [MCBO](./MCBO) - -

- - -Codebase associated to: [Framework and Benchmarks for Combinatorial and Mixed-variable Bayesian Optimization -](https://arxiv.org/abs/2306.09803) accepted at **NeurIPS** (2023). - -##### Abstract - -This paper introduces a modular framework for Mixed-variable and Combinatorial -Bayesian Optimization (MCBO) to address the lack of systematic benchmarking -and standardized evaluation in the field. Current MCBO papers often introduce -non-diverse or non-standard benchmarks to evaluate their methods, impeding the -proper assessment of different MCBO primitives and their combinations. Additionally, papers introducing a solution for a -single MCBO primitive often omit -benchmarking against baselines that utilize the same methods for the remaining primitives. This omission is primarily -due to the significant implementation -overhead involved, resulting in a lack of controlled assessments and an inability -to showcase the merits of a contribution effectively. To overcome these challenges, our proposed framework enables an -effortless combination of Bayesian -Optimization components, and provides a diverse set of synthetic and real-world -benchmarking tasks. Leveraging this flexibility, we implement 47 novel MCBO -algorithms and benchmark them against seven existing MCBO solvers and five -standard black-box optimization algorithms on ten tasks, conducting over 4000 -experiments. Our findings reveal a superior combination of MCBO primitives -outperforming existing approaches and illustrate the significance of model fit and -the use of a trust region. We make our MCBO library available under the MIT -license at https://github.com/huawei-noah/HEBO/tree/master/MCBO. - -## [NAP: End-to-End Meta-Bayesian Optimisation with Transformer Neural Processes](./NAP/) - -![regret-all](./NAP/figures/regret-all.png) -Codebase associated -to: [End-to-End Meta-Bayesian Optimisation with Transformer Neural Processes](https://arxiv.org/abs/2305.15930), -accepted at **NeurIPS** (2023). - -##### Abstract - -Meta-Bayesian optimisation (meta-BO) aims to improve the sample efficiency of Bayesian optimisation by leveraging data -from related tasks. While previous methods successfully meta-learn either a surrogate model or an acquisition function -independently, joint training of both components remains an open challenge. This paper proposes the first end-to-end -differentiable meta-BO framework that generalises neural processes to learn acquisition functions via transformer -architectures. We enable this end-to-end framework with reinforcement learning (RL) to tackle the lack of labelled -acquisition data. Early on, we notice that training transformer-based neural processes from scratch with RL is -challenging due to insufficient supervision, especially when rewards are sparse. We formalise this claim with a -combinatorial analysis showing that the widely used notion of regret as a reward signal exhibits a logarithmic sparsity -pattern in trajectory lengths. To tackle this problem, we augment the RL objective with an auxiliary task that guides -part of the architecture to learn a valid probabilistic model as an inductive bias. We demonstrate that our method -achieves state-of-the-art regret results against various baselines in experiments on standard hyperparameter -optimisation tasks and also outperforms others in the real-world problems of mixed-integer programming tuning, antibody -design, and logic synthesis for electronic design automation. 
- -## [RDUCB: High Dimensional Bayesian Optimisation with Random Decompositions](./RDUCB) - -

- -Codebase associated -to: [Are Random Decomositions all we need in High Dimensional Bayesian Optimisation](https://arxiv.org/pdf/2301.12844.pdf) -accepted at **ICML** (2023). - -##### Abstract - -Learning decompositions of expensive-to-evaluate black-box functions promises to scale Bayesian optimisation (BO) to -high-dimensional -problems. However, the success of these techniques depends on finding proper decompositions -that accurately represent the black-box. While previous works learn those decompositions based -on data, we investigate data-independent decomposition sampling rules in this paper. We find -that data-driven learners of decompositions can be easily misled towards local decompositions -that do not hold globally across the search space. Then, we formally show that a random tree-based -decomposition sampler exhibits favourable theoretical guarantees that effectively trade -off maximal information gain and functional mismatch between the actual black-box and its -surrogate as provided by the decomposition. Those results motivate the development of the -random decomposition upper-confidence bound algorithm (RDUCB) that is straightforward -to implement - (almost) plug-and-play - and, surprisingly, yields significant empirical gains -compared to the previous state-of-the-art on a comprehensive set of benchmarks. We also -confirm the plug-and-play nature of our modelling component by integrating our method with -HEBO, showing improved practical gains in the highest dimensional tasks from Bayesmark. - -## [AntBO: Antibody Design with Combinatorial Bayesian Optimisation](./AntBO) - -![AntBO overview](./AntBO/figures/AntBO_illustrationPNG.PNG?raw=true) - -Codebase associated -to: [AntBO: Towards Real-World Automated Antibody Design with Combinatorial Bayesian Optimisation](https://www.sciencedirect.com/science/article/pii/S2667237522002764) -published in **Cell Reports Method**s (2023). - -##### Abstract - -Antibodies are canonically Y-shaped multimeric proteins capable of highly specific molecular recognition. The CDRH3 -region located at the tip of variable chains of an antibody dominates antigen-binding specificity. Therefore, it is a -priority to design optimal antigen-specific CDRH3 regions to develop therapeutic antibodies to combat harmful pathogens. -However, the combinatorial nature of CDRH3 sequence space makes it impossible to search for an optimal binding sequence -exhaustively and efficiently, especially not experimentally. Here, we present AntBO: a Combinatorial Bayesian -Optimisation framework enabling efficient in silico design of the CDRH3 region. Ideally, antibodies should bind to -their target antigen and be free from any harmful outcomes. Therefore, we introduce the CDRH3 trust region that -restricts the search to sequences with feasible developability scores. To benchmark AntBO, we use the Absolut! software -suite as a black-box oracle because it can score the target specificity and affinity of designed antibodies in silico -in an unconstrained fashion. The results across 188 antigens demonstrate the benefit of AntBO in designing CDRH3 regions -with diverse biophysical properties. In under 200 protein designs, AntBO can suggest antibody sequences that outperform -the best binding sequence drawn from 6.9 million experimentally obtained CDRH3s and a commonly used genetic algorithm -baseline. Additionally, AntBO finds very-high affinity CDRH3 sequences in only 38 protein designs whilst requiring no -domain knowledge. 
We conclude AntBO brings automated antibody design methods closer to what is practically viable for -in vitro experimentation. - -## [BOiLS: Bayesian Optimisation for Logic Synthesis](./BOiLS) - -

- drawing -

- -Codebase associated to: [BOiLS: Bayesian Optimisation for Logic Synthesis](https://arxiv.org/abs/2111.06178) accepted -at **DATE22** conference. - -##### Abstract - -Optimising the quality-of-results (QoR) of circuits during logic synthesis is a formidable challenge necessitating the -exploration of exponentially sized search spaces. While expert-designed operations aid in uncovering effective -sequences, the increase in complexity of logic circuits favours automated procedures. Inspired by the successes of -machine learning, researchers adapted deep learning and reinforcement learning to logic synthesis applications. However -successful, those techniques suffer from high sample complexities preventing widespread adoption. To enable efficient -and scalable solutions, we propose BOiLS, the first algorithm adapting modern Bayesian optimisation to navigate the -space of synthesis operations. BOiLS requires no human intervention and effectively trades-off exploration versus -exploitation through novel Gaussian process kernels and trust-region constrained acquisitions. In a set of experiments -on EPFL benchmarks, we demonstrate BOiLS's superior performance compared to state-of-the-art in terms of both sample -efficiency and QoR values. - -## [T-LBO: BO in a latent space shaped by metric learning](./T-LBO) - -

- -Codebase associated -to: [High-Dimensional Bayesian Optimisation with Variational Autoencoders and Deep Metric Learning](https://arxiv.org/abs/2106.03609) - -##### Abstract - -We introduce a method based on deep metric learning to perform Bayesian optimisation over high-dimensional, structured -input spaces using variational autoencoders (VAEs). By extending ideas from supervised deep metric learning, we address -a longstanding problem in high-dimensional VAE Bayesian optimisation, namely how to enforce a discriminative latent -space as an inductive bias. Importantly, we achieve such an inductive bias using just 1% of the available labelled data -relative to previous work, highlighting the sample efficiency of our approach. As a theoretical contribution, we present -a proof of vanishing regret for our method. As an empirical contribution, we present state-of-the-art results on -real-world high-dimensional black-box optimisation problems including property-guided molecule generation. It is the -hope that the results presented in this paper can act as a guiding principle for realising effective high-dimensional -Bayesian optimisation. - -## [Bayesian Optimisation with Compositional Optimisers](./CompBO) - -
- -Codebase associated -to: [Are we Forgetting about Compositional Optimisers in Bayesian Optimisation?](https://www.jmlr.org/papers/v22/20-1422.html) -accepted at **JMLR**. - -##### Abstract - -Bayesian optimisation presents a sample-efficient methodology for global optimisation. Within this framework, a crucial -performance-determining subroutine is the maximisation of the acquisition function, a task complicated by the fact that -acquisition functions tend to be non-convex and thus nontrivial to optimise. In this paper, we undertake a comprehensive -empirical study of approaches to maximise the acquisition function. Additionally, by deriving novel, yet mathematically -equivalent, compositional forms for popular acquisition functions, we recast the maximisation task as a compositional -optimisation problem, allowing us to benefit from the extensive literature in this field. We highlight the empirical -advantages of the compositional approach to acquisition function maximisation across 3958 individual experiments -comprising synthetic optimisation tasks as well as tasks from Bayesmark. Given the generality of the acquisition -function maximisation subroutine, we posit that the adoption of compositional optimisers has the potential to yield -performance improvements across all domains in which Bayesian optimisation is currently being applied. - -## [AIRBO](./AIRBO) - -
drawing - - - -Codebase associated -to: [Efficient Robust Bayesian Optimization for Arbitrary Uncertain Inputs](https://arxiv.org/abs/2310.20145) - -##### Abstract - -Bayesian Optimization (BO) is a sample-efficient optimization algorithm widely employed across various applications. In -some challenging BO tasks, input uncertainty arises due to the inevitable randomness in the optimization process, such -as machining errors, execution noise, or contextual variability. This uncertainty deviates the input from the intended -value before evaluation, resulting in significant performance fluctuations in the final result. In this paper, we -introduce a novel robust Bayesian Optimization algorithm, AIRBO, which can effectively identify a robust optimum that -performs consistently well under arbitrary input uncertainty. Our method directly models the uncertain inputs of -arbitrary distributions by empowering the Gaussian Process with the Maximum Mean Discrepancy (MMD) and further -accelerates the posterior inference via Nystrom approximation. Rigorous theoretical regret bound is established under -MMD estimation error and extensive experiments on synthetic functions and real problems demonstrate that our approach -can handle various input uncertainties and achieve state-of-the-art performance. - -# Reinforcement Learning Research - -## [Enhancing Reinforcement Learning Agents with Local Guides](./RLLG) - -Codebase associated -to [paper: Enhancing Reinforcement Learning Agents with Local Guides](https://hal.science/hal-04052358/file/Final_Reinforcement_Learning_with_Local_Guides.pdf) - -##### Abstract - -This paper addresses the problem of integrating local guide policies into a Reinforcement Learning agent. For this, we -show how -to adapt existing algorithms to this setting before introducing a -novel algorithm based on a noisy policy-switching procedure. This -approach builds on a proper Approximate Policy Evaluation (APE) -scheme to provide a perturbation that carefully leads the local -guides towards better actions. We evaluated our method on a set -of classical Reinforcement Learning problems, including safetycritical systems where the agent cannot enter some areas -at the -risk of triggering catastrophic consequences. In all the proposed -environments, our agent proved to be efficient at leveraging those -policies to improve the performance of any APE-based Reinforcement Learning algorithm, especially in its first learning -stages. - -## [Sauté RL and Simmer RL: Safe Reinforcement Learning Using Safety State Augmentation](./SIMMER) - -Codebase associated to: [Sauté RL: Almost Surely Safe RL Using State Augmentation](https://arxiv.org/pdf/2202.06558.pdf) -and [Enhancing Safe Exploration Using Safety State Augmentation](https://arxiv.org/pdf/2206.02675.pdf). - -##### Abstract for Sauté RL: Almost Surely Safe RL Using State Augmentation (ICML 2022) - -Satisfying safety constraints almost surely (or with probability one) can be critical for deployment of Reinforcement -Learning (RL) in real-life applications. For example, plane landing and take-off should ideally occur with probability -one. We address the problem by introducing Safety Augmented (Saute) Markov Decision Processes (MDPs), where the safety -constraints are eliminated by augmenting them into the state-space and reshaping the objective. We show that Saute MDP -satisfies the Bellman equation and moves us closer to solving Safe RL with constraints satisfied almost surely. 
We argue -that Saute MDP allows to view Safe RL problem from a different perspective enabling new features. For instance, our -approach has a plug-and-play nature, i.e., any RL algorithm can be "sauteed". Additionally, state augmentation allows -for policy generalization across safety constraints. We finally show that Saute RL algorithms can outperform their -state-of-the-art counterparts when constraint satisfaction is of high importance. - -##### Abstract for Effects of Safety State Augmentation on Safe Exploration (NeurIPS 2022) - -Safe exploration is a challenging and important problem in model-free reinforcement learning (RL). Often the safety cost -is sparse and unknown, which unavoidably leads to constraint violations -- a phenomenon ideally to be avoided in -safety-critical applications. We tackle this problem by augmenting the state-space with a safety state, which is -nonnegative if and only if the constraint is satisfied. The value of this state also serves as a distance toward -constraint violation, while its initial value indicates the available safety budget. This idea allows us to derive -policies for scheduling the safety budget during training. We call our approach Simmer (Safe policy IMproveMEnt for -RL) to reflect the careful nature of these schedules. We apply this idea to two safe RL problems: RL with constraints -imposed on an average cost, and RL with constraints imposed on a cost with probability one. Our experiments suggest -that simmering a safe algorithm can improve safety during training for both settings. We further show that Simmer can -stabilize training and improve the performance of safe RL with average constraints. - -## [Model-Based Offline Reinforcement Learning with Pessimism-Modulated Dynamics Belief](./PMDB) - -Code associdated -to: [Model-Based Offline Reinforcement Learning with Pessimism-Modulated Dynamics Belief](https://nips.cc/Conferences/2022/Schedule?showEvent=54842) -accepted -at **NeurIPS (2022)** conference. - -#### Abstract - -Model-based offline reinforcement learning (RL) aims to find highly rewarding policy, by leveraging a previously -collected static dataset and a dynamics model. While learned through reuse of static dataset, the dynamics model's -generalization ability hopefully promotes policy learning if properly utilized. To that end, several works propose to -quantify the uncertainty of predicted dynamics, and explicitly apply it to penalize reward. However, as the dynamics and -the reward are intrinsically different factors in context of MDP, characterizing the impact of dynamics uncertainty -through reward penalty may incur unexpected tradeoff between model utilization and risk avoidance. In this work, we -instead maintain a belief distribution over dynamics, and evaluate/optimize policy through biased sampling from the -belief. The sampling procedure, biased towards pessimism, is derived based on an alternating Markov game formulation -of offline RL. We formally show that the biased sampling naturally induces an updated dynamics belief with -policy-dependent reweighting factor, termed *Pessimism-Modulated Dynamics Belief*. To improve policy, we devise an -iterative regularized policy optimization algorithm for the game, with guarantee of monotonous improvement under certain -condition. To make practical, we further devise an offline RL algorithm to approximately find the solution. Empirical -results show that the proposed approach achieves state-of-the-art performance on a wide range of benchmark tasks. 
- ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- - -## Codebase Contributors - - Current contributors: Antoine Grosnit, Alexandre Max Maravel, Taher Jafferjee, Wenlong Lyu, Kaiyang Guo, Juliusz Ziomek, Paul Daoudi, Merwan Barlier, Christopher E. Mower. - - Alumni / External contributors: Alexander I. Cowen-Rivers, Kamil Dreczkowski, Aivar Sootla, Ryan Rhys Griffiths, Zhi Wang, Ludovic Dos Santos, Bogdan Robu, Christophe Prieur. - +# Agent + +This library allows you to run many tasks using LLMs in a modular way! + +## Documentation + +You can compile the documentation yourself under `docs/` by running: + +```bash +cd docs +make html +``` + +There are also several helpful [tutorials](tutorials/) to help you get started with running and customizing Agent. + + +## ROS interface + +To run the agent with Flask interface with ROS + +In conda environment using the following command: + +``` +python ../src/agent/start.py task=ros_task method=direct llm@agent.llm=human +``` + +Replace the ``llm@agent.llm=human`` with your actual model. + +In ROS environment follow the instruction in the ros_pange_agent packge From 67592b4cd065fe3064d17c15989577f007feb490 Mon Sep 17 00:00:00 2001 From: Yuhui Date: Mon, 8 Jul 2024 14:12:42 +0100 Subject: [PATCH 3/5] Update readme --- Agent/README.md | 15 ++++++++++++++- README.md | 29 ----------------------------- 2 files changed, 14 insertions(+), 30 deletions(-) delete mode 100644 README.md diff --git a/Agent/README.md b/Agent/README.md index 2fba80ef..b5b0f6fc 100644 --- a/Agent/README.md +++ b/Agent/README.md @@ -13,4 +13,17 @@ make html There are also several helpful [tutorials](tutorials/) to help you get started with running and customizing Agent. -## ROS interface \ No newline at end of file + +## ROS interface + +To run the agent with Flask interface with ROS + +In conda environment using the following command: + +``` +python ../src/agent/start.py task=ros_task method=direct llm@agent.llm=human +``` + +Replace the ``llm@agent.llm=human`` with your actual model. + +In ROS environment follow the instruction in the ros_pange_agent packge diff --git a/README.md b/README.md deleted file mode 100644 index b5b0f6fc..00000000 --- a/README.md +++ /dev/null @@ -1,29 +0,0 @@ -# Agent - -This library allows you to run many tasks using LLMs in a modular way! - -## Documentation - -You can compile the documentation yourself under `docs/` by running: - -```bash -cd docs -make html -``` - -There are also several helpful [tutorials](tutorials/) to help you get started with running and customizing Agent. - - -## ROS interface - -To run the agent with Flask interface with ROS - -In conda environment using the following command: - -``` -python ../src/agent/start.py task=ros_task method=direct llm@agent.llm=human -``` - -Replace the ``llm@agent.llm=human`` with your actual model. 
- -In ROS environment follow the instruction in the ros_pange_agent packge From e4a47dc691445336ea66670ce67736a4bfdbf90e Mon Sep 17 00:00:00 2001 From: Yuhui Date: Mon, 8 Jul 2024 14:15:09 +0100 Subject: [PATCH 4/5] wrong readme before --- README.md | 306 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 306 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 00000000..412009cc --- /dev/null +++ b/README.md @@ -0,0 +1,306 @@ +# Bayesian Optimisation & Reinforcement Learning Research + +This directory contains official implementations for Bayesian optimization & Reinforcement Learning works developed by +Huawei, Noah's Ark Lab. + +- Bayesian Optimisation Research + - [HEBO: Heteroscedastic Evolutionary Bayesian Optimisation](./HEBO) + - [MCBO: Framework and Benchmarks for Combinatorial and Mixed-variable Bayesian Optimization](./MCBO) + - [NAP: End-to-End Meta-Bayesian Optimisation with Transformer Neural Processes](./NAP) + - [RDUCB: High Dimensional Bayesian Optimisation with Random Decompositions](./RDUCB) + - [AntBO: Antibody Design with Combinatorial Bayesian Optimisation](./AntBO) + - [BOiLS: Bayesian Optimisation for Logic Synthesis](./BOiLS) + - [T-LBO: BO in a latent space shaped by metric learning](./T-LBO) + - [Bayesian Optimisation with Compositional Optimisers](./CompBO) + - [AIRBO: Efficient Robust Bayesian Optimisation for Arbitrary Uncertain Inputs](./AIRBO) +- Reinforcement Learning Research + - [Enhancing Reinforcement Learning Agents with Local Guides](RLLG) + - [Sauté RL and Simmer RL: Safe Reinforcement Learning Using Safety State Augmentation ](./SIMMER) + - [Model-Based Offline Reinforcement Learning with Pessimism-Modulated Dynamics Belief](./PMDB) + +Further instructions are provided in the README files associated to each project. + +# Bayesian Optimisation Research + +## [HEBO](./HEBO) + +drawing + +Bayesian optimization library developed by Huawei Noahs Ark Decision Making and Reasoning (DMnR) lab. The +winning submission to +the [NeurIPS 2020 Black-Box Optimisation Challenge](https://bbochallenge.com/leaderboard). + +## [MCBO](./MCBO) + +

+ + +Codebase associated to: [Framework and Benchmarks for Combinatorial and Mixed-variable Bayesian Optimization +](https://arxiv.org/abs/2306.09803) accepted at **NeurIPS** (2023). + +##### Abstract + +This paper introduces a modular framework for Mixed-variable and Combinatorial +Bayesian Optimization (MCBO) to address the lack of systematic benchmarking +and standardized evaluation in the field. Current MCBO papers often introduce +non-diverse or non-standard benchmarks to evaluate their methods, impeding the +proper assessment of different MCBO primitives and their combinations. Additionally, papers introducing a solution for a +single MCBO primitive often omit +benchmarking against baselines that utilize the same methods for the remaining primitives. This omission is primarily +due to the significant implementation +overhead involved, resulting in a lack of controlled assessments and an inability +to showcase the merits of a contribution effectively. To overcome these challenges, our proposed framework enables an +effortless combination of Bayesian +Optimization components, and provides a diverse set of synthetic and real-world +benchmarking tasks. Leveraging this flexibility, we implement 47 novel MCBO +algorithms and benchmark them against seven existing MCBO solvers and five +standard black-box optimization algorithms on ten tasks, conducting over 4000 +experiments. Our findings reveal a superior combination of MCBO primitives +outperforming existing approaches and illustrate the significance of model fit and +the use of a trust region. We make our MCBO library available under the MIT +license at https://github.com/huawei-noah/HEBO/tree/master/MCBO. + +## [NAP: End-to-End Meta-Bayesian Optimisation with Transformer Neural Processes](./NAP/) + +![regret-all](./NAP/figures/regret-all.png) +Codebase associated +to: [End-to-End Meta-Bayesian Optimisation with Transformer Neural Processes](https://arxiv.org/abs/2305.15930), +accepted at **NeurIPS** (2023). + +##### Abstract + +Meta-Bayesian optimisation (meta-BO) aims to improve the sample efficiency of Bayesian optimisation by leveraging data +from related tasks. While previous methods successfully meta-learn either a surrogate model or an acquisition function +independently, joint training of both components remains an open challenge. This paper proposes the first end-to-end +differentiable meta-BO framework that generalises neural processes to learn acquisition functions via transformer +architectures. We enable this end-to-end framework with reinforcement learning (RL) to tackle the lack of labelled +acquisition data. Early on, we notice that training transformer-based neural processes from scratch with RL is +challenging due to insufficient supervision, especially when rewards are sparse. We formalise this claim with a +combinatorial analysis showing that the widely used notion of regret as a reward signal exhibits a logarithmic sparsity +pattern in trajectory lengths. To tackle this problem, we augment the RL objective with an auxiliary task that guides +part of the architecture to learn a valid probabilistic model as an inductive bias. We demonstrate that our method +achieves state-of-the-art regret results against various baselines in experiments on standard hyperparameter +optimisation tasks and also outperforms others in the real-world problems of mixed-integer programming tuning, antibody +design, and logic synthesis for electronic design automation. 
+ +## [RDUCB: High Dimensional Bayesian Optimisation with Random Decompositions](./RDUCB) + +

+ +Codebase associated +to: [Are Random Decomositions all we need in High Dimensional Bayesian Optimisation](https://arxiv.org/pdf/2301.12844.pdf) +accepted at **ICML** (2023). + +##### Abstract + +Learning decompositions of expensive-to-evaluate black-box functions promises to scale Bayesian optimisation (BO) to +high-dimensional +problems. However, the success of these techniques depends on finding proper decompositions +that accurately represent the black-box. While previous works learn those decompositions based +on data, we investigate data-independent decomposition sampling rules in this paper. We find +that data-driven learners of decompositions can be easily misled towards local decompositions +that do not hold globally across the search space. Then, we formally show that a random tree-based +decomposition sampler exhibits favourable theoretical guarantees that effectively trade +off maximal information gain and functional mismatch between the actual black-box and its +surrogate as provided by the decomposition. Those results motivate the development of the +random decomposition upper-confidence bound algorithm (RDUCB) that is straightforward +to implement - (almost) plug-and-play - and, surprisingly, yields significant empirical gains +compared to the previous state-of-the-art on a comprehensive set of benchmarks. We also +confirm the plug-and-play nature of our modelling component by integrating our method with +HEBO, showing improved practical gains in the highest dimensional tasks from Bayesmark. + +## [AntBO: Antibody Design with Combinatorial Bayesian Optimisation](./AntBO) + +![AntBO overview](./AntBO/figures/AntBO_illustrationPNG.PNG?raw=true) + +Codebase associated +to: [AntBO: Towards Real-World Automated Antibody Design with Combinatorial Bayesian Optimisation](https://www.sciencedirect.com/science/article/pii/S2667237522002764) +published in **Cell Reports Method**s (2023). + +##### Abstract + +Antibodies are canonically Y-shaped multimeric proteins capable of highly specific molecular recognition. The CDRH3 +region located at the tip of variable chains of an antibody dominates antigen-binding specificity. Therefore, it is a +priority to design optimal antigen-specific CDRH3 regions to develop therapeutic antibodies to combat harmful pathogens. +However, the combinatorial nature of CDRH3 sequence space makes it impossible to search for an optimal binding sequence +exhaustively and efficiently, especially not experimentally. Here, we present AntBO: a Combinatorial Bayesian +Optimisation framework enabling efficient in silico design of the CDRH3 region. Ideally, antibodies should bind to +their target antigen and be free from any harmful outcomes. Therefore, we introduce the CDRH3 trust region that +restricts the search to sequences with feasible developability scores. To benchmark AntBO, we use the Absolut! software +suite as a black-box oracle because it can score the target specificity and affinity of designed antibodies in silico +in an unconstrained fashion. The results across 188 antigens demonstrate the benefit of AntBO in designing CDRH3 regions +with diverse biophysical properties. In under 200 protein designs, AntBO can suggest antibody sequences that outperform +the best binding sequence drawn from 6.9 million experimentally obtained CDRH3s and a commonly used genetic algorithm +baseline. Additionally, AntBO finds very-high affinity CDRH3 sequences in only 38 protein designs whilst requiring no +domain knowledge. 
We conclude AntBO brings automated antibody design methods closer to what is practically viable for +in vitro experimentation. + +## [BOiLS: Bayesian Optimisation for Logic Synthesis](./BOiLS) + +

+ drawing +

+ +Codebase associated to: [BOiLS: Bayesian Optimisation for Logic Synthesis](https://arxiv.org/abs/2111.06178) accepted +at **DATE22** conference. + +##### Abstract + +Optimising the quality-of-results (QoR) of circuits during logic synthesis is a formidable challenge necessitating the +exploration of exponentially sized search spaces. While expert-designed operations aid in uncovering effective +sequences, the increase in complexity of logic circuits favours automated procedures. Inspired by the successes of +machine learning, researchers adapted deep learning and reinforcement learning to logic synthesis applications. However +successful, those techniques suffer from high sample complexities preventing widespread adoption. To enable efficient +and scalable solutions, we propose BOiLS, the first algorithm adapting modern Bayesian optimisation to navigate the +space of synthesis operations. BOiLS requires no human intervention and effectively trades-off exploration versus +exploitation through novel Gaussian process kernels and trust-region constrained acquisitions. In a set of experiments +on EPFL benchmarks, we demonstrate BOiLS's superior performance compared to state-of-the-art in terms of both sample +efficiency and QoR values. + +## [T-LBO: BO in a latent space shaped by metric learning](./T-LBO) + +

+ +Codebase associated +to: [High-Dimensional Bayesian Optimisation with Variational Autoencoders and Deep Metric Learning](https://arxiv.org/abs/2106.03609) + +##### Abstract + +We introduce a method based on deep metric learning to perform Bayesian optimisation over high-dimensional, structured +input spaces using variational autoencoders (VAEs). By extending ideas from supervised deep metric learning, we address +a longstanding problem in high-dimensional VAE Bayesian optimisation, namely how to enforce a discriminative latent +space as an inductive bias. Importantly, we achieve such an inductive bias using just 1% of the available labelled data +relative to previous work, highlighting the sample efficiency of our approach. As a theoretical contribution, we present +a proof of vanishing regret for our method. As an empirical contribution, we present state-of-the-art results on +real-world high-dimensional black-box optimisation problems including property-guided molecule generation. It is the +hope that the results presented in this paper can act as a guiding principle for realising effective high-dimensional +Bayesian optimisation. + +## [Bayesian Optimisation with Compositional Optimisers](./CompBO) + +
+ +Codebase associated +to: [Are we Forgetting about Compositional Optimisers in Bayesian Optimisation?](https://www.jmlr.org/papers/v22/20-1422.html) +accepted at **JMLR**. + +##### Abstract + +Bayesian optimisation presents a sample-efficient methodology for global optimisation. Within this framework, a crucial +performance-determining subroutine is the maximisation of the acquisition function, a task complicated by the fact that +acquisition functions tend to be non-convex and thus nontrivial to optimise. In this paper, we undertake a comprehensive +empirical study of approaches to maximise the acquisition function. Additionally, by deriving novel, yet mathematically +equivalent, compositional forms for popular acquisition functions, we recast the maximisation task as a compositional +optimisation problem, allowing us to benefit from the extensive literature in this field. We highlight the empirical +advantages of the compositional approach to acquisition function maximisation across 3958 individual experiments +comprising synthetic optimisation tasks as well as tasks from Bayesmark. Given the generality of the acquisition +function maximisation subroutine, we posit that the adoption of compositional optimisers has the potential to yield +performance improvements across all domains in which Bayesian optimisation is currently being applied. + +## [AIRBO](./AIRBO) + +
drawing + + + +Codebase associated +to: [Efficient Robust Bayesian Optimization for Arbitrary Uncertain Inputs](https://arxiv.org/abs/2310.20145) + +##### Abstract + +Bayesian Optimization (BO) is a sample-efficient optimization algorithm widely employed across various applications. In +some challenging BO tasks, input uncertainty arises due to the inevitable randomness in the optimization process, such +as machining errors, execution noise, or contextual variability. This uncertainty deviates the input from the intended +value before evaluation, resulting in significant performance fluctuations in the final result. In this paper, we +introduce a novel robust Bayesian Optimization algorithm, AIRBO, which can effectively identify a robust optimum that +performs consistently well under arbitrary input uncertainty. Our method directly models the uncertain inputs of +arbitrary distributions by empowering the Gaussian Process with the Maximum Mean Discrepancy (MMD) and further +accelerates the posterior inference via Nystrom approximation. Rigorous theoretical regret bound is established under +MMD estimation error and extensive experiments on synthetic functions and real problems demonstrate that our approach +can handle various input uncertainties and achieve state-of-the-art performance. + +# Reinforcement Learning Research + +## [Enhancing Reinforcement Learning Agents with Local Guides](./RLLG) + +Codebase associated +to [paper: Enhancing Reinforcement Learning Agents with Local Guides](https://hal.science/hal-04052358/file/Final_Reinforcement_Learning_with_Local_Guides.pdf) + +##### Abstract + +This paper addresses the problem of integrating local guide policies into a Reinforcement Learning agent. For this, we +show how +to adapt existing algorithms to this setting before introducing a +novel algorithm based on a noisy policy-switching procedure. This +approach builds on a proper Approximate Policy Evaluation (APE) +scheme to provide a perturbation that carefully leads the local +guides towards better actions. We evaluated our method on a set +of classical Reinforcement Learning problems, including safetycritical systems where the agent cannot enter some areas +at the +risk of triggering catastrophic consequences. In all the proposed +environments, our agent proved to be efficient at leveraging those +policies to improve the performance of any APE-based Reinforcement Learning algorithm, especially in its first learning +stages. + +## [Sauté RL and Simmer RL: Safe Reinforcement Learning Using Safety State Augmentation](./SIMMER) + +Codebase associated to: [Sauté RL: Almost Surely Safe RL Using State Augmentation](https://arxiv.org/pdf/2202.06558.pdf) +and [Enhancing Safe Exploration Using Safety State Augmentation](https://arxiv.org/pdf/2206.02675.pdf). + +##### Abstract for Sauté RL: Almost Surely Safe RL Using State Augmentation (ICML 2022) + +Satisfying safety constraints almost surely (or with probability one) can be critical for deployment of Reinforcement +Learning (RL) in real-life applications. For example, plane landing and take-off should ideally occur with probability +one. We address the problem by introducing Safety Augmented (Saute) Markov Decision Processes (MDPs), where the safety +constraints are eliminated by augmenting them into the state-space and reshaping the objective. We show that Saute MDP +satisfies the Bellman equation and moves us closer to solving Safe RL with constraints satisfied almost surely. 
We argue +that Saute MDP allows to view Safe RL problem from a different perspective enabling new features. For instance, our +approach has a plug-and-play nature, i.e., any RL algorithm can be "sauteed". Additionally, state augmentation allows +for policy generalization across safety constraints. We finally show that Saute RL algorithms can outperform their +state-of-the-art counterparts when constraint satisfaction is of high importance. + +##### Abstract for Effects of Safety State Augmentation on Safe Exploration (NeurIPS 2022) + +Safe exploration is a challenging and important problem in model-free reinforcement learning (RL). Often the safety cost +is sparse and unknown, which unavoidably leads to constraint violations -- a phenomenon ideally to be avoided in +safety-critical applications. We tackle this problem by augmenting the state-space with a safety state, which is +nonnegative if and only if the constraint is satisfied. The value of this state also serves as a distance toward +constraint violation, while its initial value indicates the available safety budget. This idea allows us to derive +policies for scheduling the safety budget during training. We call our approach Simmer (Safe policy IMproveMEnt for +RL) to reflect the careful nature of these schedules. We apply this idea to two safe RL problems: RL with constraints +imposed on an average cost, and RL with constraints imposed on a cost with probability one. Our experiments suggest +that simmering a safe algorithm can improve safety during training for both settings. We further show that Simmer can +stabilize training and improve the performance of safe RL with average constraints. + +## [Model-Based Offline Reinforcement Learning with Pessimism-Modulated Dynamics Belief](./PMDB) + +Code associdated +to: [Model-Based Offline Reinforcement Learning with Pessimism-Modulated Dynamics Belief](https://nips.cc/Conferences/2022/Schedule?showEvent=54842) +accepted +at **NeurIPS (2022)** conference. + +#### Abstract + +Model-based offline reinforcement learning (RL) aims to find highly rewarding policy, by leveraging a previously +collected static dataset and a dynamics model. While learned through reuse of static dataset, the dynamics model's +generalization ability hopefully promotes policy learning if properly utilized. To that end, several works propose to +quantify the uncertainty of predicted dynamics, and explicitly apply it to penalize reward. However, as the dynamics and +the reward are intrinsically different factors in context of MDP, characterizing the impact of dynamics uncertainty +through reward penalty may incur unexpected tradeoff between model utilization and risk avoidance. In this work, we +instead maintain a belief distribution over dynamics, and evaluate/optimize policy through biased sampling from the +belief. The sampling procedure, biased towards pessimism, is derived based on an alternating Markov game formulation +of offline RL. We formally show that the biased sampling naturally induces an updated dynamics belief with +policy-dependent reweighting factor, termed *Pessimism-Modulated Dynamics Belief*. To improve policy, we devise an +iterative regularized policy optimization algorithm for the game, with guarantee of monotonous improvement under certain +condition. To make practical, we further devise an offline RL algorithm to approximately find the solution. Empirical +results show that the proposed approach achieves state-of-the-art performance on a wide range of benchmark tasks. 
From 76b38a95dd747e455d5e134d9d4123b05233f570 Mon Sep 17 00:00:00 2001 From: Yuhui Date: Mon, 8 Jul 2024 15:45:08 +0100 Subject: [PATCH 5/5] Add ROS interafce on ROS side --- ROSLLM/ros_agent/CMakeLists.txt | 28 +++++ ROSLLM/ros_agent/README.md | 33 ++++++ .../ros_agent/example/agentactionservice.py | 29 +++++ ROSLLM/ros_agent/example/publish_time.py | 21 ++++ ROSLLM/ros_agent/package.xml | 21 ++++ ROSLLM/ros_agent/scripts/command_window.py | 25 ++++ ROSLLM/ros_agent/scripts/ros_agent_node.py | 110 ++++++++++++++++++ ROSLLM/ros_agent/srv/HandleAgentAction.srv | 5 + 8 files changed, 272 insertions(+) create mode 100644 ROSLLM/ros_agent/CMakeLists.txt create mode 100644 ROSLLM/ros_agent/README.md create mode 100755 ROSLLM/ros_agent/example/agentactionservice.py create mode 100644 ROSLLM/ros_agent/example/publish_time.py create mode 100644 ROSLLM/ros_agent/package.xml create mode 100644 ROSLLM/ros_agent/scripts/command_window.py create mode 100644 ROSLLM/ros_agent/scripts/ros_agent_node.py create mode 100644 ROSLLM/ros_agent/srv/HandleAgentAction.srv diff --git a/ROSLLM/ros_agent/CMakeLists.txt b/ROSLLM/ros_agent/CMakeLists.txt new file mode 100644 index 00000000..ded38f52 --- /dev/null +++ b/ROSLLM/ros_agent/CMakeLists.txt @@ -0,0 +1,28 @@ +cmake_minimum_required(VERSION 3.0.2) +project(ros_agent) + +find_package(catkin REQUIRED COMPONENTS + rospy + std_msgs + message_generation +) + +# catkin_python_setup() + +add_service_files( + FILES + HandleAgentAction.srv +) + +generate_messages( + DEPENDENCIES + std_msgs +) + +catkin_package( + CATKIN_DEPENDS message_runtime +) + +include_directories( + ${catkin_INCLUDE_DIRS} +) \ No newline at end of file diff --git a/ROSLLM/ros_agent/README.md b/ROSLLM/ros_agent/README.md new file mode 100644 index 00000000..a5015936 --- /dev/null +++ b/ROSLLM/ros_agent/README.md @@ -0,0 +1,33 @@ +# Agent interface + +This packge allow you to communicate with the Agent + +## Documentation + +### To start the Flask communication node: +In ROS environment run + +``` +python ../scripts/ros_agent_node.py +``` + +Please refer the document in the agent for strating the agent. + +### To stop the node: +Tpye ``exit`` in the human input + +## Examples: +To publish current time to the obsercvation: +In ROS environment run + +``` +python ../example/publish_time.py +``` + + +To strat the action service: +In ROS environment run + +``` +python ../example/agentactionservice.py +``` \ No newline at end of file diff --git a/ROSLLM/ros_agent/example/agentactionservice.py b/ROSLLM/ros_agent/example/agentactionservice.py new file mode 100755 index 00000000..86460913 --- /dev/null +++ b/ROSLLM/ros_agent/example/agentactionservice.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +import rospy +from ros_agent.srv import HandleAgentAction, HandleAgentActionResponse + +def handle_action(req): + """ + Process the action request and return the response. + This function simulates action processing by returning a success status, + a message, and a reward. 
+ """ + print("Received action request: {}".format(req.action)) + + # Here you would add the logic to process the action, e.g., controlling a robot or running an algorithm + response_message = "Action processed successfully" + reward = 1.0 # Example fixed reward; adjust based on actual action processing logic + + return HandleAgentActionResponse(success=True, response=response_message, reward=reward) + +def action_service(): + rospy.init_node('agent_action_service') + + # Create the service 'handle_agent_action' and specify the handler function + s = rospy.Service('handle_agent_action', HandleAgentAction, handle_action) + + print("Service 'handle_agent_action' ready to handle requests.") + rospy.spin() # Keep the service open. + +if __name__ == "__main__": + action_service() diff --git a/ROSLLM/ros_agent/example/publish_time.py b/ROSLLM/ros_agent/example/publish_time.py new file mode 100644 index 00000000..d8091e7c --- /dev/null +++ b/ROSLLM/ros_agent/example/publish_time.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +import rospy +from std_msgs.msg import String +import datetime + +def publish_timestamp(): + rospy.init_node('timestamp_publisher', anonymous=True) + publisher = rospy.Publisher('agent_environment', String, queue_size=10) + rate = rospy.Rate(5) # Frequency of 1 Hz + + while not rospy.is_shutdown(): + current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + rospy.loginfo(f"Publishing current timestamp: {current_time}") + publisher.publish(current_time) + rate.sleep() + +if __name__ == '__main__': + try: + publish_timestamp() + except rospy.ROSInterruptException: + pass diff --git a/ROSLLM/ros_agent/package.xml b/ROSLLM/ros_agent/package.xml new file mode 100644 index 00000000..611e6d0a --- /dev/null +++ b/ROSLLM/ros_agent/package.xml @@ -0,0 +1,21 @@ + + + ros_agent + 0.0.0 + The ros_agent package + + hyu + cmower + + TODO + + hyu + cmower + + catkin + rospy + std_msgs + message_generation + message_runtime + + diff --git a/ROSLLM/ros_agent/scripts/command_window.py b/ROSLLM/ros_agent/scripts/command_window.py new file mode 100644 index 00000000..9dffc984 --- /dev/null +++ b/ROSLLM/ros_agent/scripts/command_window.py @@ -0,0 +1,25 @@ +from flask import Flask, request, jsonify + +app = Flask(__name__) + +@app.route('/', methods=['POST']) +def receive_string(): + if request.method == 'POST': + # Receive the JSON data + data = request.get_json() + + # Extract the string from the JSON data + received_string = data.get('msg') + + # Prompt the user to input another string + user_input_string = input(f"{received_string}\n") + + # Prepare the response JSON data + response_data = { + 'received_string': received_string, + 'user_input_string': user_input_string + } + return jsonify(response_data) + +if __name__ == '__main__': + app.run(host='0.0.0.0', port=5002, debug=True, use_reloader=False) diff --git a/ROSLLM/ros_agent/scripts/ros_agent_node.py b/ROSLLM/ros_agent/scripts/ros_agent_node.py new file mode 100644 index 00000000..d50daa89 --- /dev/null +++ b/ROSLLM/ros_agent/scripts/ros_agent_node.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +import rospy +import os +import signal + +from ros_agent.srv import HandleAgentAction, HandleAgentActionRequest +from flask import Flask, request, jsonify + +from std_srvs.srv import Trigger +from std_msgs.msg import String + +class AgentApi: + + def __init__(self, node): + self.node = node + self.app = Flask(__name__) + self.setup_routes() + + def setup_routes(self): + self.app.add_url_rule('/llmreq', 'handle_request_llm', 
self.handle_request_llm, methods=['POST']) + self.app.add_url_rule('/rosenv', 'handle_request_env', self.handle_request_env, methods=['POST']) + + def handle_request_llm(self): + if not request.json or 'action' not in request.json: + return jsonify({"obs": "Request body must contain 'action' field", + "reward":0,'success':False}), 400 + + action = request.json['action'] + success, resp, reward = self.node.handle_action(action) + print("Agent response:",action) + return jsonify({"done": success, "obs": resp, "reward": reward}) + + def handle_request_env(self): + print("handle env request OK") + self.node.request_human_feedback() + obs = self.node.get_combined_observation() + # print(obs) + return jsonify({"obs": obs, 'success': True if obs else False}) + + + def run_flask_app(self): + self.app.run(host='0.0.0.0', port=5000, debug=True, use_reloader=False) + + +class Node: + + + def __init__(self): + rospy.init_node("ros_agent_node", anonymous=True) + self.srv_name = "handle_agent_action" + self.latest_env_observation = "No observation yet" + self.latest_human_feedback = "No human input yet" + rospy.Subscriber("agent_environment", String, self.observation_callback) + # rospy.Subscriber("human_feedback", String, self.feedback_callback) # Subscribe to the human_feedback topic + self.action_publisher = rospy.Publisher("agent_action", String, queue_size=10) + self.api = AgentApi(self) + self.api.run_flask_app() + rospy.loginfo("initialized ros_agent node") + + def handle_action(self, action): + try: + self.action_publisher.publish(action) # Publish the action to the ROS network + handle_agent_action = rospy.ServiceProxy(self.srv_name, HandleAgentAction) + req = HandleAgentActionRequest(action=action) + resp = handle_agent_action(req) + rospy.loginfo(f"Action response: {resp.response}") + return resp.success, resp.response, resp.reward + except rospy.ServiceException as e: + success = False + response = f"handling request failed: {e}" + rospy.logwarn(f"handling request failed: {e}") + return success, response, 0.0 + + def request_human_feedback(self): + """Prompt for human feedback from the terminal.""" + self.latest_human_feedback = input(" Human, please enter input: ") + if self.latest_human_feedback.lower() == "exit": + print("Killing program...") + os.kill(os.getpid(), signal.SIGKILL) + + + def observation_callback(self, msg): + """Callback function to update the latest observed state.""" + self.latest_env_observation = msg.data + + # To read the humanfeedback from a topic + # def feedback_callback(self, msg): + # """Callback function to update the latest human feedback.""" + # self.latest_human_feedback = msg.data + + + def get_combined_observation(self): + """Combine the latest observation with the latest human feedback.""" + combined = f"Environment Observation: {self.latest_env_observation} | Human Input: {self.latest_human_feedback}" + print(f"Message to Agent: Environment Observation: {self.latest_env_observation} | Human Input: {self.latest_human_feedback} \n") + return combined + + + def spin(self): + while not rospy.is_shutdown(): + self.request_human_feedback() + rospy.spin() + + +def main(): + Node().spin() + + +if __name__ == "__main__": + main() diff --git a/ROSLLM/ros_agent/srv/HandleAgentAction.srv b/ROSLLM/ros_agent/srv/HandleAgentAction.srv new file mode 100644 index 00000000..1a9897ae --- /dev/null +++ b/ROSLLM/ros_agent/srv/HandleAgentAction.srv @@ -0,0 +1,5 @@ +string action +--- +bool success +string response +float64 reward
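For reference, here is a minimal sketch of how an external script could exercise the HTTP bridge added in this series, mirroring the requests that `RosApi` issues from the agent side. It assumes the Flask node from `ros_agent_node.py` is running on `localhost:5000` with the `/rosenv` and `/llmreq` routes registered as above; the helper function and the example action string are illustrative only and are not part of the patch.

```python
import requests

BASE_URL = "http://localhost:5000"  # Flask server started by ros_agent_node.py


def post_json(route: str, payload: dict, timeout: float = 180.0) -> dict:
    """POST a JSON payload to the ROS-agent bridge and return the decoded reply."""
    resp = requests.post(f"{BASE_URL}/{route}", json=payload, timeout=timeout)
    resp.raise_for_status()
    return resp.json()


if __name__ == "__main__":
    # Ask the ROS side for the latest environment observation plus human input,
    # as ROSTask.reset() does through RosApi.get_env_observation().
    obs = post_json("rosenv", {"": ""})
    print("observation:", obs.get("obs"))

    # Send an illustrative action; the reply fields mirror HandleAgentAction.srv
    # (a done/success flag, a textual response, and a scalar reward).
    reply = post_json("llmreq", {"action": "move to the table"})
    print("done:", reply.get("done"), "reward:", reply.get("reward"), "obs:", reply.get("obs"))
```

The same round trip can also be driven from within ROS by calling the `handle_agent_action` service directly, using the request and response fields defined in `HandleAgentAction.srv` above.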