From 81d21a28bd354019977e4bc2e8ca75a3e4e867cf Mon Sep 17 00:00:00 2001 From: Yuhui Date: Mon, 8 Jul 2024 13:42:02 +0100 Subject: [PATCH 1/5] Modify readme --- Agent/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Agent/README.md b/Agent/README.md index 1522a500..2fba80ef 100644 --- a/Agent/README.md +++ b/Agent/README.md @@ -12,3 +12,5 @@ make html ``` There are also several helpful [tutorials](tutorials/) to help you get started with running and customizing Agent. + +## ROS interface \ No newline at end of file From 62fc761792480164c7aa005aaa83644c043f5527 Mon Sep 17 00:00:00 2001 From: Yuhui Date: Mon, 8 Jul 2024 14:08:24 +0100 Subject: [PATCH 2/5] Add ROS interface --- Agent/configs/task/ros_task.yaml | 16 + .../templates/default/direct_prompt.jinja | 3 + .../templates/default/trajectory.jinja | 2 +- Agent/src/agent/tasks/ros_api.py | 67 ++++ Agent/src/agent/tasks/ros_task.py | 51 +++ README.md | 346 ++---------------- 6 files changed, 167 insertions(+), 318 deletions(-) create mode 100644 Agent/configs/task/ros_task.yaml create mode 100644 Agent/src/agent/prompts/templates/default/direct_prompt.jinja create mode 100644 Agent/src/agent/tasks/ros_api.py create mode 100644 Agent/src/agent/tasks/ros_task.py diff --git a/Agent/configs/task/ros_task.yaml b/Agent/configs/task/ros_task.yaml new file mode 100644 index 00000000..58a6f40b --- /dev/null +++ b/Agent/configs/task/ros_task.yaml @@ -0,0 +1,16 @@ +# @package _global_ +agent: + prompt_builder: + template_paths: + - default + + +task: + _target_: agent.tasks.ros_task.ROSTask + name: simplebot_test_env + description: + subtask: null + version: v0.1 + +max_env_steps: 1 +max_episodes: 100 \ No newline at end of file diff --git a/Agent/src/agent/prompts/templates/default/direct_prompt.jinja b/Agent/src/agent/prompts/templates/default/direct_prompt.jinja new file mode 100644 index 00000000..77ff8ef2 --- /dev/null +++ b/Agent/src/agent/prompts/templates/default/direct_prompt.jinja @@ -0,0 +1,3 @@ +Now please answer the question. 
+Answer in the format +Answer: diff --git a/Agent/src/agent/prompts/templates/default/trajectory.jinja b/Agent/src/agent/prompts/templates/default/trajectory.jinja index 02485bbd..19a80b5b 100644 --- a/Agent/src/agent/prompts/templates/default/trajectory.jinja +++ b/Agent/src/agent/prompts/templates/default/trajectory.jinja @@ -3,4 +3,4 @@ Here is what happened so far: {%- if action %} Action: {{action}} {%- endif %} -Observation: {{memory.retrieve({memory.mem_keys.OBSERVATION: 1.0})}} +Current: {{memory.retrieve({memory.mem_keys.OBSERVATION: 1.0})}} diff --git a/Agent/src/agent/tasks/ros_api.py b/Agent/src/agent/tasks/ros_api.py new file mode 100644 index 00000000..02b5171f --- /dev/null +++ b/Agent/src/agent/tasks/ros_api.py @@ -0,0 +1,67 @@ +import requests + + +class RosApi: + + # Defined on ros side + default_timeout = 3 * 60 # default timeout is 3 minutes + + def __init__(self, timeout=None): + self.timeout = self.default_timeout if timeout is None else timeout + self._response = None + + @staticmethod + def bad_response(obs): + return { + "success": False, + "done": True, + "reward": 0, + "obs": obs, + } + + def send_action(self, action): + url = "http://localhost:5000/llmreq" + self._response = None # reset response + try: + data = {"action": action} + resp = requests.post(url, json=data, timeout=self.timeout) + response = resp.json() + except requests.exceptions.Timeout: + response = self.bad_response("Request timeout.") + except requests.exceptions.RequestException as e: + response = self.bad_response(f"Request exception: {e}") + self._response = response + + def get_env_observation(self): + url = "http://localhost:5000/rosenv" + self._response = None # reset response + try: + data = {"": ""} + resp = requests.post(url, json=data, timeout=self.timeout) + response = resp.json() + except requests.exceptions.Timeout: + response = self.bad_response("Request timeout.") + except requests.exceptions.RequestException as e: + response = self.bad_response(f"Request exception: {e}") + self._response = response + + return response + + def get_feedback(self): + url = "http://localhost:5000/rosfdb" + self._response = None # reset response + try: + data = {"": ""} + resp = requests.post(url, json=data, timeout=self.timeout) + response = resp.json() + except requests.exceptions.Timeout: + response = self.bad_response("Request timeout.") + except requests.exceptions.RequestException as e: + response = self.bad_response(f"Request exception: {e}") + self._response = response + + return response + + def receive_response(self): + assert self._response is not None, "did not receive a response" + return self._response diff --git a/Agent/src/agent/tasks/ros_task.py b/Agent/src/agent/tasks/ros_task.py new file mode 100644 index 00000000..a0c24d4d --- /dev/null +++ b/Agent/src/agent/tasks/ros_task.py @@ -0,0 +1,51 @@ +from agent.tasks import Task +from .ros_api import RosApi +from typing import Any, Dict +import warnings + + +class ROSTask(Task): + + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.ros_api = RosApi() + print(self.ros_api) + self.response = {"reward": 0.0, + "done": False, + "obs": ""} + self.possible_actions = [] + + def answer_parser(self, raw_response: str): + return raw_response + + def is_complete(self): + return self.response['done'] + + def reset(self, next_subtask: str | None = None) -> Dict[str, str]: + """Reset the environment and return the initial observation.""" + + if next_subtask is not None: + warnings.warn("ros_task does not support subtasks, ignoring 
subtask") + + response= self.ros_api.get_env_observation() + + return { + "_text_obs": response, + "_available_actions": self.possible_actions + } + + def get_observation(self): + obs = self.ros_api.get_env_observation() + fdb = self.ros_api.get_feedback() + return { + "_available_actions": obs, + "_available_actions": fdb + } + + def step(self, action): + print(action) + self.ros_api.send_action(action) + self.response = self.ros_api.receive_response() + print(self.response['obs']) + return {}, self.response["reward"], self.response["done"] + diff --git a/README.md b/README.md index 873fde76..b5b0f6fc 100644 --- a/README.md +++ b/README.md @@ -1,317 +1,29 @@ -# Bayesian Optimisation & Reinforcement Learning Research - -This directory contains official implementations for Bayesian optimization & Reinforcement Learning works developed by -Huawei, Noah's Ark Lab. - -- Bayesian Optimisation Research - - [HEBO: Heteroscedastic Evolutionary Bayesian Optimisation](./HEBO) - - [MCBO: Framework and Benchmarks for Combinatorial and Mixed-variable Bayesian Optimization](./MCBO) - - [NAP: End-to-End Meta-Bayesian Optimisation with Transformer Neural Processes](./NAP) - - [RDUCB: High Dimensional Bayesian Optimisation with Random Decompositions](./RDUCB) - - [AntBO: Antibody Design with Combinatorial Bayesian Optimisation](./AntBO) - - [BOiLS: Bayesian Optimisation for Logic Synthesis](./BOiLS) - - [T-LBO: BO in a latent space shaped by metric learning](./T-LBO) - - [Bayesian Optimisation with Compositional Optimisers](./CompBO) - - [AIRBO: Efficient Robust Bayesian Optimisation for Arbitrary Uncertain Inputs](./AIRBO) -- Reinforcement Learning Research - - [Enhancing Reinforcement Learning Agents with Local Guides](RLLG) - - [Sauté RL and Simmer RL: Safe Reinforcement Learning Using Safety State Augmentation ](./SIMMER) - - [Model-Based Offline Reinforcement Learning with Pessimism-Modulated Dynamics Belief](./PMDB) -- Robotics Research - - [ROS-LLM: A ROS framework for embodied AI with task feedback and structured reasoning](./ROSLLM) - -Further instructions are provided in the README files associated to each project. - -# Bayesian Optimisation Research - -## [HEBO](./HEBO) - -drawing - -Bayesian optimization library developed by Huawei Noahs Ark Decision Making and Reasoning (DMnR) lab. The -winning submission to -the [NeurIPS 2020 Black-Box Optimisation Challenge](https://bbochallenge.com/leaderboard). - -## [MCBO](./MCBO) - -

- - -Codebase associated to: [Framework and Benchmarks for Combinatorial and Mixed-variable Bayesian Optimization -](https://arxiv.org/abs/2306.09803) accepted at **NeurIPS** (2023). - -##### Abstract - -This paper introduces a modular framework for Mixed-variable and Combinatorial -Bayesian Optimization (MCBO) to address the lack of systematic benchmarking -and standardized evaluation in the field. Current MCBO papers often introduce -non-diverse or non-standard benchmarks to evaluate their methods, impeding the -proper assessment of different MCBO primitives and their combinations. Additionally, papers introducing a solution for a -single MCBO primitive often omit -benchmarking against baselines that utilize the same methods for the remaining primitives. This omission is primarily -due to the significant implementation -overhead involved, resulting in a lack of controlled assessments and an inability -to showcase the merits of a contribution effectively. To overcome these challenges, our proposed framework enables an -effortless combination of Bayesian -Optimization components, and provides a diverse set of synthetic and real-world -benchmarking tasks. Leveraging this flexibility, we implement 47 novel MCBO -algorithms and benchmark them against seven existing MCBO solvers and five -standard black-box optimization algorithms on ten tasks, conducting over 4000 -experiments. Our findings reveal a superior combination of MCBO primitives -outperforming existing approaches and illustrate the significance of model fit and -the use of a trust region. We make our MCBO library available under the MIT -license at https://github.com/huawei-noah/HEBO/tree/master/MCBO. - -## [NAP: End-to-End Meta-Bayesian Optimisation with Transformer Neural Processes](./NAP/) - -![regret-all](./NAP/figures/regret-all.png) -Codebase associated -to: [End-to-End Meta-Bayesian Optimisation with Transformer Neural Processes](https://arxiv.org/abs/2305.15930), -accepted at **NeurIPS** (2023). - -##### Abstract - -Meta-Bayesian optimisation (meta-BO) aims to improve the sample efficiency of Bayesian optimisation by leveraging data -from related tasks. While previous methods successfully meta-learn either a surrogate model or an acquisition function -independently, joint training of both components remains an open challenge. This paper proposes the first end-to-end -differentiable meta-BO framework that generalises neural processes to learn acquisition functions via transformer -architectures. We enable this end-to-end framework with reinforcement learning (RL) to tackle the lack of labelled -acquisition data. Early on, we notice that training transformer-based neural processes from scratch with RL is -challenging due to insufficient supervision, especially when rewards are sparse. We formalise this claim with a -combinatorial analysis showing that the widely used notion of regret as a reward signal exhibits a logarithmic sparsity -pattern in trajectory lengths. To tackle this problem, we augment the RL objective with an auxiliary task that guides -part of the architecture to learn a valid probabilistic model as an inductive bias. We demonstrate that our method -achieves state-of-the-art regret results against various baselines in experiments on standard hyperparameter -optimisation tasks and also outperforms others in the real-world problems of mixed-integer programming tuning, antibody -design, and logic synthesis for electronic design automation. 
- -## [RDUCB: High Dimensional Bayesian Optimisation with Random Decompositions](./RDUCB) - -

- -Codebase associated -to: [Are Random Decomositions all we need in High Dimensional Bayesian Optimisation](https://arxiv.org/pdf/2301.12844.pdf) -accepted at **ICML** (2023). - -##### Abstract - -Learning decompositions of expensive-to-evaluate black-box functions promises to scale Bayesian optimisation (BO) to -high-dimensional -problems. However, the success of these techniques depends on finding proper decompositions -that accurately represent the black-box. While previous works learn those decompositions based -on data, we investigate data-independent decomposition sampling rules in this paper. We find -that data-driven learners of decompositions can be easily misled towards local decompositions -that do not hold globally across the search space. Then, we formally show that a random tree-based -decomposition sampler exhibits favourable theoretical guarantees that effectively trade -off maximal information gain and functional mismatch between the actual black-box and its -surrogate as provided by the decomposition. Those results motivate the development of the -random decomposition upper-confidence bound algorithm (RDUCB) that is straightforward -to implement - (almost) plug-and-play - and, surprisingly, yields significant empirical gains -compared to the previous state-of-the-art on a comprehensive set of benchmarks. We also -confirm the plug-and-play nature of our modelling component by integrating our method with -HEBO, showing improved practical gains in the highest dimensional tasks from Bayesmark. - -## [AntBO: Antibody Design with Combinatorial Bayesian Optimisation](./AntBO) - -![AntBO overview](./AntBO/figures/AntBO_illustrationPNG.PNG?raw=true) - -Codebase associated -to: [AntBO: Towards Real-World Automated Antibody Design with Combinatorial Bayesian Optimisation](https://www.sciencedirect.com/science/article/pii/S2667237522002764) -published in **Cell Reports Method**s (2023). - -##### Abstract - -Antibodies are canonically Y-shaped multimeric proteins capable of highly specific molecular recognition. The CDRH3 -region located at the tip of variable chains of an antibody dominates antigen-binding specificity. Therefore, it is a -priority to design optimal antigen-specific CDRH3 regions to develop therapeutic antibodies to combat harmful pathogens. -However, the combinatorial nature of CDRH3 sequence space makes it impossible to search for an optimal binding sequence -exhaustively and efficiently, especially not experimentally. Here, we present AntBO: a Combinatorial Bayesian -Optimisation framework enabling efficient in silico design of the CDRH3 region. Ideally, antibodies should bind to -their target antigen and be free from any harmful outcomes. Therefore, we introduce the CDRH3 trust region that -restricts the search to sequences with feasible developability scores. To benchmark AntBO, we use the Absolut! software -suite as a black-box oracle because it can score the target specificity and affinity of designed antibodies in silico -in an unconstrained fashion. The results across 188 antigens demonstrate the benefit of AntBO in designing CDRH3 regions -with diverse biophysical properties. In under 200 protein designs, AntBO can suggest antibody sequences that outperform -the best binding sequence drawn from 6.9 million experimentally obtained CDRH3s and a commonly used genetic algorithm -baseline. Additionally, AntBO finds very-high affinity CDRH3 sequences in only 38 protein designs whilst requiring no -domain knowledge. 
We conclude AntBO brings automated antibody design methods closer to what is practically viable for -in vitro experimentation. - -## [BOiLS: Bayesian Optimisation for Logic Synthesis](./BOiLS) - -

- drawing -

- -Codebase associated to: [BOiLS: Bayesian Optimisation for Logic Synthesis](https://arxiv.org/abs/2111.06178) accepted -at **DATE22** conference. - -##### Abstract - -Optimising the quality-of-results (QoR) of circuits during logic synthesis is a formidable challenge necessitating the -exploration of exponentially sized search spaces. While expert-designed operations aid in uncovering effective -sequences, the increase in complexity of logic circuits favours automated procedures. Inspired by the successes of -machine learning, researchers adapted deep learning and reinforcement learning to logic synthesis applications. However -successful, those techniques suffer from high sample complexities preventing widespread adoption. To enable efficient -and scalable solutions, we propose BOiLS, the first algorithm adapting modern Bayesian optimisation to navigate the -space of synthesis operations. BOiLS requires no human intervention and effectively trades-off exploration versus -exploitation through novel Gaussian process kernels and trust-region constrained acquisitions. In a set of experiments -on EPFL benchmarks, we demonstrate BOiLS's superior performance compared to state-of-the-art in terms of both sample -efficiency and QoR values. - -## [T-LBO: BO in a latent space shaped by metric learning](./T-LBO) - -

- -Codebase associated -to: [High-Dimensional Bayesian Optimisation with Variational Autoencoders and Deep Metric Learning](https://arxiv.org/abs/2106.03609) - -##### Abstract - -We introduce a method based on deep metric learning to perform Bayesian optimisation over high-dimensional, structured -input spaces using variational autoencoders (VAEs). By extending ideas from supervised deep metric learning, we address -a longstanding problem in high-dimensional VAE Bayesian optimisation, namely how to enforce a discriminative latent -space as an inductive bias. Importantly, we achieve such an inductive bias using just 1% of the available labelled data -relative to previous work, highlighting the sample efficiency of our approach. As a theoretical contribution, we present -a proof of vanishing regret for our method. As an empirical contribution, we present state-of-the-art results on -real-world high-dimensional black-box optimisation problems including property-guided molecule generation. It is the -hope that the results presented in this paper can act as a guiding principle for realising effective high-dimensional -Bayesian optimisation. - -## [Bayesian Optimisation with Compositional Optimisers](./CompBO) - -
- -Codebase associated -to: [Are we Forgetting about Compositional Optimisers in Bayesian Optimisation?](https://www.jmlr.org/papers/v22/20-1422.html) -accepted at **JMLR**. - -##### Abstract - -Bayesian optimisation presents a sample-efficient methodology for global optimisation. Within this framework, a crucial -performance-determining subroutine is the maximisation of the acquisition function, a task complicated by the fact that -acquisition functions tend to be non-convex and thus nontrivial to optimise. In this paper, we undertake a comprehensive -empirical study of approaches to maximise the acquisition function. Additionally, by deriving novel, yet mathematically -equivalent, compositional forms for popular acquisition functions, we recast the maximisation task as a compositional -optimisation problem, allowing us to benefit from the extensive literature in this field. We highlight the empirical -advantages of the compositional approach to acquisition function maximisation across 3958 individual experiments -comprising synthetic optimisation tasks as well as tasks from Bayesmark. Given the generality of the acquisition -function maximisation subroutine, we posit that the adoption of compositional optimisers has the potential to yield -performance improvements across all domains in which Bayesian optimisation is currently being applied. - -## [AIRBO](./AIRBO) - -
drawing - - - -Codebase associated -to: [Efficient Robust Bayesian Optimization for Arbitrary Uncertain Inputs](https://arxiv.org/abs/2310.20145) - -##### Abstract - -Bayesian Optimization (BO) is a sample-efficient optimization algorithm widely employed across various applications. In -some challenging BO tasks, input uncertainty arises due to the inevitable randomness in the optimization process, such -as machining errors, execution noise, or contextual variability. This uncertainty deviates the input from the intended -value before evaluation, resulting in significant performance fluctuations in the final result. In this paper, we -introduce a novel robust Bayesian Optimization algorithm, AIRBO, which can effectively identify a robust optimum that -performs consistently well under arbitrary input uncertainty. Our method directly models the uncertain inputs of -arbitrary distributions by empowering the Gaussian Process with the Maximum Mean Discrepancy (MMD) and further -accelerates the posterior inference via Nystrom approximation. Rigorous theoretical regret bound is established under -MMD estimation error and extensive experiments on synthetic functions and real problems demonstrate that our approach -can handle various input uncertainties and achieve state-of-the-art performance. - -# Reinforcement Learning Research - -## [Enhancing Reinforcement Learning Agents with Local Guides](./RLLG) - -Codebase associated -to [paper: Enhancing Reinforcement Learning Agents with Local Guides](https://hal.science/hal-04052358/file/Final_Reinforcement_Learning_with_Local_Guides.pdf) - -##### Abstract - -This paper addresses the problem of integrating local guide policies into a Reinforcement Learning agent. For this, we -show how -to adapt existing algorithms to this setting before introducing a -novel algorithm based on a noisy policy-switching procedure. This -approach builds on a proper Approximate Policy Evaluation (APE) -scheme to provide a perturbation that carefully leads the local -guides towards better actions. We evaluated our method on a set -of classical Reinforcement Learning problems, including safetycritical systems where the agent cannot enter some areas -at the -risk of triggering catastrophic consequences. In all the proposed -environments, our agent proved to be efficient at leveraging those -policies to improve the performance of any APE-based Reinforcement Learning algorithm, especially in its first learning -stages. - -## [Sauté RL and Simmer RL: Safe Reinforcement Learning Using Safety State Augmentation](./SIMMER) - -Codebase associated to: [Sauté RL: Almost Surely Safe RL Using State Augmentation](https://arxiv.org/pdf/2202.06558.pdf) -and [Enhancing Safe Exploration Using Safety State Augmentation](https://arxiv.org/pdf/2206.02675.pdf). - -##### Abstract for Sauté RL: Almost Surely Safe RL Using State Augmentation (ICML 2022) - -Satisfying safety constraints almost surely (or with probability one) can be critical for deployment of Reinforcement -Learning (RL) in real-life applications. For example, plane landing and take-off should ideally occur with probability -one. We address the problem by introducing Safety Augmented (Saute) Markov Decision Processes (MDPs), where the safety -constraints are eliminated by augmenting them into the state-space and reshaping the objective. We show that Saute MDP -satisfies the Bellman equation and moves us closer to solving Safe RL with constraints satisfied almost surely. 
We argue -that Saute MDP allows to view Safe RL problem from a different perspective enabling new features. For instance, our -approach has a plug-and-play nature, i.e., any RL algorithm can be "sauteed". Additionally, state augmentation allows -for policy generalization across safety constraints. We finally show that Saute RL algorithms can outperform their -state-of-the-art counterparts when constraint satisfaction is of high importance. - -##### Abstract for Effects of Safety State Augmentation on Safe Exploration (NeurIPS 2022) - -Safe exploration is a challenging and important problem in model-free reinforcement learning (RL). Often the safety cost -is sparse and unknown, which unavoidably leads to constraint violations -- a phenomenon ideally to be avoided in -safety-critical applications. We tackle this problem by augmenting the state-space with a safety state, which is -nonnegative if and only if the constraint is satisfied. The value of this state also serves as a distance toward -constraint violation, while its initial value indicates the available safety budget. This idea allows us to derive -policies for scheduling the safety budget during training. We call our approach Simmer (Safe policy IMproveMEnt for -RL) to reflect the careful nature of these schedules. We apply this idea to two safe RL problems: RL with constraints -imposed on an average cost, and RL with constraints imposed on a cost with probability one. Our experiments suggest -that simmering a safe algorithm can improve safety during training for both settings. We further show that Simmer can -stabilize training and improve the performance of safe RL with average constraints. - -## [Model-Based Offline Reinforcement Learning with Pessimism-Modulated Dynamics Belief](./PMDB) - -Code associdated -to: [Model-Based Offline Reinforcement Learning with Pessimism-Modulated Dynamics Belief](https://nips.cc/Conferences/2022/Schedule?showEvent=54842) -accepted -at **NeurIPS (2022)** conference. - -#### Abstract - -Model-based offline reinforcement learning (RL) aims to find highly rewarding policy, by leveraging a previously -collected static dataset and a dynamics model. While learned through reuse of static dataset, the dynamics model's -generalization ability hopefully promotes policy learning if properly utilized. To that end, several works propose to -quantify the uncertainty of predicted dynamics, and explicitly apply it to penalize reward. However, as the dynamics and -the reward are intrinsically different factors in context of MDP, characterizing the impact of dynamics uncertainty -through reward penalty may incur unexpected tradeoff between model utilization and risk avoidance. In this work, we -instead maintain a belief distribution over dynamics, and evaluate/optimize policy through biased sampling from the -belief. The sampling procedure, biased towards pessimism, is derived based on an alternating Markov game formulation -of offline RL. We formally show that the biased sampling naturally induces an updated dynamics belief with -policy-dependent reweighting factor, termed *Pessimism-Modulated Dynamics Belief*. To improve policy, we devise an -iterative regularized policy optimization algorithm for the game, with guarantee of monotonous improvement under certain -condition. To make practical, we further devise an offline RL algorithm to approximately find the solution. Empirical -results show that the proposed approach achieves state-of-the-art performance on a wide range of benchmark tasks. 
- ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- - -## Codebase Contributors - - Current contributors: Antoine Grosnit, Alexandre Max Maravel, Taher Jafferjee, Wenlong Lyu, Kaiyang Guo, Juliusz Ziomek, Paul Daoudi, Merwan Barlier, Christopher E. Mower. - - Alumni / External contributors: Alexander I. Cowen-Rivers, Kamil Dreczkowski, Aivar Sootla, Ryan Rhys Griffiths, Zhi Wang, Ludovic Dos Santos, Bogdan Robu, Christophe Prieur. - +# Agent + +This library allows you to run many tasks using LLMs in a modular way! + +## Documentation + +You can compile the documentation yourself under `docs/` by running: + +```bash +cd docs +make html +``` + +There are also several helpful [tutorials](tutorials/) to help you get started with running and customizing Agent. + + +## ROS interface + +To run the agent with Flask interface with ROS + +In conda environment using the following command: + +``` +python ../src/agent/start.py task=ros_task method=direct llm@agent.llm=human +``` + +Replace the ``llm@agent.llm=human`` with your actual model. + +In ROS environment follow the instruction in the ros_pange_agent packge From 67592b4cd065fe3064d17c15989577f007feb490 Mon Sep 17 00:00:00 2001 From: Yuhui Date: Mon, 8 Jul 2024 14:12:42 +0100 Subject: [PATCH 3/5] Update readme --- Agent/README.md | 15 ++++++++++++++- README.md | 29 ----------------------------- 2 files changed, 14 insertions(+), 30 deletions(-) delete mode 100644 README.md diff --git a/Agent/README.md b/Agent/README.md index 2fba80ef..b5b0f6fc 100644 --- a/Agent/README.md +++ b/Agent/README.md @@ -13,4 +13,17 @@ make html There are also several helpful [tutorials](tutorials/) to help you get started with running and customizing Agent. -## ROS interface \ No newline at end of file + +## ROS interface + +To run the agent with Flask interface with ROS + +In conda environment using the following command: + +``` +python ../src/agent/start.py task=ros_task method=direct llm@agent.llm=human +``` + +Replace the ``llm@agent.llm=human`` with your actual model. + +In ROS environment follow the instruction in the ros_pange_agent packge diff --git a/README.md b/README.md deleted file mode 100644 index b5b0f6fc..00000000 --- a/README.md +++ /dev/null @@ -1,29 +0,0 @@ -# Agent - -This library allows you to run many tasks using LLMs in a modular way! - -## Documentation - -You can compile the documentation yourself under `docs/` by running: - -```bash -cd docs -make html -``` - -There are also several helpful [tutorials](tutorials/) to help you get started with running and customizing Agent. - - -## ROS interface - -To run the agent with Flask interface with ROS - -In conda environment using the following command: - -``` -python ../src/agent/start.py task=ros_task method=direct llm@agent.llm=human -``` - -Replace the ``llm@agent.llm=human`` with your actual model. 
- -In ROS environment follow the instruction in the ros_pange_agent packge From e4a47dc691445336ea66670ce67736a4bfdbf90e Mon Sep 17 00:00:00 2001 From: Yuhui Date: Mon, 8 Jul 2024 14:15:09 +0100 Subject: [PATCH 4/5] wrong readme before --- README.md | 306 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 306 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 00000000..412009cc --- /dev/null +++ b/README.md @@ -0,0 +1,306 @@ +# Bayesian Optimisation & Reinforcement Learning Research + +This directory contains official implementations for Bayesian optimization & Reinforcement Learning works developed by +Huawei, Noah's Ark Lab. + +- Bayesian Optimisation Research + - [HEBO: Heteroscedastic Evolutionary Bayesian Optimisation](./HEBO) + - [MCBO: Framework and Benchmarks for Combinatorial and Mixed-variable Bayesian Optimization](./MCBO) + - [NAP: End-to-End Meta-Bayesian Optimisation with Transformer Neural Processes](./NAP) + - [RDUCB: High Dimensional Bayesian Optimisation with Random Decompositions](./RDUCB) + - [AntBO: Antibody Design with Combinatorial Bayesian Optimisation](./AntBO) + - [BOiLS: Bayesian Optimisation for Logic Synthesis](./BOiLS) + - [T-LBO: BO in a latent space shaped by metric learning](./T-LBO) + - [Bayesian Optimisation with Compositional Optimisers](./CompBO) + - [AIRBO: Efficient Robust Bayesian Optimisation for Arbitrary Uncertain Inputs](./AIRBO) +- Reinforcement Learning Research + - [Enhancing Reinforcement Learning Agents with Local Guides](RLLG) + - [Sauté RL and Simmer RL: Safe Reinforcement Learning Using Safety State Augmentation ](./SIMMER) + - [Model-Based Offline Reinforcement Learning with Pessimism-Modulated Dynamics Belief](./PMDB) + +Further instructions are provided in the README files associated to each project. + +# Bayesian Optimisation Research + +## [HEBO](./HEBO) + +drawing + +Bayesian optimization library developed by Huawei Noahs Ark Decision Making and Reasoning (DMnR) lab. The +winning submission to +the [NeurIPS 2020 Black-Box Optimisation Challenge](https://bbochallenge.com/leaderboard). + +## [MCBO](./MCBO) + +

+ + +Codebase associated to: [Framework and Benchmarks for Combinatorial and Mixed-variable Bayesian Optimization +](https://arxiv.org/abs/2306.09803) accepted at **NeurIPS** (2023). + +##### Abstract + +This paper introduces a modular framework for Mixed-variable and Combinatorial +Bayesian Optimization (MCBO) to address the lack of systematic benchmarking +and standardized evaluation in the field. Current MCBO papers often introduce +non-diverse or non-standard benchmarks to evaluate their methods, impeding the +proper assessment of different MCBO primitives and their combinations. Additionally, papers introducing a solution for a +single MCBO primitive often omit +benchmarking against baselines that utilize the same methods for the remaining primitives. This omission is primarily +due to the significant implementation +overhead involved, resulting in a lack of controlled assessments and an inability +to showcase the merits of a contribution effectively. To overcome these challenges, our proposed framework enables an +effortless combination of Bayesian +Optimization components, and provides a diverse set of synthetic and real-world +benchmarking tasks. Leveraging this flexibility, we implement 47 novel MCBO +algorithms and benchmark them against seven existing MCBO solvers and five +standard black-box optimization algorithms on ten tasks, conducting over 4000 +experiments. Our findings reveal a superior combination of MCBO primitives +outperforming existing approaches and illustrate the significance of model fit and +the use of a trust region. We make our MCBO library available under the MIT +license at https://github.com/huawei-noah/HEBO/tree/master/MCBO. + +## [NAP: End-to-End Meta-Bayesian Optimisation with Transformer Neural Processes](./NAP/) + +![regret-all](./NAP/figures/regret-all.png) +Codebase associated +to: [End-to-End Meta-Bayesian Optimisation with Transformer Neural Processes](https://arxiv.org/abs/2305.15930), +accepted at **NeurIPS** (2023). + +##### Abstract + +Meta-Bayesian optimisation (meta-BO) aims to improve the sample efficiency of Bayesian optimisation by leveraging data +from related tasks. While previous methods successfully meta-learn either a surrogate model or an acquisition function +independently, joint training of both components remains an open challenge. This paper proposes the first end-to-end +differentiable meta-BO framework that generalises neural processes to learn acquisition functions via transformer +architectures. We enable this end-to-end framework with reinforcement learning (RL) to tackle the lack of labelled +acquisition data. Early on, we notice that training transformer-based neural processes from scratch with RL is +challenging due to insufficient supervision, especially when rewards are sparse. We formalise this claim with a +combinatorial analysis showing that the widely used notion of regret as a reward signal exhibits a logarithmic sparsity +pattern in trajectory lengths. To tackle this problem, we augment the RL objective with an auxiliary task that guides +part of the architecture to learn a valid probabilistic model as an inductive bias. We demonstrate that our method +achieves state-of-the-art regret results against various baselines in experiments on standard hyperparameter +optimisation tasks and also outperforms others in the real-world problems of mixed-integer programming tuning, antibody +design, and logic synthesis for electronic design automation. 
+ +## [RDUCB: High Dimensional Bayesian Optimisation with Random Decompositions](./RDUCB) + +

+ +Codebase associated +to: [Are Random Decomositions all we need in High Dimensional Bayesian Optimisation](https://arxiv.org/pdf/2301.12844.pdf) +accepted at **ICML** (2023). + +##### Abstract + +Learning decompositions of expensive-to-evaluate black-box functions promises to scale Bayesian optimisation (BO) to +high-dimensional +problems. However, the success of these techniques depends on finding proper decompositions +that accurately represent the black-box. While previous works learn those decompositions based +on data, we investigate data-independent decomposition sampling rules in this paper. We find +that data-driven learners of decompositions can be easily misled towards local decompositions +that do not hold globally across the search space. Then, we formally show that a random tree-based +decomposition sampler exhibits favourable theoretical guarantees that effectively trade +off maximal information gain and functional mismatch between the actual black-box and its +surrogate as provided by the decomposition. Those results motivate the development of the +random decomposition upper-confidence bound algorithm (RDUCB) that is straightforward +to implement - (almost) plug-and-play - and, surprisingly, yields significant empirical gains +compared to the previous state-of-the-art on a comprehensive set of benchmarks. We also +confirm the plug-and-play nature of our modelling component by integrating our method with +HEBO, showing improved practical gains in the highest dimensional tasks from Bayesmark. + +## [AntBO: Antibody Design with Combinatorial Bayesian Optimisation](./AntBO) + +![AntBO overview](./AntBO/figures/AntBO_illustrationPNG.PNG?raw=true) + +Codebase associated +to: [AntBO: Towards Real-World Automated Antibody Design with Combinatorial Bayesian Optimisation](https://www.sciencedirect.com/science/article/pii/S2667237522002764) +published in **Cell Reports Method**s (2023). + +##### Abstract + +Antibodies are canonically Y-shaped multimeric proteins capable of highly specific molecular recognition. The CDRH3 +region located at the tip of variable chains of an antibody dominates antigen-binding specificity. Therefore, it is a +priority to design optimal antigen-specific CDRH3 regions to develop therapeutic antibodies to combat harmful pathogens. +However, the combinatorial nature of CDRH3 sequence space makes it impossible to search for an optimal binding sequence +exhaustively and efficiently, especially not experimentally. Here, we present AntBO: a Combinatorial Bayesian +Optimisation framework enabling efficient in silico design of the CDRH3 region. Ideally, antibodies should bind to +their target antigen and be free from any harmful outcomes. Therefore, we introduce the CDRH3 trust region that +restricts the search to sequences with feasible developability scores. To benchmark AntBO, we use the Absolut! software +suite as a black-box oracle because it can score the target specificity and affinity of designed antibodies in silico +in an unconstrained fashion. The results across 188 antigens demonstrate the benefit of AntBO in designing CDRH3 regions +with diverse biophysical properties. In under 200 protein designs, AntBO can suggest antibody sequences that outperform +the best binding sequence drawn from 6.9 million experimentally obtained CDRH3s and a commonly used genetic algorithm +baseline. Additionally, AntBO finds very-high affinity CDRH3 sequences in only 38 protein designs whilst requiring no +domain knowledge. 
We conclude AntBO brings automated antibody design methods closer to what is practically viable for +in vitro experimentation. + +## [BOiLS: Bayesian Optimisation for Logic Synthesis](./BOiLS) + +

+ drawing +

+ +Codebase associated to: [BOiLS: Bayesian Optimisation for Logic Synthesis](https://arxiv.org/abs/2111.06178) accepted +at **DATE22** conference. + +##### Abstract + +Optimising the quality-of-results (QoR) of circuits during logic synthesis is a formidable challenge necessitating the +exploration of exponentially sized search spaces. While expert-designed operations aid in uncovering effective +sequences, the increase in complexity of logic circuits favours automated procedures. Inspired by the successes of +machine learning, researchers adapted deep learning and reinforcement learning to logic synthesis applications. However +successful, those techniques suffer from high sample complexities preventing widespread adoption. To enable efficient +and scalable solutions, we propose BOiLS, the first algorithm adapting modern Bayesian optimisation to navigate the +space of synthesis operations. BOiLS requires no human intervention and effectively trades-off exploration versus +exploitation through novel Gaussian process kernels and trust-region constrained acquisitions. In a set of experiments +on EPFL benchmarks, we demonstrate BOiLS's superior performance compared to state-of-the-art in terms of both sample +efficiency and QoR values. + +## [T-LBO: BO in a latent space shaped by metric learning](./T-LBO) + +

+ +Codebase associated +to: [High-Dimensional Bayesian Optimisation with Variational Autoencoders and Deep Metric Learning](https://arxiv.org/abs/2106.03609) + +##### Abstract + +We introduce a method based on deep metric learning to perform Bayesian optimisation over high-dimensional, structured +input spaces using variational autoencoders (VAEs). By extending ideas from supervised deep metric learning, we address +a longstanding problem in high-dimensional VAE Bayesian optimisation, namely how to enforce a discriminative latent +space as an inductive bias. Importantly, we achieve such an inductive bias using just 1% of the available labelled data +relative to previous work, highlighting the sample efficiency of our approach. As a theoretical contribution, we present +a proof of vanishing regret for our method. As an empirical contribution, we present state-of-the-art results on +real-world high-dimensional black-box optimisation problems including property-guided molecule generation. It is the +hope that the results presented in this paper can act as a guiding principle for realising effective high-dimensional +Bayesian optimisation. + +## [Bayesian Optimisation with Compositional Optimisers](./CompBO) + +
+ +Codebase associated +to: [Are we Forgetting about Compositional Optimisers in Bayesian Optimisation?](https://www.jmlr.org/papers/v22/20-1422.html) +accepted at **JMLR**. + +##### Abstract + +Bayesian optimisation presents a sample-efficient methodology for global optimisation. Within this framework, a crucial +performance-determining subroutine is the maximisation of the acquisition function, a task complicated by the fact that +acquisition functions tend to be non-convex and thus nontrivial to optimise. In this paper, we undertake a comprehensive +empirical study of approaches to maximise the acquisition function. Additionally, by deriving novel, yet mathematically +equivalent, compositional forms for popular acquisition functions, we recast the maximisation task as a compositional +optimisation problem, allowing us to benefit from the extensive literature in this field. We highlight the empirical +advantages of the compositional approach to acquisition function maximisation across 3958 individual experiments +comprising synthetic optimisation tasks as well as tasks from Bayesmark. Given the generality of the acquisition +function maximisation subroutine, we posit that the adoption of compositional optimisers has the potential to yield +performance improvements across all domains in which Bayesian optimisation is currently being applied. + +## [AIRBO](./AIRBO) + +
drawing + + + +Codebase associated +to: [Efficient Robust Bayesian Optimization for Arbitrary Uncertain Inputs](https://arxiv.org/abs/2310.20145) + +##### Abstract + +Bayesian Optimization (BO) is a sample-efficient optimization algorithm widely employed across various applications. In +some challenging BO tasks, input uncertainty arises due to the inevitable randomness in the optimization process, such +as machining errors, execution noise, or contextual variability. This uncertainty deviates the input from the intended +value before evaluation, resulting in significant performance fluctuations in the final result. In this paper, we +introduce a novel robust Bayesian Optimization algorithm, AIRBO, which can effectively identify a robust optimum that +performs consistently well under arbitrary input uncertainty. Our method directly models the uncertain inputs of +arbitrary distributions by empowering the Gaussian Process with the Maximum Mean Discrepancy (MMD) and further +accelerates the posterior inference via Nystrom approximation. Rigorous theoretical regret bound is established under +MMD estimation error and extensive experiments on synthetic functions and real problems demonstrate that our approach +can handle various input uncertainties and achieve state-of-the-art performance. + +# Reinforcement Learning Research + +## [Enhancing Reinforcement Learning Agents with Local Guides](./RLLG) + +Codebase associated +to [paper: Enhancing Reinforcement Learning Agents with Local Guides](https://hal.science/hal-04052358/file/Final_Reinforcement_Learning_with_Local_Guides.pdf) + +##### Abstract + +This paper addresses the problem of integrating local guide policies into a Reinforcement Learning agent. For this, we +show how +to adapt existing algorithms to this setting before introducing a +novel algorithm based on a noisy policy-switching procedure. This +approach builds on a proper Approximate Policy Evaluation (APE) +scheme to provide a perturbation that carefully leads the local +guides towards better actions. We evaluated our method on a set +of classical Reinforcement Learning problems, including safetycritical systems where the agent cannot enter some areas +at the +risk of triggering catastrophic consequences. In all the proposed +environments, our agent proved to be efficient at leveraging those +policies to improve the performance of any APE-based Reinforcement Learning algorithm, especially in its first learning +stages. + +## [Sauté RL and Simmer RL: Safe Reinforcement Learning Using Safety State Augmentation](./SIMMER) + +Codebase associated to: [Sauté RL: Almost Surely Safe RL Using State Augmentation](https://arxiv.org/pdf/2202.06558.pdf) +and [Enhancing Safe Exploration Using Safety State Augmentation](https://arxiv.org/pdf/2206.02675.pdf). + +##### Abstract for Sauté RL: Almost Surely Safe RL Using State Augmentation (ICML 2022) + +Satisfying safety constraints almost surely (or with probability one) can be critical for deployment of Reinforcement +Learning (RL) in real-life applications. For example, plane landing and take-off should ideally occur with probability +one. We address the problem by introducing Safety Augmented (Saute) Markov Decision Processes (MDPs), where the safety +constraints are eliminated by augmenting them into the state-space and reshaping the objective. We show that Saute MDP +satisfies the Bellman equation and moves us closer to solving Safe RL with constraints satisfied almost surely. 
We argue +that Saute MDP allows to view Safe RL problem from a different perspective enabling new features. For instance, our +approach has a plug-and-play nature, i.e., any RL algorithm can be "sauteed". Additionally, state augmentation allows +for policy generalization across safety constraints. We finally show that Saute RL algorithms can outperform their +state-of-the-art counterparts when constraint satisfaction is of high importance. + +##### Abstract for Effects of Safety State Augmentation on Safe Exploration (NeurIPS 2022) + +Safe exploration is a challenging and important problem in model-free reinforcement learning (RL). Often the safety cost +is sparse and unknown, which unavoidably leads to constraint violations -- a phenomenon ideally to be avoided in +safety-critical applications. We tackle this problem by augmenting the state-space with a safety state, which is +nonnegative if and only if the constraint is satisfied. The value of this state also serves as a distance toward +constraint violation, while its initial value indicates the available safety budget. This idea allows us to derive +policies for scheduling the safety budget during training. We call our approach Simmer (Safe policy IMproveMEnt for +RL) to reflect the careful nature of these schedules. We apply this idea to two safe RL problems: RL with constraints +imposed on an average cost, and RL with constraints imposed on a cost with probability one. Our experiments suggest +that simmering a safe algorithm can improve safety during training for both settings. We further show that Simmer can +stabilize training and improve the performance of safe RL with average constraints. + +## [Model-Based Offline Reinforcement Learning with Pessimism-Modulated Dynamics Belief](./PMDB) + +Code associdated +to: [Model-Based Offline Reinforcement Learning with Pessimism-Modulated Dynamics Belief](https://nips.cc/Conferences/2022/Schedule?showEvent=54842) +accepted +at **NeurIPS (2022)** conference. + +#### Abstract + +Model-based offline reinforcement learning (RL) aims to find highly rewarding policy, by leveraging a previously +collected static dataset and a dynamics model. While learned through reuse of static dataset, the dynamics model's +generalization ability hopefully promotes policy learning if properly utilized. To that end, several works propose to +quantify the uncertainty of predicted dynamics, and explicitly apply it to penalize reward. However, as the dynamics and +the reward are intrinsically different factors in context of MDP, characterizing the impact of dynamics uncertainty +through reward penalty may incur unexpected tradeoff between model utilization and risk avoidance. In this work, we +instead maintain a belief distribution over dynamics, and evaluate/optimize policy through biased sampling from the +belief. The sampling procedure, biased towards pessimism, is derived based on an alternating Markov game formulation +of offline RL. We formally show that the biased sampling naturally induces an updated dynamics belief with +policy-dependent reweighting factor, termed *Pessimism-Modulated Dynamics Belief*. To improve policy, we devise an +iterative regularized policy optimization algorithm for the game, with guarantee of monotonous improvement under certain +condition. To make practical, we further devise an offline RL algorithm to approximately find the solution. Empirical +results show that the proposed approach achieves state-of-the-art performance on a wide range of benchmark tasks. 
From 76b38a95dd747e455d5e134d9d4123b05233f570 Mon Sep 17 00:00:00 2001 From: Yuhui Date: Mon, 8 Jul 2024 15:45:08 +0100 Subject: [PATCH 5/5] Add ROS interafce on ROS side --- ROSLLM/ros_agent/CMakeLists.txt | 28 +++++ ROSLLM/ros_agent/README.md | 33 ++++++ .../ros_agent/example/agentactionservice.py | 29 +++++ ROSLLM/ros_agent/example/publish_time.py | 21 ++++ ROSLLM/ros_agent/package.xml | 21 ++++ ROSLLM/ros_agent/scripts/command_window.py | 25 ++++ ROSLLM/ros_agent/scripts/ros_agent_node.py | 110 ++++++++++++++++++ ROSLLM/ros_agent/srv/HandleAgentAction.srv | 5 + 8 files changed, 272 insertions(+) create mode 100644 ROSLLM/ros_agent/CMakeLists.txt create mode 100644 ROSLLM/ros_agent/README.md create mode 100755 ROSLLM/ros_agent/example/agentactionservice.py create mode 100644 ROSLLM/ros_agent/example/publish_time.py create mode 100644 ROSLLM/ros_agent/package.xml create mode 100644 ROSLLM/ros_agent/scripts/command_window.py create mode 100644 ROSLLM/ros_agent/scripts/ros_agent_node.py create mode 100644 ROSLLM/ros_agent/srv/HandleAgentAction.srv diff --git a/ROSLLM/ros_agent/CMakeLists.txt b/ROSLLM/ros_agent/CMakeLists.txt new file mode 100644 index 00000000..ded38f52 --- /dev/null +++ b/ROSLLM/ros_agent/CMakeLists.txt @@ -0,0 +1,28 @@ +cmake_minimum_required(VERSION 3.0.2) +project(ros_agent) + +find_package(catkin REQUIRED COMPONENTS + rospy + std_msgs + message_generation +) + +# catkin_python_setup() + +add_service_files( + FILES + HandleAgentAction.srv +) + +generate_messages( + DEPENDENCIES + std_msgs +) + +catkin_package( + CATKIN_DEPENDS message_runtime +) + +include_directories( + ${catkin_INCLUDE_DIRS} +) \ No newline at end of file diff --git a/ROSLLM/ros_agent/README.md b/ROSLLM/ros_agent/README.md new file mode 100644 index 00000000..a5015936 --- /dev/null +++ b/ROSLLM/ros_agent/README.md @@ -0,0 +1,33 @@ +# Agent interface + +This packge allow you to communicate with the Agent + +## Documentation + +### To start the Flask communication node: +In ROS environment run + +``` +python ../scripts/ros_agent_node.py +``` + +Please refer the document in the agent for strating the agent. + +### To stop the node: +Tpye ``exit`` in the human input + +## Examples: +To publish current time to the obsercvation: +In ROS environment run + +``` +python ../example/publish_time.py +``` + + +To strat the action service: +In ROS environment run + +``` +python ../example/agentactionservice.py +``` \ No newline at end of file diff --git a/ROSLLM/ros_agent/example/agentactionservice.py b/ROSLLM/ros_agent/example/agentactionservice.py new file mode 100755 index 00000000..86460913 --- /dev/null +++ b/ROSLLM/ros_agent/example/agentactionservice.py @@ -0,0 +1,29 @@ +#!/usr/bin/env python +import rospy +from ros_agent.srv import HandleAgentAction, HandleAgentActionResponse + +def handle_action(req): + """ + Process the action request and return the response. + This function simulates action processing by returning a success status, + a message, and a reward. 
+ """ + print("Received action request: {}".format(req.action)) + + # Here you would add the logic to process the action, e.g., controlling a robot or running an algorithm + response_message = "Action processed successfully" + reward = 1.0 # Example fixed reward; adjust based on actual action processing logic + + return HandleAgentActionResponse(success=True, response=response_message, reward=reward) + +def action_service(): + rospy.init_node('agent_action_service') + + # Create the service 'handle_agent_action' and specify the handler function + s = rospy.Service('handle_agent_action', HandleAgentAction, handle_action) + + print("Service 'handle_agent_action' ready to handle requests.") + rospy.spin() # Keep the service open. + +if __name__ == "__main__": + action_service() diff --git a/ROSLLM/ros_agent/example/publish_time.py b/ROSLLM/ros_agent/example/publish_time.py new file mode 100644 index 00000000..d8091e7c --- /dev/null +++ b/ROSLLM/ros_agent/example/publish_time.py @@ -0,0 +1,21 @@ +#!/usr/bin/env python3 +import rospy +from std_msgs.msg import String +import datetime + +def publish_timestamp(): + rospy.init_node('timestamp_publisher', anonymous=True) + publisher = rospy.Publisher('agent_environment', String, queue_size=10) + rate = rospy.Rate(5) # Frequency of 1 Hz + + while not rospy.is_shutdown(): + current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") + rospy.loginfo(f"Publishing current timestamp: {current_time}") + publisher.publish(current_time) + rate.sleep() + +if __name__ == '__main__': + try: + publish_timestamp() + except rospy.ROSInterruptException: + pass diff --git a/ROSLLM/ros_agent/package.xml b/ROSLLM/ros_agent/package.xml new file mode 100644 index 00000000..611e6d0a --- /dev/null +++ b/ROSLLM/ros_agent/package.xml @@ -0,0 +1,21 @@ + + + ros_agent + 0.0.0 + The ros_agent package + + hyu + cmower + + TODO + + hyu + cmower + + catkin + rospy + std_msgs + message_generation + message_runtime + + diff --git a/ROSLLM/ros_agent/scripts/command_window.py b/ROSLLM/ros_agent/scripts/command_window.py new file mode 100644 index 00000000..9dffc984 --- /dev/null +++ b/ROSLLM/ros_agent/scripts/command_window.py @@ -0,0 +1,25 @@ +from flask import Flask, request, jsonify + +app = Flask(__name__) + +@app.route('/', methods=['POST']) +def receive_string(): + if request.method == 'POST': + # Receive the JSON data + data = request.get_json() + + # Extract the string from the JSON data + received_string = data.get('msg') + + # Prompt the user to input another string + user_input_string = input(f"{received_string}\n") + + # Prepare the response JSON data + response_data = { + 'received_string': received_string, + 'user_input_string': user_input_string + } + return jsonify(response_data) + +if __name__ == '__main__': + app.run(host='0.0.0.0', port=5002, debug=True, use_reloader=False) diff --git a/ROSLLM/ros_agent/scripts/ros_agent_node.py b/ROSLLM/ros_agent/scripts/ros_agent_node.py new file mode 100644 index 00000000..d50daa89 --- /dev/null +++ b/ROSLLM/ros_agent/scripts/ros_agent_node.py @@ -0,0 +1,110 @@ +#!/usr/bin/env python3 +import rospy +import os +import signal + +from ros_agent.srv import HandleAgentAction, HandleAgentActionRequest +from flask import Flask, request, jsonify + +from std_srvs.srv import Trigger +from std_msgs.msg import String + +class AgentApi: + + def __init__(self, node): + self.node = node + self.app = Flask(__name__) + self.setup_routes() + + def setup_routes(self): + self.app.add_url_rule('/llmreq', 'handle_request_llm', 
self.handle_request_llm, methods=['POST']) + self.app.add_url_rule('/rosenv', 'handle_request_env', self.handle_request_env, methods=['POST']) + + def handle_request_llm(self): + if not request.json or 'action' not in request.json: + return jsonify({"obs": "Request body must contain 'action' field", + "reward":0,'success':False}), 400 + + action = request.json['action'] + success, resp, reward = self.node.handle_action(action) + print("Agent response:",action) + return jsonify({"done": success, "obs": resp, "reward": reward}) + + def handle_request_env(self): + print("handle env request OK") + self.node.request_human_feedback() + obs = self.node.get_combined_observation() + # print(obs) + return jsonify({"obs": obs, 'success': True if obs else False}) + + + def run_flask_app(self): + self.app.run(host='0.0.0.0', port=5000, debug=True, use_reloader=False) + + +class Node: + + + def __init__(self): + rospy.init_node("ros_agent_node", anonymous=True) + self.srv_name = "handle_agent_action" + self.latest_env_observation = "No observation yet" + self.latest_human_feedback = "No human input yet" + rospy.Subscriber("agent_environment", String, self.observation_callback) + # rospy.Subscriber("human_feedback", String, self.feedback_callback) # Subscribe to the human_feedback topic + self.action_publisher = rospy.Publisher("agent_action", String, queue_size=10) + self.api = AgentApi(self) + self.api.run_flask_app() + rospy.loginfo("initialized ros_agent node") + + def handle_action(self, action): + try: + self.action_publisher.publish(action) # Publish the action to the ROS network + handle_agent_action = rospy.ServiceProxy(self.srv_name, HandleAgentAction) + req = HandleAgentActionRequest(action=action) + resp = handle_agent_action(req) + rospy.loginfo(f"Action response: {resp.response}") + return resp.success, resp.response, resp.reward + except rospy.ServiceException as e: + success = False + response = f"handling request failed: {e}" + rospy.logwarn(f"handling request failed: {e}") + return success, response, 0.0 + + def request_human_feedback(self): + """Prompt for human feedback from the terminal.""" + self.latest_human_feedback = input(" Human, please enter input: ") + if self.latest_human_feedback.lower() == "exit": + print("Killing program...") + os.kill(os.getpid(), signal.SIGKILL) + + + def observation_callback(self, msg): + """Callback function to update the latest observed state.""" + self.latest_env_observation = msg.data + + # To read the humanfeedback from a topic + # def feedback_callback(self, msg): + # """Callback function to update the latest human feedback.""" + # self.latest_human_feedback = msg.data + + + def get_combined_observation(self): + """Combine the latest observation with the latest human feedback.""" + combined = f"Environment Observation: {self.latest_env_observation} | Human Input: {self.latest_human_feedback}" + print(f"Message to Agent: Environment Observation: {self.latest_env_observation} | Human Input: {self.latest_human_feedback} \n") + return combined + + + def spin(self): + while not rospy.is_shutdown(): + self.request_human_feedback() + rospy.spin() + + +def main(): + Node().spin() + + +if __name__ == "__main__": + main() diff --git a/ROSLLM/ros_agent/srv/HandleAgentAction.srv b/ROSLLM/ros_agent/srv/HandleAgentAction.srv new file mode 100644 index 00000000..1a9897ae --- /dev/null +++ b/ROSLLM/ros_agent/srv/HandleAgentAction.srv @@ -0,0 +1,5 @@ +string action +--- +bool success +string response +float64 reward
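For reference, here is a minimal sketch of how an external script could exercise the HTTP bridge added in this series, mirroring the requests that `RosApi` issues from the agent side. It assumes the Flask node from `ros_agent_node.py` is running on `localhost:5000` with the `/rosenv` and `/llmreq` routes registered as above; the helper function and the example action string are illustrative only and are not part of the patch.

```python
import requests

BASE_URL = "http://localhost:5000"  # Flask server started by ros_agent_node.py


def post_json(route: str, payload: dict, timeout: float = 180.0) -> dict:
    """POST a JSON payload to the ROS-agent bridge and return the decoded reply."""
    resp = requests.post(f"{BASE_URL}/{route}", json=payload, timeout=timeout)
    resp.raise_for_status()
    return resp.json()


if __name__ == "__main__":
    # Ask the ROS side for the latest environment observation plus human input,
    # as ROSTask.reset() does through RosApi.get_env_observation().
    obs = post_json("rosenv", {"": ""})
    print("observation:", obs.get("obs"))

    # Send an illustrative action; the reply fields mirror HandleAgentAction.srv
    # (a done/success flag, a textual response, and a scalar reward).
    reply = post_json("llmreq", {"action": "move to the table"})
    print("done:", reply.get("done"), "reward:", reply.get("reward"), "obs:", reply.get("obs"))
```

The same round trip can also be driven from within ROS by calling the `handle_agent_action` service directly, using the request and response fields defined in `HandleAgentAction.srv` above.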