diff --git a/Agent/README.md b/Agent/README.md
index 1522a500..b5b0f6fc 100644
--- a/Agent/README.md
+++ b/Agent/README.md
@@ -12,3 +12,18 @@ make html
 ```
 
 There are also several helpful [tutorials](tutorials/) to help you get started with running and customizing Agent.
+
+
+## ROS interface
+
+To run the agent with the Flask interface to ROS:
+
+In a conda environment, use the following command:
+
+```
+python ../src/agent/start.py task=ros_task method=direct llm@agent.llm=human
+```
+
+Replace ``llm@agent.llm=human`` with your actual model.
+
+In the ROS environment, follow the instructions in the ``ros_agent`` package.
diff --git a/Agent/configs/task/ros_task.yaml b/Agent/configs/task/ros_task.yaml
new file mode 100644
index 00000000..58a6f40b
--- /dev/null
+++ b/Agent/configs/task/ros_task.yaml
@@ -0,0 +1,16 @@
+# @package _global_
+agent:
+  prompt_builder:
+    template_paths:
+      - default
+
+
+task:
+  _target_: agent.tasks.ros_task.ROSTask
+  name: simplebot_test_env
+  description:
+  subtask: null
+  version: v0.1
+
+max_env_steps: 1
+max_episodes: 100
\ No newline at end of file
diff --git a/Agent/src/agent/prompts/templates/default/direct_prompt.jinja b/Agent/src/agent/prompts/templates/default/direct_prompt.jinja
new file mode 100644
index 00000000..77ff8ef2
--- /dev/null
+++ b/Agent/src/agent/prompts/templates/default/direct_prompt.jinja
@@ -0,0 +1,3 @@
+Now please answer the question.
+Answer in the format
+Answer:
diff --git a/Agent/src/agent/prompts/templates/default/trajectory.jinja b/Agent/src/agent/prompts/templates/default/trajectory.jinja
index 02485bbd..19a80b5b 100644
--- a/Agent/src/agent/prompts/templates/default/trajectory.jinja
+++ b/Agent/src/agent/prompts/templates/default/trajectory.jinja
@@ -3,4 +3,4 @@ Here is what happened so far:
 {%- if action %}
 Action: {{action}}
 {%- endif %}
-Observation: {{memory.retrieve({memory.mem_keys.OBSERVATION: 1.0})}}
+Current: {{memory.retrieve({memory.mem_keys.OBSERVATION: 1.0})}}
diff --git a/Agent/src/agent/tasks/ros_api.py b/Agent/src/agent/tasks/ros_api.py
new file mode 100644
index 00000000..02b5171f
--- /dev/null
+++ b/Agent/src/agent/tasks/ros_api.py
@@ -0,0 +1,67 @@
+import requests
+
+
+class RosApi:
+
+    # Defined on the ROS side
+    default_timeout = 3 * 60  # default timeout is 3 minutes
+
+    def __init__(self, timeout=None):
+        self.timeout = self.default_timeout if timeout is None else timeout
+        self._response = None
+
+    @staticmethod
+    def bad_response(obs):
+        return {
+            "success": False,
+            "done": True,
+            "reward": 0,
+            "obs": obs,
+        }
+
+    def send_action(self, action):
+        url = "http://localhost:5000/llmreq"
+        self._response = None  # reset response
+        try:
+            data = {"action": action}
+            resp = requests.post(url, json=data, timeout=self.timeout)
+            response = resp.json()
+        except requests.exceptions.Timeout:
+            response = self.bad_response("Request timeout.")
+        except requests.exceptions.RequestException as e:
+            response = self.bad_response(f"Request exception: {e}")
+        self._response = response
+
+    def get_env_observation(self):
+        url = "http://localhost:5000/rosenv"
+        self._response = None  # reset response
+        try:
+            data = {"": ""}
+            resp = requests.post(url, json=data, timeout=self.timeout)
+            response = resp.json()
+        except requests.exceptions.Timeout:
+            response = self.bad_response("Request timeout.")
+        except requests.exceptions.RequestException as e:
+            response = self.bad_response(f"Request exception: {e}")
+        self._response = response
+
+        return response
+
+    def get_feedback(self):
+        url = "http://localhost:5000/rosfdb"  # NOTE: requires a matching /rosfdb route on the ROS-side server
+        self._response = None  # reset response
+        try:
+            data = {"": ""}
+            resp = requests.post(url, json=data, timeout=self.timeout)
+            response = resp.json()
+        except requests.exceptions.Timeout:
+            response = self.bad_response("Request timeout.")
+        except requests.exceptions.RequestException as e:
+            response = self.bad_response(f"Request exception: {e}")
+        self._response = response
+
+        return response
+
+    def receive_response(self):
+        assert self._response is not None, "did not receive a response"
+        return self._response
diff --git a/Agent/src/agent/tasks/ros_task.py b/Agent/src/agent/tasks/ros_task.py
new file mode 100644
index 00000000..a0c24d4d
--- /dev/null
+++ b/Agent/src/agent/tasks/ros_task.py
@@ -0,0 +1,51 @@
+from agent.tasks import Task
+from .ros_api import RosApi
+from typing import Dict
+import warnings
+
+
+class ROSTask(Task):
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.ros_api = RosApi()
+        print(self.ros_api)
+        self.response = {"reward": 0.0,
+                         "done": False,
+                         "obs": ""}
+        self.possible_actions = []
+
+    def answer_parser(self, raw_response: str):
+        return raw_response
+
+    def is_complete(self):
+        return self.response['done']
+
+    def reset(self, next_subtask: str | None = None) -> Dict[str, str]:
+        """Reset the environment and return the initial observation."""
+
+        if next_subtask is not None:
+            warnings.warn("ros_task does not support subtasks, ignoring subtask")
+
+        response = self.ros_api.get_env_observation()
+
+        return {
+            "_text_obs": response,
+            "_available_actions": self.possible_actions
+        }
+
+    def get_observation(self):
+        obs = self.ros_api.get_env_observation()
+        fdb = self.ros_api.get_feedback()
+        return {
+            "_text_obs": f"{obs}\n{fdb}",  # combine environment observation and feedback
+            "_available_actions": self.possible_actions
+        }
+
+    def step(self, action):
+        print(action)
+        self.ros_api.send_action(action)
+        self.response = self.ros_api.receive_response()
+        print(self.response['obs'])
+        return {}, self.response["reward"], self.response["done"]
+
diff --git a/README.md b/README.md
index 873fde76..412009cc 100644
--- a/README.md
+++ b/README.md
@@ -17,8 +17,6 @@ Huawei, Noah's Ark Lab.
 - [Enhancing Reinforcement Learning Agents with Local Guides](RLLG)
 - [Sauté RL and Simmer RL: Safe Reinforcement Learning Using Safety State Augmentation ](./SIMMER)
 - [Model-Based Offline Reinforcement Learning with Pessimism-Modulated Dynamics Belief](./PMDB)
-- Robotics Research
-  - [ROS-LLM: A ROS framework for embodied AI with task feedback and structured reasoning](./ROSLLM)
 
 Further instructions are provided in the README files associated to each project.
 
@@ -91,7 +89,7 @@ design, and logic synthesis for electronic design automation.
 
 ## [RDUCB: High Dimensional Bayesian Optimisation with Random Decompositions](./RDUCB)
 
-
+
 
 Codebase associated
@@ -168,7 +166,7 @@ efficiency and QoR values.
 
-
+
 
 Codebase associated
@@ -306,12 +304,3 @@ policy-dependent reweighting factor, termed *Pessimism-Modulated Dynamics Belief
 iterative regularized policy optimization algorithm for the game, with guarantee of monotonous improvement under certain condition. To make practical, we further
 devise an offline RL algorithm to approximately find the solution. Empirical results show that the proposed approach achieves state-of-the-art performance on a wide
 range of benchmark tasks.
-
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-
-## Codebase Contributors
-
- Current contributors: Antoine Grosnit, Alexandre Max Maravel, Taher Jafferjee, Wenlong Lyu, Kaiyang Guo, Juliusz Ziomek, Paul Daoudi, Merwan Barlier, Christopher E. Mower.
-
- Alumni / External contributors: Alexander I. Cowen-Rivers, Kamil Dreczkowski, Aivar Sootla, Ryan Rhys Griffiths, Zhi Wang, Ludovic Dos Santos, Bogdan Robu, Christophe Prieur.
-
diff --git a/ROSLLM/ros_agent/CMakeLists.txt b/ROSLLM/ros_agent/CMakeLists.txt
new file mode 100644
index 00000000..ded38f52
--- /dev/null
+++ b/ROSLLM/ros_agent/CMakeLists.txt
@@ -0,0 +1,28 @@
+cmake_minimum_required(VERSION 3.0.2)
+project(ros_agent)
+
+find_package(catkin REQUIRED COMPONENTS
+  rospy
+  std_msgs
+  message_generation
+)
+
+# catkin_python_setup()
+
+add_service_files(
+  FILES
+  HandleAgentAction.srv
+)
+
+generate_messages(
+  DEPENDENCIES
+  std_msgs
+)
+
+catkin_package(
+  CATKIN_DEPENDS message_runtime
+)
+
+include_directories(
+  ${catkin_INCLUDE_DIRS}
+)
\ No newline at end of file
diff --git a/ROSLLM/ros_agent/README.md b/ROSLLM/ros_agent/README.md
new file mode 100644
index 00000000..a5015936
--- /dev/null
+++ b/ROSLLM/ros_agent/README.md
@@ -0,0 +1,33 @@
+# Agent interface
+
+This package allows you to communicate with the Agent.
+
+## Documentation
+
+### To start the Flask communication node:
+In the ROS environment, run
+
+```
+python ../scripts/ros_agent_node.py
+```
+
+Please refer to the documentation in the Agent for starting the agent.
+
+### To stop the node:
+Type ``exit`` in the human input.
+
+## Examples:
+To publish the current time to the observation:
+In the ROS environment, run
+
+```
+python ../example/publish_time.py
+```
+
+
+To start the action service:
+In the ROS environment, run
+
+```
+python ../example/agentactionservice.py
+```
\ No newline at end of file
diff --git a/ROSLLM/ros_agent/example/agentactionservice.py b/ROSLLM/ros_agent/example/agentactionservice.py
new file mode 100755
index 00000000..86460913
--- /dev/null
+++ b/ROSLLM/ros_agent/example/agentactionservice.py
@@ -0,0 +1,29 @@
+#!/usr/bin/env python
+import rospy
+from ros_agent.srv import HandleAgentAction, HandleAgentActionResponse
+
+def handle_action(req):
+    """
+    Process the action request and return the response.
+    This function simulates action processing by returning a success status,
+    a message, and a reward.
+    """
+    print("Received action request: {}".format(req.action))
+
+    # Here you would add the logic to process the action, e.g., controlling a robot or running an algorithm
+    response_message = "Action processed successfully"
+    reward = 1.0  # Example fixed reward; adjust based on actual action processing logic
+
+    return HandleAgentActionResponse(success=True, response=response_message, reward=reward)
+
+def action_service():
+    rospy.init_node('agent_action_service')
+
+    # Create the service 'handle_agent_action' and specify the handler function
+    s = rospy.Service('handle_agent_action', HandleAgentAction, handle_action)
+
+    print("Service 'handle_agent_action' ready to handle requests.")
+    rospy.spin()  # Keep the service open.
+
+if __name__ == "__main__":
+    action_service()
diff --git a/ROSLLM/ros_agent/example/publish_time.py b/ROSLLM/ros_agent/example/publish_time.py
new file mode 100644
index 00000000..d8091e7c
--- /dev/null
+++ b/ROSLLM/ros_agent/example/publish_time.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+import rospy
+from std_msgs.msg import String
+import datetime
+
+def publish_timestamp():
+    rospy.init_node('timestamp_publisher', anonymous=True)
+    publisher = rospy.Publisher('agent_environment', String, queue_size=10)
+    rate = rospy.Rate(5)  # publish at 5 Hz
+
+    while not rospy.is_shutdown():
+        current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        rospy.loginfo(f"Publishing current timestamp: {current_time}")
+        publisher.publish(current_time)
+        rate.sleep()
+
+if __name__ == '__main__':
+    try:
+        publish_timestamp()
+    except rospy.ROSInterruptException:
+        pass
diff --git a/ROSLLM/ros_agent/package.xml b/ROSLLM/ros_agent/package.xml
new file mode 100644
index 00000000..611e6d0a
--- /dev/null
+++ b/ROSLLM/ros_agent/package.xml
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+<package format="2">
+  <name>ros_agent</name>
+  <version>0.0.0</version>
+  <description>The ros_agent package</description>
+
+  <maintainer>hyu</maintainer>
+  <maintainer>cmower</maintainer>
+
+  <license>TODO</license>
+
+  <author>hyu</author>
+  <author>cmower</author>
+
+  <buildtool_depend>catkin</buildtool_depend>
+  <build_depend>rospy</build_depend>
+  <build_depend>std_msgs</build_depend>
+  <build_depend>message_generation</build_depend>
+  <exec_depend>message_runtime</exec_depend>
+
+</package>
diff --git a/ROSLLM/ros_agent/scripts/command_window.py b/ROSLLM/ros_agent/scripts/command_window.py
new file mode 100644
index 00000000..9dffc984
--- /dev/null
+++ b/ROSLLM/ros_agent/scripts/command_window.py
@@ -0,0 +1,25 @@
+from flask import Flask, request, jsonify
+
+app = Flask(__name__)
+
+@app.route('/', methods=['POST'])
+def receive_string():
+    if request.method == 'POST':
+        # Receive the JSON data
+        data = request.get_json()
+
+        # Extract the string from the JSON data
+        received_string = data.get('msg')
+
+        # Prompt the user to input another string
+        user_input_string = input(f"{received_string}\n")
+
+        # Prepare the response JSON data
+        response_data = {
+            'received_string': received_string,
+            'user_input_string': user_input_string
+        }
+        return jsonify(response_data)
+
+if __name__ == '__main__':
+    app.run(host='0.0.0.0', port=5002, debug=True, use_reloader=False)
diff --git a/ROSLLM/ros_agent/scripts/ros_agent_node.py b/ROSLLM/ros_agent/scripts/ros_agent_node.py
new file mode 100644
index 00000000..d50daa89
--- /dev/null
+++ b/ROSLLM/ros_agent/scripts/ros_agent_node.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+import rospy
+import os
+import signal
+
+from ros_agent.srv import HandleAgentAction, HandleAgentActionRequest
+from flask import Flask, request, jsonify
+
+from std_srvs.srv import Trigger
+from std_msgs.msg import String
+
+class AgentApi:
+
+    def __init__(self, node):
+        self.node = node
+        self.app = Flask(__name__)
+        self.setup_routes()
+
+    def setup_routes(self):
+        self.app.add_url_rule('/llmreq', 'handle_request_llm',
+                              self.handle_request_llm, methods=['POST'])
+        self.app.add_url_rule('/rosenv', 'handle_request_env', self.handle_request_env, methods=['POST'])
+
+    def handle_request_llm(self):
+        if not request.json or 'action' not in request.json:
+            return jsonify({"obs": "Request body must contain 'action' field",
+                            "reward": 0, "success": False}), 400
+
+        action = request.json['action']
+        success, resp, reward = self.node.handle_action(action)
+        print("Agent action:", action)
+        return jsonify({"done": success, "obs": resp, "reward": reward})
+
+    def handle_request_env(self):
+        print("handle env request OK")
+        self.node.request_human_feedback()
+        obs = self.node.get_combined_observation()
+        # print(obs)
+        return jsonify({"obs": obs, "success": bool(obs)})
+
+
+    def run_flask_app(self):
+        self.app.run(host='0.0.0.0', port=5000, debug=True, use_reloader=False)
+
+
+class Node:
+
+
+    def __init__(self):
+        rospy.init_node("ros_agent_node", anonymous=True)
+        self.srv_name = "handle_agent_action"
+        self.latest_env_observation = "No observation yet"
+        self.latest_human_feedback = "No human input yet"
+        rospy.Subscriber("agent_environment", String, self.observation_callback)
+        # rospy.Subscriber("human_feedback", String, self.feedback_callback)  # Subscribe to the human_feedback topic
+        self.action_publisher = rospy.Publisher("agent_action", String, queue_size=10)
+        self.api = AgentApi(self)
+        self.api.run_flask_app()  # NOTE: app.run() blocks; the lines below only run once the Flask server stops
+        rospy.loginfo("initialized ros_agent node")
+
+    def handle_action(self, action):
+        try:
+            self.action_publisher.publish(action)  # Publish the action to the ROS network
+            handle_agent_action = rospy.ServiceProxy(self.srv_name, HandleAgentAction)
+            req = HandleAgentActionRequest(action=action)
+            resp = handle_agent_action(req)
+            rospy.loginfo(f"Action response: {resp.response}")
+            return resp.success, resp.response, resp.reward
+        except rospy.ServiceException as e:
+            success = False
+            response = f"handling request failed: {e}"
+            rospy.logwarn(f"handling request failed: {e}")
+            return success, response, 0.0
+
+    def request_human_feedback(self):
+        """Prompt for human feedback from the terminal."""
+        self.latest_human_feedback = input(" Human, please enter input: ")
+        if self.latest_human_feedback.lower() == "exit":
+            print("Killing program...")
+            os.kill(os.getpid(), signal.SIGKILL)
+
+
+    def observation_callback(self, msg):
+        """Callback function to update the latest observed state."""
+        self.latest_env_observation = msg.data
+
+    # To read the human feedback from a topic
+    # def feedback_callback(self, msg):
+    #     """Callback function to update the latest human feedback."""
+    #     self.latest_human_feedback = msg.data
+
+
+    def get_combined_observation(self):
+        """Combine the latest observation with the latest human feedback."""
+        combined = f"Environment Observation: {self.latest_env_observation} | Human Input: {self.latest_human_feedback}"
+        print(f"Message to Agent: {combined}\n")
+        return combined
+
+
+    def spin(self):
+        while not rospy.is_shutdown():
+            self.request_human_feedback()
+        rospy.spin()
+
+
+def main():
+    Node().spin()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/ROSLLM/ros_agent/srv/HandleAgentAction.srv b/ROSLLM/ros_agent/srv/HandleAgentAction.srv
new file mode 100644
index 00000000..1a9897ae
--- /dev/null
+++ b/ROSLLM/ros_agent/srv/HandleAgentAction.srv
@@ -0,0 +1,5 @@
+string action
+---
+bool success
+string response
+float64 reward
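
A minimal sketch of an end-to-end check of the bridge added above, assuming `ros_agent_node.py` and `agentactionservice.py` are both running and the Flask server is on `localhost:5000` as configured in `ros_agent_node.py`. The action string is only an illustration, and calling `/rosenv` will prompt for human input in the node's terminal:

```python
import requests

BASE = "http://localhost:5000"  # port used by ros_agent_node.py

# Fetch the combined environment observation and human input
# (this triggers a human-input prompt in the ros_agent_node terminal).
obs = requests.post(f"{BASE}/rosenv", json={"": ""}, timeout=180).json()
print(obs["obs"])

# Send one action; ros_agent_node publishes it on 'agent_action', calls the
# 'handle_agent_action' service, and returns {"done": ..., "obs": ..., "reward": ...}.
result = requests.post(f"{BASE}/llmreq", json={"action": "example action"}, timeout=180).json()
print(result)
```

This mirrors what `RosApi.get_env_observation` and `RosApi.send_action` do on the Agent side; the 180-second timeout matches `RosApi.default_timeout`.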