Publish 0.4.9, document LangGraph (#362)

arcticfly · web-flow · commit 57e7de78771f · 2025-08-25T13:01:42.000-07:00
* Update version number

* Document LangGraph Integration

* Hide integration page
diff --git a/docs/docs.json b/docs/docs.json
@@ -69,15 +69,22 @@
       },
       {
         "group": "Tutorials",
-        "pages": ["tutorials/summarizer"]
+        "pages": [
+          "tutorials/summarizer"
+        ]
       },
       {
         "group": "Resources",
-        "pages": ["resources/models", "resources/glossary"]
+        "pages": [
+          "resources/models",
+          "resources/glossary"
+        ]
       },
       {
         "group": "Experimental",
-        "pages": ["experimental/gspo"]
+        "pages": [
+          "experimental/gspo"
+        ]
       }
     ]
   },
@@ -88,4 +95,4 @@
     "bluesky": "https://bsky.app/profile/openpipe.bsky.social",
     "github": "https://github.com/openpipe/ART"
   }
-}
+}
diff --git a/docs/integrations/langgraph-integration.mdx b/docs/integrations/langgraph-integration.mdx
@@ -0,0 +1,228 @@
+---
+title: "🦜🔗 LangGraph"
+description: "Build and train sophisticated AI agents using LangGraph with ART's reinforcement learning"
+---
+
+# LangGraph Integration
+
+ART's LangGraph integration enables you to build sophisticated, multi-step AI agents that learn and improve through reinforcement training. By combining LangGraph's powerful agent framework with ART's training capabilities, you can create agents that reason, use tools, and adapt their behavior over time.
+
+## Why Use ART with LangGraph?
+
+LangGraph provides an excellent framework for building ReAct-style agents that can reason through complex tasks step by step. However, getting these agents to perform optimally often requires extensive prompt engineering and manual tuning. ART's integration with LangGraph addresses this by:
+
+- **Automatic behavior improvement**: Train your agents to get better at multi-step reasoning without manual prompt tuning
+- **Tool usage optimization**: Learn when and how to use tools more effectively through reinforcement learning
+- **Adaptive decision making**: Agents learn to make better choices about which actions to take in different situations
+- **Scalable training**: Train on diverse scenarios to build robust, generalizable agent behaviors
+
+## Key Features
+
+- **Seamless integration**: Drop-in replacement for LangGraph's LLM initialization
+- **Automatic logging**: Captures all agent interactions for training data generation
+- **Multi-step trajectory support**: Handles complex agent workflows with tool calls and reasoning steps
+- **RULER compatibility**: Use ART's general-purpose reward function to train agents without hand-crafted rewards
+
+## Basic Usage
+
+Here's how to integrate ART with your existing LangGraph agent:
+
+```python
+import art
+from art.langgraph import wrap_rollout, init_chat_model
+from art.local import LocalBackend
+from langgraph.prebuilt import create_react_agent
+
+# Define your tools
+def search_inbox(query: str) -> str:
+    """Search for emails matching the query."""
+    # Your search implementation
+    return f"Found emails matching: {query}"
+
+def read_email(email_id: str) -> str:
+    """Read a specific email by ID."""
+    # Your email reading implementation
+    return f"Email content for {email_id}"
+
+tools = [search_inbox, read_email]
+
+async def train_email_agent():
+    with LocalBackend() as backend:
+        # Create your trainable model
+        model = art.TrainableModel(
+            name="email-agent-langgraph",
+            project="email-search-agent",
+            base_model="Qwen/Qwen2.5-7B-Instruct",
+        )
+
+        await backend.register_model(model)
+
+        # Define your rollout function
+        @wrap_rollout(model)
+        async def run_agent(scenario: str) -> art.Trajectory:
+            # Create the LangGraph agent with ART's LLM wrapper
+            agent = create_react_agent(init_chat_model(), tools)
+
+            # Run the agent
+            result = await agent.ainvoke({"messages": [("user", scenario)]})
+
+            # Return trajectory (automatically captured by wrap_rollout)
+            return art.Trajectory()
+
+        # Generate training data
+        scenarios = [
+            "Find emails from John about the quarterly report",
+            "Search for emails containing budget discussions from last week",
+            "Find the latest email from Sarah and summarize it",
+        ]
+
+        for scenario in scenarios:
+            await run_agent(scenario)
+
+        # Start training with RULER
+        await art.train(model, reward_function="ruler")
+
+if __name__ == "__main__":
+    import asyncio
+    asyncio.run(train_email_agent())
+```
+
+## How It Works
+
+The ART-LangGraph integration works through two main components:
+
+### 1. LLM Wrapper (`init_chat_model`)
+
+Replaces LangGraph's standard LLM initialization with ART's logging-enabled wrapper:
+
+```python
+# Standard LangGraph
+from langchain_openai import ChatOpenAI
+llm = ChatOpenAI(model="gpt-4")
+
+# With ART integration
+from art.langgraph import init_chat_model
+llm = init_chat_model()  # Automatically uses your model's inference settings
+```
+
+The wrapper captures all LLM interactions, including:
+
+- Input messages and prompts
+- Generated responses and tool calls
+- Tool execution results
+- Multi-step reasoning chains
+
+### 2. Rollout Wrapper (`wrap_rollout`)
+
+Automatically converts your agent execution into ART trajectories:
+
+```python
+@wrap_rollout(model)
+async def run_agent(scenario: str) -> art.Trajectory:
+    # Your agent logic here
+    agent = create_react_agent(init_chat_model(), tools)
+    result = await agent.ainvoke({"messages": [("user", scenario)]})
+    return art.Trajectory()  # Automatically populated from logs
+```
+
+The wrapper:
+
+- Creates unique execution threads for each agent run
+- Logs all intermediate steps and tool calls
+- Converts LangGraph messages to ART's training format
+- Handles complex multi-turn conversations automatically
+
+## Advanced Example: Email Search Agent
+
+Here's a more complete example of training an email search agent:
+
+```python
+import art
+from art.langgraph import wrap_rollout, init_chat_model
+from art.local import LocalBackend
+from langgraph.prebuilt import create_react_agent
+from typing import List
+
+def search_inbox(query: str, limit: int = 5) -> str:
+    """Search emails with improved functionality."""
+    # Simulate email search with realistic results
+    results = [
+        f"Email {i}: Subject matching '{query}' from user@example.com"
+        for i in range(min(limit, 3))
+    ]
+    return "\n".join(results) if results else "No emails found."
+
+def read_email(email_id: str) -> str:
+    """Read email with error handling."""
+    if not email_id.isdigit():
+        return "Error: Invalid email ID format"
+    return f"Email {email_id}: [Email content here...]"
+
+def return_final_answer(answer: str) -> str:
+    """Return the final answer to the user."""
+    return f"Final Answer: {answer}"
+
+tools = [search_inbox, read_email, return_final_answer]
+
+async def train_advanced_email_agent():
+    with LocalBackend() as backend:
+        model = art.TrainableModel(
+            name="advanced-email-agent",
+            project="email-agents",
+            base_model="Qwen/Qwen2.5-7B-Instruct",
+        )
+
+        await backend.register_model(model)
+
+        @wrap_rollout(model)
+        async def run_email_agent(scenario: str) -> art.Trajectory:
+            agent = create_react_agent(init_chat_model(), tools)
+
+            result = await agent.ainvoke({
+                "messages": [("user", scenario)]
+            })
+
+            return art.Trajectory()
+
+        # Diverse training scenarios
+        scenarios = [
+            "Find the most recent email from the finance team about Q4 budget",
+            "Search for emails containing 'meeting' and summarize the key points",
+            "Look for urgent emails from management and provide a brief overview",
+            "Find emails about project deadlines and list them by priority",
+        ]
+
+        # Generate training trajectories
+        for scenario in scenarios:
+            trajectory = await run_email_agent(scenario)
+            print(f"Generated trajectory for: {scenario}")
+
+        # Train with RULER
+        await art.train(model, reward_function="ruler")
+
+if __name__ == "__main__":
+    import asyncio
+    asyncio.run(train_advanced_email_agent())
+```
+
+## Best Practices
+
+### Agent Design
+
+- **Clear tool descriptions**: Ensure your tool functions have descriptive docstrings
+- **Error handling**: Include proper error handling in your tools for robust training
+- **Final answer pattern**: Use a dedicated tool for returning final answers to users
+
+### Training Data
+
+- **Diverse scenarios**: Create varied training scenarios that cover different use cases
+- **Realistic complexity**: Include both simple and complex multi-step tasks
+- **Edge cases**: Add scenarios that test error handling and edge cases
+
+### Performance Optimization
+
+- **Tool efficiency**: Optimize tool execution time since it affects training speed
+- **Batch generation**: Generate multiple trajectories efficiently using async patterns
+- **Resource management**: Monitor memory usage during long training runs
+
+The ART-LangGraph integration makes it straightforward to build and train sophisticated AI agents that improve their performance over time, turning your prototype agents into production-ready intelligent systems.
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "openpipe-art"
-version = "0.4.7"
+version = "0.4.9"
 description = "The OpenPipe Agent Reinforcement Training (ART) library"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -30,7 +30,7 @@ backend = [
     "setproctitle>=1.3.6",
     "tblib>=3.0.0",
     "setuptools>=78.1.0",
-    "wandb>=0.19.8",
+    "wandb==0.21.0",
     "polars>=1.26.0",
     "transformers==4.53.2",
     "trl==0.20.0",
diff --git a/uv.lock b/uv.lock

Original file line number	Diff line number	Diff line change
`@@ -69,15 +69,22 @@`
`69`	`69`	`},`
`70`	`70`	`{`
`71`	`71`	`"group": "Tutorials",`
`72`		`- "pages": ["tutorials/summarizer"]`
	`72`	`+ "pages": [`
	`73`	`+ "tutorials/summarizer"`
	`74`	`+ ]`
`73`	`75`	`},`
`74`	`76`	`{`
`75`	`77`	`"group": "Resources",`
`76`		`- "pages": ["resources/models", "resources/glossary"]`
	`78`	`+ "pages": [`
	`79`	`+ "resources/models",`
	`80`	`+ "resources/glossary"`
	`81`	`+ ]`
`77`	`82`	`},`
`78`	`83`	`{`
`79`	`84`	`"group": "Experimental",`
`80`		`- "pages": ["experimental/gspo"]`
	`85`	`+ "pages": [`
	`86`	`+ "experimental/gspo"`
	`87`	`+ ]`
`81`	`88`	`}`
`82`	`89`	`]`
`83`	`90`	`},`
`@@ -88,4 +95,4 @@`
`88`	`95`	`"bluesky": "https://bsky.app/profile/openpipe.bsky.social",`
`89`	`96`	`"github": "https://github.com/openpipe/ART"`
`90`	`97`	`}`
`91`		`-}`
	`98`	`+}`