NVIDIA-NeMo · yyu22 · Feb 9, 2026 · Feb 9, 2026 · Feb 12, 2026 · Feb 14, 2026
diff --git a/resources_servers/wordle/app.py b/resources_servers/wordle/app.py
diff --git a/resources_servers/wordle/configs/wordle.yaml b/resources_servers/wordle/configs/wordle.yaml
@@ -0,0 +1,33 @@
+wordle:
+  resources_servers:
+    wordle:
+      entrypoint: app.py
+      domain: games
+      verified: false
+      description: Wordle word-guessing game environment for training LLMs
+      value: Improve strategic reasoning and letter pattern recognition
+
+wordle_simple_agent:
+  responses_api_agents:
+    wordle:
+      entrypoint: app.py
+      resources_server:
+        type: resources_servers
+        name: wordle
+      model_server:
+        type: responses_api_models
+        name: policy_model
+      datasets:
+        - name: train
+          type: train
+          jsonl_fpath: resources_servers/wordle/data/train.jsonl
+          license: Apache 2.0
+        - name: validation
+          type: validation
+          jsonl_fpath: resources_servers/wordle/data/validation.jsonl
+          license: Apache 2.0
+        - name: example
+          type: example
+          jsonl_fpath: resources_servers/wordle/data/example.jsonl
+          license: Apache 2.0
+      max_steps: 12  # 6 turns × 2 tool calls max per turn
diff --git a/resources_servers/wordle/data/example.jsonl b/resources_servers/wordle/data/example.jsonl
diff --git a/resources_servers/wordle/generate_data.py b/resources_servers/wordle/generate_data.py
@@ -0,0 +1,264 @@
+#!/usr/bin/env python3
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Generate training and validation data for Wordle NemoGym environment.
+
+This script generates JSONL files with Wordle game prompts for training.
+
+Key design decisions:
+- Training data: No target words specified. Server picks randomly from TRAINING_WORDS
+  (2,000 words) at runtime. This gives variety across training runs.
+- Validation data: Fixed target words from VALIDATION_WORDS (315 words, no overlap
+  with training). This ensures reproducible evaluation across training steps.
+
+Usage:
+    python generate_data.py --output_dir data/
+    python generate_data.py --train_samples 2000 --output_dir data/
+"""
+
+import argparse
+import json
+import sys
+from pathlib import Path
+
+# Make project packages importable when this file is run directly as a script.
+gym_root = Path(__file__).parent.parent.parent  # directory that holds resources_servers/
+sys.path.insert(0, str(gym_root))
+nemo_rl_root = Path(__file__).parent.parent.parent.parent.parent  # NOTE(review): no visible import needs this; confirm
+sys.path.insert(0, str(nemo_rl_root))  # inserted last, so it is searched before gym_root
+
+
+# System prompt: first message of every generated entry (rules, feedback legend, tool-call policy).
+# Note: Reasoning mode disabled due to compatibility issues with multi-turn tool calling
+SYSTEM_PROMPT = """You are playing Wordle, a word-guessing game. Your goal is to guess a secret 5-letter word in 6 attempts or fewer.
+
+After each guess, you'll receive feedback:
+- G (Green): Letter is correct and in the right position
+- Y (Yellow): Letter is in the word but in the wrong position
+- _ (Gray): Letter is not in the word
+
+Strategy tips:
+- Start with words containing common letters (E, A, R, T, O, I, N, S)
+- Use the feedback to narrow down possibilities
+- Never repeat a guess
+- Place confirmed green letters in their positions
+- Include yellow letters in different positions
+- Avoid gray (eliminated) letters
+
+IMPORTANT: Always respond with a tool call. Never reply with plain text. After receiving feedback, immediately call submit_guess with your next guess. When you see "won": true or "game_over": true in a response, the game is over — do not make any more tool calls."""
+
+# Opening user messages; the generators cycle through this list by index (currently one prompt).
+USER_PROMPTS = [
+    "Make your first guess.",
+]
+
+# Function-tool schemas attached to every entry under responses_create_params["tools"].
+TOOLS = [
+    {
+        "type": "function",
+        "name": "submit_guess",
+        "description": "Submit a 5-letter word guess. Returns feedback for each letter: G (green) = correct position, Y (yellow) = wrong position but in word, _ (gray) = not in word.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "guess": {
+                    "type": "string",
+                    "description": "A 5-letter English word to guess"
+                }
+            },
+            "required": ["guess"],
+            "additionalProperties": False
+        },
+        "strict": True
+    },
+    {
+        "type": "function",
+        "name": "check_word_validity",
+        "description": "Check if a word is valid before guessing. This is optional and informational only - it won't affect your game.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "word": {
+                    "type": "string",
+                    "description": "A word to check for validity"
+                }
+            },
+            "required": ["word"],
+            "additionalProperties": False
+        },
+        "strict": True
+    },
+    {
+        "type": "function",
+        "name": "get_game_state",
+        "description": "Get the current game state including guesses made, feedback received, and accumulated knowledge about the target word.",
+        "parameters": {
+            "type": "object",
+            "properties": {},
+            "required": [],
+            "additionalProperties": False
+        },
+        "strict": True
+    }
+]
+
+
+def create_wordle_entry(
+    user_prompt: str,
+    word_length: int = 5,
+    max_turns: int = 6,
+    custom_target: str = None,
+) -> dict:
+    """Build one JSONL record describing a single Wordle game.
+
+    Args:
+        user_prompt: Text of the opening user message.
+        word_length: Value stored under "word_length" (default 5).
+        max_turns: Value stored under "max_turns" (default 6).
+        custom_target: Fixed target word to embed. Any falsy value (None or
+            "") omits the key, leaving the choice of target to the server.
+
+    Returns:
+        A dict holding the request parameters, the game settings and the
+        agent reference, plus "custom_target" when one was supplied.
+    """
+    system_message = {"role": "system", "content": SYSTEM_PROMPT}
+    user_message = {"role": "user", "content": user_prompt}
+    request_params = {
+        "input": [system_message, user_message],
+        "tools": TOOLS,  # shared list object, not copied per entry
+        "parallel_tool_calls": False,
+        "temperature": 1.0,
+    }
+    agent_ref = {"type": "responses_api_agents", "name": "wordle_simple_agent"}
+
+    record = {
+        "responses_create_params": request_params,
+        "word_length": word_length,
+        "max_turns": max_turns,
+        "agent_ref": agent_ref,
+    }
+    # Only validation-style entries pin the target word.
+    return {**record, "custom_target": custom_target} if custom_target else record
+
+
+def generate_training_data(num_samples: int, seed: int = 42) -> list[dict]:
+    """Generate training data WITHOUT target words.
+
+    No entry carries "custom_target"; per the module docstring the server
+    picks a target from TRAINING_WORDS at runtime.
+
+    Args:
+        num_samples: Number of entries to generate; values <= 0 give [].
+        seed: Accepted for backward compatibility; unused. Prompts are assigned
+            by cycling through USER_PROMPTS, so nothing here is random and the
+            global random state is left untouched.
+
+    Returns:
+        A list of num_samples entry dicts.
+    """
+    prompt_count = len(USER_PROMPTS)
+    # Cycle through the prompts in order: entry i gets prompt i mod prompt_count.
+    return [
+        create_wordle_entry(USER_PROMPTS[i % prompt_count], custom_target=None)
+        for i in range(num_samples)
+    ]
+
+
+def generate_validation_data(seed: int = 43) -> list[dict]:
+    """Generate validation data WITH fixed target words.
+
+    Emits one entry per word in VALIDATION_WORDS, in list order, so every run
+    evaluates the same targets. The split is documented as 315 words with no
+    training overlap; neither property is verified here.
+
+    Args:
+        seed: Accepted for backward compatibility; unused. Prompts are assigned
+            by cycling through USER_PROMPTS, so nothing here is random.
+
+    Returns:
+        One entry per validation word, each with "custom_target" set to it.
+    """
+    # Resolved through the gym_root entry added to sys.path at module import.
+    from resources_servers.wordle.wordle_words import VALIDATION_WORDS
+
+    # Use the complete list so the output matches the 315-word validation
+    # set described in the module docstring (no truncation of the split).
+    return [
+        create_wordle_entry(USER_PROMPTS[i % len(USER_PROMPTS)], custom_target=word)
+        for i, word in enumerate(VALIDATION_WORDS)
+    ]
+
+
+def save_jsonl(entries: list[dict], filepath: Path) -> None:
+    """Write entries to filepath as JSON Lines (one object per line), creating parent dirs."""
+    filepath.parent.mkdir(parents=True, exist_ok=True)
+    # Explicit encoding so the output does not depend on the platform locale.
+    with open(filepath, "w", encoding="utf-8") as f:
+        f.writelines(json.dumps(entry) + "\n" for entry in entries)
+    print(f"Saved {len(entries)} entries to {filepath}")
+
+
+def main():
+    """Parse CLI options, then write train, validation and example JSONL files."""
+    parser = argparse.ArgumentParser(
+        description="Generate Wordle training and validation data",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Word Split:
+  - TRAINING_WORDS: 2,000 words (server picks randomly at runtime)
+  - VALIDATION_WORDS: 315 words (fixed in JSONL, no overlap with training)
+
+Examples:
+  python generate_data.py                          # Default: 1000 training samples
+  python generate_data.py --train_samples 2000     # More training samples
+        """
+    )
+    parser.add_argument("--train_samples", type=int, default=1000,
+                        help="Number of training samples (default: 1000)")
+    parser.add_argument("--output_dir", type=str, default="data",
+                        help="Output directory for JSONL files (default: data)")
+    parser.add_argument("--seed", type=int, default=42,
+                        help="Random seed (default: 42)")
+    args = parser.parse_args()
+    output_dir = Path(args.output_dir)
+
+    # Generate training data (no target words - server picks from TRAINING_WORDS)
+    print(f"Generating {args.train_samples} training samples...")
+    print("  - No target words in data (server picks randomly from 2,000 training words)")
+    train_data = generate_training_data(args.train_samples, seed=args.seed)
+    save_jsonl(train_data, output_dir / "train.jsonl")
+
+    # Generate validation data (fixed target words from VALIDATION_WORDS)
+    print("\nGenerating validation samples...")
+    print("  - Fixed target words from 315 validation words (no overlap with training)")
+    val_data = generate_validation_data(seed=args.seed + 1)
+    save_jsonl(val_data, output_dir / "validation.jsonl")
+
+    # Generate example data (small subset of validation for quick testing)
+    print("\nGenerating example samples...")
+    example_data = val_data[:20]
+    save_jsonl(example_data, output_dir / "example.jsonl")
+    # Count the targets actually written instead of hard-coding the figure.
+    unique_targets = len({e["custom_target"] for e in val_data if "custom_target" in e})
+    print("\nDone!\n\nSummary:")
+    print(f"  Training:   {len(train_data)} samples (targets picked at runtime from 2,000 words)")
+    print(f"  Validation: {len(val_data)} samples (fixed targets, {unique_targets} unique words)")
+    print(f"  Example:    {len(example_data)} samples")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/resources_servers/wordle/requirements.txt b/resources_servers/wordle/requirements.txt
@@ -0,0 +1 @@
+-e nemo-gym[dev] @ ../../
diff --git a/resources_servers/wordle/tests/__init__.py b/resources_servers/wordle/tests/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
		# SPDX-License-Identifier: Apache-2.0