Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions trinity/common/workflows/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@
# on-policy distillation workflows
"on_policy_distill_workflow": "trinity.common.workflows.on_policy_distill_workflow.OnPolicyDistillWorkflow",
"on_policy_distill_math_workflow": "trinity.common.workflows.on_policy_distill_workflow.OnPolicyDistillMathWorkflow",
# custom workflows
"sudoku_workflow": "trinity.common.workflows.sudoku_workflow.SudokuWorkflow",
},
)

Expand Down
74 changes: 74 additions & 0 deletions trinity/common/workflows/sudoku_generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import random


class SudokuGenerator:
"""
Lightweight Sudoku generator.

This generator avoids relying on a single canonical solution by applying
randomized transformations to a solved grid before removing values to
create a puzzle. The difficulty is controlled by the number of removed
cells (holes).

"""

BASE_SOLUTION = [
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Relying on a single standard answer to generate Sudoku puzzles can easily lead to overfitting. Existing works (e.g., python-sudoku-generator-solver) can be referenced for the generation and evaluation parts.

[5, 3, 4, 6, 7, 8, 9, 1, 2],
[6, 7, 2, 1, 9, 5, 3, 4, 8],
[1, 9, 8, 3, 4, 2, 5, 6, 7],
[8, 5, 9, 7, 6, 1, 4, 2, 3],
[4, 2, 6, 8, 5, 3, 7, 9, 1],
[7, 1, 3, 9, 2, 4, 8, 5, 6],
[9, 6, 1, 5, 3, 7, 2, 8, 4],
[2, 8, 7, 4, 1, 9, 6, 3, 5],
[3, 4, 5, 2, 8, 6, 1, 7, 9],
]

def _shuffle_solution(self, board):
"""
Randomize a solved Sudoku grid while preserving validity.

This follows common Sudoku generation techniques:
- permuting numbers
- shuffling rows
- shuffling columns
"""
board = [row[:] for row in board]

# Shuffle numbers 1–9
numbers = list(range(1, 10))
shuffled_numbers = numbers[:]
random.shuffle(shuffled_numbers)
mapping = dict(zip(numbers, shuffled_numbers))
board = [[mapping[v] for v in row] for row in board]

# Shuffle rows
random.shuffle(board)

# Shuffle columns
board = list(map(list, zip(*board)))
random.shuffle(board)
board = list(map(list, zip(*board)))

return board

def generate(self, holes=40):
"""
Generate a Sudoku puzzle.

Args:
holes (int): Number of empty cells (0s) in the puzzle.
Larger values correspond to higher difficulty.

Returns:
tuple: (puzzle, solution)
"""
solution = self._shuffle_solution(self.BASE_SOLUTION)
puzzle = [row[:] for row in solution]

for _ in range(holes):
r = random.randint(0, 8)
c = random.randint(0, 8)
puzzle[r][c] = 0

return puzzle, solution
43 changes: 43 additions & 0 deletions trinity/common/workflows/sudoku_judge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
class SudokuJudge:
"""
Judge Sudoku board state.
- Checks row validity
- Checks column validity
- Checks 3x3 block validity
"""

@staticmethod
def is_valid(board):
# Check rows
for row in board:
nums = [v for v in row if v != 0]
if len(nums) != len(set(nums)):
return False

# Check columns
for col in range(9):
nums = []
for row in range(9):
v = board[row][col]
if v != 0:
nums.append(v)
if len(nums) != len(set(nums)):
return False

# Check 3x3 sub-grids
for br in range(0, 9, 3):
for bc in range(0, 9, 3):
nums = []
for r in range(br, br + 3):
for c in range(bc, bc + 3):
v = board[r][c]
if v != 0:
nums.append(v)
if len(nums) != len(set(nums)):
return False

return True

@staticmethod
def is_solved(board, solution):
return board == solution
182 changes: 182 additions & 0 deletions trinity/common/workflows/sudoku_workflow.py
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for your contribution. The Sudoku has some similarities to frozen lake.
And the current version has significant room for improvement.

A qualified Sudoku workflow should include three parts:

1.A Sudoku generator: Automatically generate solvable Sudoku puzzles and allow you to set the difficulty level.
2. An agentic workflow to solve the Sudoku: Some Sudoku is hard to solve in just one step, so an agentic workflow should be designed to solve the game in multiple steps.
3. A general judge function: Some Sudoku puzzles may have multiple possible solutions, the judge function should correctly parse the model's output and determine the correctness of the result according to the Sudoku rules, not just exactly match.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hi @pan-x-c ,
Thanks for the detailed feedback earlier.

I’ve implemented all the requested changes:

  • Added a SudokuGenerator that produces solvable puzzles (with adjustable difficulty via hole count)
  • Reworked the workflow into a multi-step agentic loop, similar in structure to FrozenLakeWorkflow
  • Added a SudokuJudge that validates rows, columns, and 3×3 blocks instead of exact string matching
  • Integrated generator + judge inside the workflow
  • Updated workflow registry

Please have a look and let me know if you’d like further improvements or additional refinements.

Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
import re

from trinity.common.experience import Experience
from trinity.common.workflows.workflow import Workflow

from .sudoku_generator import SudokuGenerator
from .sudoku_judge import SudokuJudge


class SudokuWorkflow(Workflow):
"""
Multi-step Sudoku solving workflow.

This workflow follows a FrozenLake-style agentic interaction pattern:
- Maintains an internal environment state (Sudoku board)
- Interacts with the model step by step
- Provides explicit rules, task description, and strict output format
- Gives feedback on invalid or ineffective actions
- Terminates on success or failure
"""

can_reset = True

def __init__(self, task, model, auxiliary_models=None):
super().__init__(task=task, model=model, auxiliary_models=auxiliary_models)

# Initialize puzzle
if "puzzle" in task.raw_task and "solution" in task.raw_task:
self.board = [row[:] for row in task.raw_task["puzzle"]]
self.solution = [row[:] for row in task.raw_task["solution"]]
else:
generator = SudokuGenerator()
self.board, self.solution = generator.generate()

self.judge = SudokuJudge()
self.max_steps = 20

# State tracking (FrozenLake-style)
self.current_step = 0
self.last_board = None
self.last_action = None

def reset(self, task):
"""Reset the workflow state for a new task."""
self.board = [row[:] for row in task.raw_task["puzzle"]]
self.solution = [row[:] for row in task.raw_task["solution"]]
self.current_step = 0
self.last_board = None
self.last_action = None

def _build_prompt(self):
"""
Build a detailed, step-aware prompt inspired by the Frozen Lake example.
"""
prompt = (
"You are playing a Sudoku game.\n\n"
"Game Rules:\n"
"- The board is a 9x9 grid.\n"
"- A value of 0 represents an empty cell.\n"
"- Each row must contain the numbers 1 through 9 exactly once.\n"
"- Each column must contain the numbers 1 through 9 exactly once.\n"
"- Each 3x3 sub-grid must contain the numbers 1 through 9 exactly once.\n"
"- You may only place numbers in empty cells.\n\n"
"Task:\n"
"- At each step, output ONE valid move to progress toward solving the puzzle.\n\n"
"Output Format (STRICT):\n"
"```row col value```\n\n"
"Example:\n"
"```0 2 4```\n\n"
f"Current Step: {self.current_step}\n"
f"Remaining Steps: {self.max_steps - self.current_step}\n\n"
f"Current Board:\n{self.board}\n"
)

if self.last_board is not None and self.board == self.last_board:
prompt += (
"\nYour last response was invalid or had no effect. "
"Please recheck the Sudoku rules and the required output format."
)

return prompt

def parse_action(self, text):
"""
Parse model output.

Expected format:
```row col value```
"""
matches = re.findall(r"```(.*?)```", text, re.DOTALL)
if not matches:
return None

try:
parts = matches[-1].strip().split()
if len(parts) != 3:
return None

r, c, v = map(int, parts)
if not (0 <= r <= 8 and 0 <= c <= 8 and 1 <= v <= 9):
return None

return r, c, v
except ValueError:
return None

def apply_move(self, r, c, v):
"""Apply a move to the board if the cell is empty."""
if self.board[r][c] == 0:
self.board[r][c] = v

def run(self):
"""
Execute the Sudoku workflow step by step.
"""
experiences = []

for _ in range(self.max_steps):
prompt = self._build_prompt()

responses = self.model.chat([{"role": "user", "content": prompt}])
resp = responses[0]

self.last_board = [row[:] for row in self.board]

action = self.parse_action(resp.response_text)
if action is None:
reward = -1.0
experiences.append(
Experience(
tokens=resp.tokens,
prompt_length=resp.prompt_length,
reward=reward,
logprobs=resp.logprobs,
)
)
break

r, c, v = action
self.apply_move(r, c, v)

# Invalid or ineffective action
if self.board == self.last_board or not self.judge.is_valid(self.board):
reward = -1.0
experiences.append(
Experience(
tokens=resp.tokens,
prompt_length=resp.prompt_length,
reward=reward,
logprobs=resp.logprobs,
)
)
break

# Solved
if self.judge.is_solved(self.board, self.solution):
reward = 1.0
experiences.append(
Experience(
tokens=resp.tokens,
prompt_length=resp.prompt_length,
reward=reward,
logprobs=resp.logprobs,
)
)
break

# Intermediate step
reward = 0.0
experiences.append(
Experience(
tokens=resp.tokens,
prompt_length=resp.prompt_length,
reward=reward,
logprobs=resp.logprobs,
)
)

self.last_action = action
self.current_step += 1

return experiences