1 change: 1 addition & 0 deletions README.md
@@ -27,6 +27,7 @@ limitations under the License.
 [![GitHub pull requests](https://img.shields.io/github/issues-pr/NVIDIA/NeMo-Agent-Toolkit)](https://github.com/NVIDIA/NeMo-Agent-Toolkit/pulls)
 [![GitHub Repo stars](https://img.shields.io/github/stars/NVIDIA/NeMo-Agent-Toolkit)](https://github.com/NVIDIA/NeMo-Agent-Toolkit)
 [![GitHub forks](https://img.shields.io/github/forks/NVIDIA/NeMo-Agent-Toolkit)](https://github.com/NVIDIA/NeMo-Agent-Toolkit/network/members)
+[![Ask DeepWiki](https://deepwiki.com/badge.svg)](https://deepwiki.com/NVIDIA/NeMo-Agent-Toolkit)
 [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/NVIDIA/NeMo-Agent-Toolkit/)
 <!-- vale on -->

2 changes: 2 additions & 0 deletions docs/source/resources/troubleshooting.md
@@ -21,6 +21,8 @@ limitations under the License.

- **Workflow Not Found**: Ensure that your workflow is correctly registered and that the `_type` in your configuration file matches the workflow's `_type`.

- **Component Not Found**: If NeMo Agent Toolkit reports that a given component is missing even though you know the corresponding package is installed, there is likely an issue with your Python environment. Ensure that no Conda environment is active, and create only vanilla Python virtual environments through `python -m venv` or `uv venv` with no other environments active. Key indications of a Conda conflict are a shell prompt prefixed with `(base)` or `which python` pointing to a Conda- or Anaconda-managed Python distribution.
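As a quick diagnostic, the checks above can be sketched in a few lines of Python (the environment-variable check is an assumption based on Conda's convention of exporting `CONDA_PREFIX` for the active environment):

```python
import os
import sys

# Report the interpreter in use (equivalent to `which python`).
print(sys.executable)

# Conda conventionally exports CONDA_PREFIX for the active environment.
if os.environ.get("CONDA_PREFIX"):
    print("A Conda environment is active; deactivate it before creating a venv.")
else:
    print("No Conda environment detected.")
```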

- **Requested {category} type is ambiguous**: This error can arise when the `_type` in your configuration file is not unique; ensure that each workflow's `_type` is unique. It can also occur after upgrading the toolkit in place from a previous version while developing. To fix this issue, run the following commands:

<!-- path-check-skip-begin -->
9 changes: 7 additions & 2 deletions pyproject.toml
@@ -322,10 +322,11 @@ dev = [
"nvidia-nat_test",
"nvidia-sphinx-theme>=0.0.9",
"pre-commit>=4.0,<5.0",
"pytest_httpserver==1.1.*",
"pytest~=8.3",
"pytest-asyncio==0.24.*",
"pytest-cov~=6.1",
"pytest~=8.3",
"pytest_httpserver==1.1.*",
"pytest-timeout~=2.4",
"python-docx~=1.1.0",
"ruff~=0.12",
"setuptools >= 64",
@@ -407,6 +408,10 @@ testpaths = ["tests", "examples/**/tests", "packages/**/tests"]
 asyncio_mode = "auto"
 asyncio_default_fixture_loop_scope = "session"
 
+# Global timeout of 5 minutes per test to catch hanging tests.
+# Individual tests can override with @pytest.mark.timeout(seconds) or disable with @pytest.mark.timeout(0).
+timeout = 300
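The per-test override mechanism mentioned in the comment can be sketched as follows (test names are hypothetical; assumes the `pytest-timeout` plugin added to the dev dependencies above is installed):

```python
import time

import pytest


@pytest.mark.timeout(600)  # allow a known-slow test up to 10 minutes
def test_slow_path():
    time.sleep(0.01)


@pytest.mark.timeout(0)  # disable the timeout entirely for this test
def test_unbounded():
    assert True
```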


[tool.ruff]
# Set the maximum line length to 120.
31 changes: 17 additions & 14 deletions src/nat/agent/rewoo_agent/prompt.py
@@ -52,33 +52,36 @@

Here is an example of how a valid JSON output should look:


 ```json
 [
-\'{{
+{{
     "plan": "Find Alex's schedule on Sep 25, 2025",
-    "evidence": \'{{
+    "evidence": {{
         "placeholder": "#E1",
         "tool": "search_calendar",
-        "tool_input": ("Alex", "09/25/2025")
-    }}\'
-}}\',
-\'{{
+        "tool_input": ["Alex", "09/25/2025"]
+    }}
+}},
+{{
     "plan": "Find Bill's schedule on sep 25, 2025",
-    "evidence": \'{{
+    "evidence": {{
         "placeholder": "#E2",
         "tool": "search_calendar",
-        "tool_input": ("Bill", "09/25/2025")
-    }}\'
-}}\',
-\'{{
+        "tool_input": ["Bill", "09/25/2025"]
+    }}
+}},
+{{
     "plan": "Suggest a time for 1-hour meeting given Alex's and Bill's schedule.",
-    "evidence": \'{{
+    "evidence": {{
         "placeholder": "#E3",
         "tool": "llm_chat",
         "tool_input": "Find a common 1-hour time slot for Alex and Bill given their schedules. \
Alex's schedule: #E1; Bill's schedule: #E2?"
-    }}\'
-    }}\'
+    }}
+}}
 ]
 ```
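The change above matters because Python tuple syntax such as `("Alex", "09/25/2025")` and the stray `\'` quoting are not valid JSON, so the old example could not be parsed; only the bracketed list form can. A quick sketch of the corrected shape (the doubled `{{` in the prompt template renders as a single `{`):

```python
import json

# One entry from the corrected example, with template braces un-doubled.
corrected = """
[
    {
        "plan": "Find Alex's schedule on Sep 25, 2025",
        "evidence": {
            "placeholder": "#E1",
            "tool": "search_calendar",
            "tool_input": ["Alex", "09/25/2025"]
        }
    }
]
"""

steps = json.loads(corrected)
print(steps[0]["evidence"]["tool_input"])  # ['Alex', '09/25/2025']
```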

Begin!
"""
11 changes: 10 additions & 1 deletion src/nat/front_ends/fastapi/fastapi_front_end_config.py
@@ -223,7 +223,9 @@ class CrossOriginResourceSharing(BaseModel):
     max_running_async_jobs: int = Field(
         default=10,
         description=(
-            "Maximum number of async jobs to run concurrently, this controls the number of dask workers created. "
+            "Maximum number of Dask workers to create for running async jobs. The name of this parameter is "
+            "misleading, as the actual number of concurrent async jobs is "
+            "`max_running_async_jobs * dask_threads_per_worker`. "
             "This parameter is only used when scheduler_address is `None` and a Dask local cluster is created."),
         ge=1)
     dask_workers: typing.Literal["threads", "processes"] = Field(
@@ -242,6 +244,13 @@
         description=("Memory limit for each Dask worker. Can be 'auto', a memory string like '4GB' or a float "
                      "representing a fraction of the system memory. "
                      "Refer to https://docs.dask.org/en/stable/deploying-python.html#reference for details."))
+
+    dask_threads_per_worker: int = Field(
+        default=1,
+        description=(
+            "Number of threads to use per worker. This parameter is only used when the value is greater than 0, "
+            "scheduler_address is `None`, and a local Dask cluster is created. When set to 0, Dask's default "
+            "is used."))
     step_adaptor: StepAdaptorConfig = StepAdaptorConfig()
 
     workflow: typing.Annotated[EndpointBase, Field(description="Endpoint for the default workflow.")] = EndpointBase(
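The relationship between the two fields described above can be sketched with simple arithmetic (values are hypothetical examples, not defaults from a real deployment):

```python
# Effective concurrency of the local Dask cluster, per the field descriptions.
max_running_async_jobs = 10    # Dask workers created (one extra is added for cleanup)
dask_threads_per_worker = 2    # threads each worker runs

concurrent_async_jobs = max_running_async_jobs * dask_threads_per_worker
print(concurrent_async_jobs)  # 20
```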
7 changes: 6 additions & 1 deletion src/nat/front_ends/fastapi/fastapi_front_end_plugin.py
@@ -123,12 +123,17 @@ async def run(self):
         except Exception:
             pass  # Keep as string (e.g., "auto", "4GB")
 
+        dask_kwargs = {}
+        if self.front_end_config.dask_threads_per_worker > 0:
+            dask_kwargs["threads_per_worker"] = self.front_end_config.dask_threads_per_worker
+
         # set n_workers to max_running_async_jobs + 1 to allow for one worker to handle the cleanup task
         self._cluster = LocalCluster(processes=not self._use_dask_threads,
                                      silence_logs=dask_log_level,
                                      protocol="tcp",
                                      memory_limit=memory_limit,
-                                     n_workers=self.front_end_config.max_running_async_jobs + 1)
+                                     n_workers=self.front_end_config.max_running_async_jobs + 1,
+                                     **dask_kwargs)
 
         self._scheduler_address = self._cluster.scheduler.address

Expand Down
14 changes: 14 additions & 0 deletions uv.lock

Some generated files are not rendered by default.