diff --git a/src/Build.UnitTests/BackEnd/NodeProviderOutOfProc_Tests.cs b/src/Build.UnitTests/BackEnd/NodeProviderOutOfProc_Tests.cs
new file mode 100644
index 00000000000..e25ef258441
--- /dev/null
+++ b/src/Build.UnitTests/BackEnd/NodeProviderOutOfProc_Tests.cs
@@ -0,0 +1,169 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using Microsoft.Build.BackEnd;
+using Microsoft.Build.Shared;
+using Shouldly;
+using Xunit;
+
+#nullable disable
+
+namespace Microsoft.Build.UnitTests.BackEnd
+{
+    /// <summary>
+    /// Unit tests for the over-provisioning logic that NodeProviderOutOfProcBase uses to choose which nodes stay alive for reuse.
+    /// </summary>
+    public class NodeProviderOutOfProc_Tests
+    {
+        /// <summary>
+        /// Test double that surfaces the protected reuse members and replaces the system-wide
+        /// node count (and, optionally, the reuse threshold) with fixed values.
+        /// </summary>
+        private sealed class TestableNodeProviderOutOfProcBase : NodeProviderOutOfProcBase
+        {
+            private readonly int? _thresholdOverride;
+            private readonly int _systemWideNodeCount;
+
+            public TestableNodeProviderOutOfProcBase(int systemWideNodeCount, int? thresholdOverride = null)
+            {
+                _thresholdOverride = thresholdOverride;
+                _systemWideNodeCount = systemWideNodeCount;
+            }
+
+            // Reports the canned count supplied to the constructor
+            // instead of the count computed by the base class.
+            protected override int CountSystemWideActiveNodes()
+                => _systemWideNodeCount;
+
+            // Falls back to the base class threshold when no override was supplied.
+            protected override int GetNodeReuseThreshold()
+            {
+                return _thresholdOverride.HasValue
+                    ? _thresholdOverride.Value
+                    : base.GetNodeReuseThreshold();
+            }
+
+            // Public pass-through to the protected DetermineNodesForReuse.
+            public bool[] TestDetermineNodesForReuse(int nodeCount, bool enableReuse)
+                => DetermineNodesForReuse(nodeCount, enableReuse);
+
+            // Public pass-through to the protected GetNodeReuseThreshold.
+            public int TestGetNodeReuseThreshold()
+                => GetNodeReuseThreshold();
+        }
+
+        [Fact]
+        public void DetermineNodesForReuse_WhenReuseDisabled_AllNodesShouldTerminate()
+        {
+            var sut = new TestableNodeProviderOutOfProcBase(systemWideNodeCount: 10, thresholdOverride: 4);
+
+            bool[] decisions = sut.TestDetermineNodesForReuse(nodeCount: 3, enableReuse: false);
+
+            decisions.Length.ShouldBe(3);
+            decisions.ShouldAllBe(keep => !keep);
+        }
+
+        [Fact]
+        public void DetermineNodesForReuse_WhenThresholdIsZero_AllNodesShouldTerminate()
+        {
+            var sut = new TestableNodeProviderOutOfProcBase(systemWideNodeCount: 10, thresholdOverride: 0);
+
+            bool[] decisions = sut.TestDetermineNodesForReuse(nodeCount: 3, enableReuse: true);
+
+            decisions.Length.ShouldBe(3);
+            decisions.ShouldAllBe(keep => !keep);
+        }
+
+        [Fact]
+        public void DetermineNodesForReuse_WhenUnderThreshold_AllNodesShouldBeReused()
+        {
+            // 3 nodes exist system-wide against a threshold of 4: below the limit.
+            var sut = new TestableNodeProviderOutOfProcBase(systemWideNodeCount: 3, thresholdOverride: 4);
+
+            bool[] decisions = sut.TestDetermineNodesForReuse(nodeCount: 3, enableReuse: true);
+
+            decisions.Length.ShouldBe(3);
+            decisions.ShouldAllBe(keep => keep);
+        }
+
+        [Fact]
+        public void DetermineNodesForReuse_WhenAtThreshold_AllNodesShouldBeReused()
+        {
+            // 4 nodes exist system-wide against a threshold of 4: exactly at the limit.
+            var sut = new TestableNodeProviderOutOfProcBase(systemWideNodeCount: 4, thresholdOverride: 4);
+
+            bool[] decisions = sut.TestDetermineNodesForReuse(nodeCount: 4, enableReuse: true);
+
+            decisions.Length.ShouldBe(4);
+            decisions.ShouldAllBe(keep => keep);
+        }
+
+        [Fact]
+        public void DetermineNodesForReuse_WhenOverThreshold_ExcessNodesShouldTerminate()
+        {
+            // 10 nodes exist system-wide against a threshold of 4; this instance owns 3 of them.
+            // The other 10 - 3 = 7 nodes already exceed the threshold on their own,
+            // so none of this instance's nodes may be kept.
+            var sut = new TestableNodeProviderOutOfProcBase(systemWideNodeCount: 10, thresholdOverride: 4);
+
+            bool[] decisions = sut.TestDetermineNodesForReuse(nodeCount: 3, enableReuse: true);
+
+            decisions.Length.ShouldBe(3);
+            decisions.ShouldAllBe(keep => !keep);
+        }
+
+        [Fact]
+        public void DetermineNodesForReuse_WhenSlightlyOverThreshold_SomeNodesShouldBeReused()
+        {
+            // 6 nodes exist system-wide against a threshold of 4; this instance owns 3 of them.
+            // The other instances therefore own 6 - 3 = 3 nodes.
+            // Two nodes must go to get back down to the threshold,
+            // which leaves room for exactly one node of this instance
+            // (threshold 4 minus the 3 nodes owned elsewhere).
+            var sut = new TestableNodeProviderOutOfProcBase(systemWideNodeCount: 6, thresholdOverride: 4);
+
+            bool[] decisions = sut.TestDetermineNodesForReuse(nodeCount: 3, enableReuse: true);
+
+            decisions.Length.ShouldBe(3);
+            // Only the first node survives; the remaining two are terminated.
+            decisions[0].ShouldBe(true);
+            decisions[1].ShouldBe(false);
+            decisions[2].ShouldBe(false);
+        }
+
+        [Fact]
+        public void DetermineNodesForReuse_WithSingleNode_BehavesCorrectly()
+        {
+            // 5 nodes exist system-wide against a threshold of 4; this instance owns a single node.
+            // That is over the threshold by just one node.
+            // The 4 nodes owned elsewhere already fill the threshold,
+            // so this instance's only node has to terminate.
+            var sut = new TestableNodeProviderOutOfProcBase(systemWideNodeCount: 5, thresholdOverride: 4);
+
+            bool[] decisions = sut.TestDetermineNodesForReuse(nodeCount: 1, enableReuse: true);
+
+            decisions.Length.ShouldBe(1);
+            decisions[0].ShouldBe(false);
+        }
+
+        [Fact]
+        public void GetNodeReuseThreshold_DefaultImplementation_ReturnsHalfOfCoreCount()
+        {
+            // No threshold override is supplied, so the base class implementation is exercised.
+            // The logical core count differs from machine to machine, but the relationship
+            // threshold = max(1, cores / 2) is expected to hold everywhere.
+            int logicalCores = NativeMethodsShared.GetLogicalCoreCount();
+            int expected = Math.Max(1, logicalCores / 2);
+
+            // Null override: the test double's GetNodeReuseThreshold falls through to the base class.
+            var sut = new TestableNodeProviderOutOfProcBase(systemWideNodeCount: 0, thresholdOverride: null);
+
+            // The reported threshold must match the formula and stay within [1, logicalCores].
+            int actual = sut.TestGetNodeReuseThreshold();
+            actual.ShouldBe(expected);
+            actual.ShouldBeGreaterThanOrEqualTo(1);
+            actual.ShouldBeLessThanOrEqualTo(logicalCores);
+        }
+    }
+}
diff --git a/src/Build/BackEnd/Components/Communications/NodeProviderOutOfProcBase.cs b/src/Build/BackEnd/Components/Communications/NodeProviderOutOfProcBase.cs
index b74013ba0f0..6a81345f282 100644
--- a/src/Build/BackEnd/Components/Communications/NodeProviderOutOfProcBase.cs
+++ b/src/Build/BackEnd/Components/Communications/NodeProviderOutOfProcBase.cs
@@ -116,23 +116,39 @@ protected void ShutdownConnectedNodes(List contextsToShutDown, bool
!Console.IsInputRedirected &&
Traits.Instance.EscapeHatches.EnsureStdOutForChildNodesIsPrimaryStdout;
+ // Determine which nodes should actually be reused based on system-wide node count
+ bool[] shouldReuseNode = DetermineNodesForReuse(contextsToShutDown.Count, enableReuse);
+
Task[] waitForExitTasks = waitForExit && contextsToShutDown.Count > 0 ? new Task[contextsToShutDown.Count] : null;
int i = 0;
+ int contextIndex = 0;
var loggingService = _componentHost.LoggingService;
foreach (NodeContext nodeContext in contextsToShutDown)
{
if (nodeContext is null)
{
+ contextIndex++;
continue;
}
- nodeContext.SendData(new NodeBuildComplete(enableReuse));
- if (waitForExit)
+
+ // Use the per-node reuse decision
+ bool reuseThisNode = shouldReuseNode[contextIndex++];
+ nodeContext.SendData(new NodeBuildComplete(reuseThisNode));
+
+ if (!reuseThisNode || waitForExit)
{
- waitForExitTasks[i++] = nodeContext.WaitForExitAsync(loggingService);
+ if (i < (waitForExitTasks?.Length ?? 0))
+ {
+ waitForExitTasks[i++] = nodeContext.WaitForExitAsync(loggingService);
+ }
}
}
- if (waitForExitTasks != null)
+ if (waitForExitTasks != null && i > 0)
{
+ if (i < waitForExitTasks.Length)
+ {
+ Array.Resize(ref waitForExitTasks, i);
+ }
Task.WaitAll(waitForExitTasks);
}
}
@@ -511,6 +527,170 @@ private string GetProcessesToIgnoreKey(Handshake hostHandshake, int nodeProcessI
#endif
}
+        /// <summary>
+        /// Decides, node by node, whether a node is kept for reuse, using the system-wide node count to avoid over-provisioning.
+        /// </summary>
+        /// <param name="nodeCount">The number of nodes in this MSBuild instance.</param>
+        /// <param name="enableReuse">Whether reuse is enabled at all.</param>
+        /// <returns>One flag per node: true to reuse the node, false to terminate it.</returns>
+        protected virtual bool[] DetermineNodesForReuse(int nodeCount, bool enableReuse)
+        {
+            // A freshly allocated array is all false, which means "terminate every node".
+            bool[] reuseFlags = new bool[nodeCount];
+
+            if (!enableReuse)
+            {
+                return reuseFlags;
+            }
+
+            // A zero threshold for this node type also terminates everything in this instance.
+            int threshold = GetNodeReuseThreshold();
+            if (threshold == 0)
+            {
+                CommunicationsUtilities.Trace("Node reuse threshold is 0, terminating all {0} nodes", nodeCount);
+                return reuseFlags;
+            }
+
+            // System-wide count as reported by the overridable CountSystemWideActiveNodes.
+            int activeSystemWide = CountSystemWideActiveNodes();
+            CommunicationsUtilities.Trace("System-wide node count: {0}, threshold: {1}, this instance has: {2} nodes",
+                activeSystemWide, threshold, nodeCount);
+
+            int reuseCount;
+            if (activeSystemWide <= threshold)
+            {
+                // Not over-provisioned: every node of this instance stays alive.
+                reuseCount = nodeCount;
+            }
+            else
+            {
+                // Over-provisioned: nodes counted outside this instance are charged against the threshold first;
+                // only the remaining headroom (never negative) is granted to this instance.
+                int nodesOwnedElsewhere = activeSystemWide - nodeCount;
+                int headroom = Math.Max(0, threshold - nodesOwnedElsewhere);
+
+                CommunicationsUtilities.Trace("Keeping {0} of {1} nodes in this instance to help meet threshold of {2}",
+                    headroom, nodeCount, threshold);
+
+                reuseCount = Math.Min(headroom, nodeCount);
+            }
+
+            // The leading reuseCount entries are the ones kept alive.
+            int index = 0;
+            while (index < reuseCount)
+            {
+                reuseFlags[index] = true;
+                index++;
+            }
+
+            return reuseFlags;
+        }
+
+        /// <summary>
+        /// Gets the maximum number of nodes of this type that should remain active system-wide.
+        /// </summary>
+        /// <returns>The threshold for node reuse: half the logical core count, but never less than 1.</returns>
+        protected virtual int GetNodeReuseThreshold()
+        {
+            // Default for worker nodes is NUM_PROCS / 2; derived classes (Server, RAR) can override to return 0.
+            int halfOfLogicalCores = NativeMethodsShared.GetLogicalCoreCount() / 2;
+            return halfOfLogicalCores < 1 ? 1 : halfOfLogicalCores;
+        }
+
+        /// <summary>Counts the active node processes system-wide by asking <see cref="CountActiveNodesWithMode"/> for <see cref="NodeMode.OutOfProcNode"/>.</summary>
+        /// <returns>The count of active node processes.</returns>
+        /// <remarks>Virtual so that derived providers can supply a different count.</remarks>
+        protected virtual int CountSystemWideActiveNodes()
+        {
+            return CountActiveNodesWithMode(NodeMode.OutOfProcNode);
+        }
+
+        /// <summary>
+        /// Counts the active MSBuild processes running with the specified <see cref="NodeMode"/>, including the current process if it matches.
+        /// Used by out-of-proc nodes (e.g., server node) to detect over-provisioning at build completion.
+        /// </summary>
+        /// <param name="nodeMode">The node mode to filter for.</param>
+        /// <returns>The number of matching processes, or 0 if enumeration fails or the feature wave is disabled.</returns>
+        internal static int CountActiveNodesWithMode(NodeMode nodeMode)
+        {
+            try
+            {
+                // With Wave18_5 disabled GetPossibleRunningNodes returns every same-named process unfiltered; report 0 as documented instead.
+                IList<Process> nodeProcesses = ChangeWaves.AreFeaturesEnabled(ChangeWaves.Wave18_5)
+                    ? GetPossibleRunningNodes(nodeMode).nodeProcesses : Array.Empty<Process>();
+                foreach (Process process in nodeProcesses)
+                {
+                    process?.Dispose();
+                }
+                return nodeProcesses.Count;
+            }
+            catch (Exception ex)
+            {
+                CommunicationsUtilities.Trace("Error counting system-wide nodes with mode {0}: {1}", nodeMode, ex.Message);
+                return 0;
+            }
+        }
+
+        /// <summary>
+        /// Lists processes named like the current host (falling back to the MSBuild executable name), sorted by process id.
+        /// When a node mode is given and Wave18_5 is enabled, only processes matching that mode are returned and the rejected ones are disposed;
+        /// the caller is responsible for disposing every returned <see cref="Process"/>.
+        /// </summary>
+        private static (string expectedProcessName, IList<Process> nodeProcesses) GetPossibleRunningNodes(NodeMode? expectedNodeMode)
+        {
+            string expectedProcessName = Path.GetFileNameWithoutExtension(CurrentHost.GetCurrentHost() ?? Constants.MSBuildExecutableName);
+
+            Process[] processes;
+            try
+            {
+                processes = Process.GetProcessesByName(expectedProcessName);
+            }
+            catch
+            {
+                // Enumeration is best-effort: report no candidates rather than failing the caller.
+                return (expectedProcessName, Array.Empty<Process>());
+            }
+
+            if (!expectedNodeMode.HasValue || !ChangeWaves.AreFeaturesEnabled(ChangeWaves.Wave18_5))
+            {
+                Array.Sort(processes, (left, right) => left.Id.CompareTo(right.Id));
+                return (expectedProcessName, processes);
+            }
+
+            List<Process> filteredProcesses = [];
+            bool isDotnetProcess = expectedProcessName.Equals(Path.GetFileNameWithoutExtension(Constants.DotnetProcessName), StringComparison.OrdinalIgnoreCase);
+            foreach (Process process in processes)
+            {
+                bool isCandidate = false;
+                try
+                {
+                    if (process.TryGetCommandLine(out string commandLine))
+                    {
+                        // A null command line cannot be filtered, so keep it; otherwise a dotnet host must run MSBuild.dll and the mode must match.
+                        isCandidate = commandLine is null
+                            || ((!isDotnetProcess || commandLine.Contains("MSBuild.dll", StringComparison.OrdinalIgnoreCase))
+                                && NodeModeHelper.ExtractFromCommandLine(commandLine) == expectedNodeMode.Value);
+                    }
+                }
+                catch
+                {
+                    // Deliberately best-effort: a process that cannot be inspected is treated as a non-candidate.
+                }
+
+                if (isCandidate)
+                {
+                    filteredProcesses.Add(process);
+                }
+                else
+                {
+                    process?.Dispose();
+                }
+            }
+
+            filteredProcesses.Sort((left, right) => left.Id.CompareTo(right.Id));
+            return (expectedProcessName, filteredProcesses);
+        }
+
#if !FEATURE_PIPEOPTIONS_CURRENTUSERONLY
// This code needs to be in a separate method so that we don't try (and fail) to load the Windows-only APIs when JIT-ing the code
// on non-Windows operating systems
diff --git a/src/Build/BackEnd/Node/OutOfProcServerNode.cs b/src/Build/BackEnd/Node/OutOfProcServerNode.cs
index 49ed1d610dd..bd1aae2a681 100644
--- a/src/Build/BackEnd/Node/OutOfProcServerNode.cs
+++ b/src/Build/BackEnd/Node/OutOfProcServerNode.cs
@@ -319,7 +319,22 @@ private void HandlePacket(INodePacket packet)
///
private void HandleServerShutdownCommand(NodeBuildComplete buildComplete)
{
- _shutdownReason = buildComplete.PrepareForReuse ? NodeEngineShutdownReason.BuildCompleteReuse : NodeEngineShutdownReason.BuildComplete;
+            // Stay alive for reuse only when it was requested and no other server node is running.
+            // CountActiveNodesWithMode is documented to include the current process, so a count
+            // above 1 means another server node exists and this one exits to avoid over-provisioning.
+            bool keepAlive = false;
+
+            if (buildComplete.PrepareForReuse)
+            {
+                int activeServers = NodeProviderOutOfProcBase.CountActiveNodesWithMode(NodeMode.OutOfProcServerNode);
+                keepAlive = activeServers <= 1;
+                if (!keepAlive)
+                {
+                    CommunicationsUtilities.Trace("Terminating server node due to over-provisioning: {0} server nodes found system-wide.", activeServers);
+                }
+            }
+
+            _shutdownReason = keepAlive ? NodeEngineShutdownReason.BuildCompleteReuse : NodeEngineShutdownReason.BuildComplete;
_shutdownEvent.Set();
}
diff --git a/src/Shared/ProcessExtensions.cs b/src/Shared/ProcessExtensions.cs
index 7ddd8dc6ab6..49f43cd4b5f 100644
--- a/src/Shared/ProcessExtensions.cs
+++ b/src/Shared/ProcessExtensions.cs
@@ -49,7 +49,7 @@ public static void KillTree(this Process process, int timeoutMilliseconds)
///
/// The process to get the command line for.
/// The command line string, or null if it cannot be retrieved.
- /// True if the command line was successfully retrieved or the current platform doesn't support retrieving command lines, false if there was an error retrieving the command line.
+ /// True if the command line was successfully retrieved, false if there was an error or the platform doesn't support command line retrieval.
public static bool TryGetCommandLine(this Process? process, out string? commandLine)
{
commandLine = null;
diff --git a/src/Utilities.UnitTests/ProcessExtensions_Tests.cs b/src/Utilities.UnitTests/ProcessExtensions_Tests.cs
index 8f52ca1a0dd..6b954bcad47 100644
--- a/src/Utilities.UnitTests/ProcessExtensions_Tests.cs
+++ b/src/Utilities.UnitTests/ProcessExtensions_Tests.cs
@@ -88,7 +88,7 @@ public async Task TryGetCommandLine_RunningProcess_ContainsArguments()
{
await Task.Delay(300);
var sw = Stopwatch.StartNew();
- p.TryGetCommandLine(out string commandLine);
+ p.TryGetCommandLine(out string commandLine).ShouldBeTrue();
sw.Stop();
_output.WriteLine($"TryGetCommandLine elapsed: {sw.Elapsed.TotalMilliseconds:F2} ms");