Description
Description
I noticed that some of my stress-test unit tests were taking ~6 minutes on .NET 8 and ~5 seconds on .NET Framework 4.8. I ran some benchmarks and was able to reduce the problem to the following minimal repro.
Benchmark Code
/// <summary>
/// Benchmark harness for the repro: posts a handful of no-op callbacks to a
/// <see cref="MyThreadPoolSynchronizationContext"/> and waits for all of them to finish.
/// </summary>
[MaxIterationCount(30)]
public class MultiThreadBenchmark
{
    // The synchronization context under test; one instance is shared by every invocation.
    private readonly MyThreadPoolSynchronizationContext _alternativeThreadPool = new MyThreadPoolSynchronizationContext();

    /// <summary>
    /// Posts <paramref name="postCount"/> empty callbacks and then blocks until the context
    /// reports that none of them is still running.
    /// </summary>
    /// <param name="postCount">How many callbacks to post per invocation.</param>
    [Benchmark]
    [Arguments(2)]
    [Arguments(7)]
    public void MultiThreadTest(int postCount)
    {
        // Count down instead of up; the number of Post calls is the same.
        for (int remaining = postCount; remaining > 0; --remaining)
        {
            _alternativeThreadPool.Post(_ => { }, null);
        }

        _alternativeThreadPool.WaitForAllThreadsToComplete();
    }
}
/// <summary>
/// A <see cref="SynchronizationContext"/> that runs every posted callback on a dedicated
/// background thread. Threads that finish a callback park themselves in a static pool and are
/// woken up again (via <see cref="Monitor.Pulse"/>) by a later <see cref="Post"/>.
/// </summary>
public sealed class MyThreadPoolSynchronizationContext : SynchronizationContext
{
    // Number of posted callbacks that have not completed yet. It is only modified through
    // Interlocked and only read through Volatile.Read, so the field itself is not declared
    // volatile (passing a volatile field by ref to Interlocked raises warning CS0420).
    private int _runningActionCount;

    /// <summary>
    /// Owns one background thread and hands it one work item at a time.
    /// </summary>
    private sealed class ThreadRunner
    {
        // Idle runners whose thread is parked in Monitor.Wait. Shared by ALL context instances.
        private static readonly Stack<ThreadRunner> s_pool = new Stack<ThreadRunner>();
        // Only used to give each created thread a unique name.
        private static int s_threadCounter;

        // Guards the hand-off of a work item to this runner's thread.
        private readonly object _locker = new object();
        private MyThreadPoolSynchronizationContext _owner;
        private SendOrPostCallback _callback;
        private object _state;

        /// <summary>
        /// Schedules <paramref name="callback"/> on a pooled runner, or on a newly created
        /// thread when the pool is empty.
        /// </summary>
        /// <param name="owner">The context the callback was posted to.</param>
        /// <param name="callback">The delegate to invoke.</param>
        /// <param name="state">The argument passed to <paramref name="callback"/>.</param>
        public static void Run(MyThreadPoolSynchronizationContext owner, SendOrPostCallback callback, object state)
        {
            // Count the work item before it can possibly run, so that
            // WaitForAllThreadsToComplete never observes a premature zero.
            Interlocked.Increment(ref owner._runningActionCount);

            ThreadRunner threadRunner = null;
            lock (s_pool)
            {
                if (s_pool.Count > 0)
                {
                    threadRunner = s_pool.Pop();
                }
            }

            bool reused = threadRunner != null;
            if (!reused)
            {
                threadRunner = new ThreadRunner();
            }

            lock (threadRunner._locker)
            {
                threadRunner._owner = owner;
                threadRunner._callback = callback;
                threadRunner._state = state;
                if (reused)
                {
                    // A pooled runner pushed itself while holding _locker and then released it
                    // by calling Monitor.Wait, so by the time we hold the lock it is waiting
                    // and this pulse cannot be lost.
                    Monitor.Pulse(threadRunner._locker);
                }
                else
                {
                    new Thread(threadRunner.ThreadAction)
                    {
                        IsBackground = true,
                        Name = $"MyThreadPoolSynchronizationContext_{Interlocked.Increment(ref s_threadCounter)}"
                    }.Start();
                }
            }
        }

        /// <summary>
        /// Thread body: runs the assigned work item, returns this runner to the pool, and
        /// waits to be pulsed with the next work item. Never returns.
        /// </summary>
        private void ThreadAction()
        {
            while (true)
            {
                MyThreadPoolSynchronizationContext owner = _owner;
                SendOrPostCallback callback = _callback;
                object state = _state;
                // Allow GC to reclaim memory.
                _owner = null;
                _callback = null;
                _state = null;

                // The pool is static, so this thread may be reused by a different context than
                // the one that created it. Install the current owner for every work item so
                // SynchronizationContext.Current is always the context that was posted to.
                SetSynchronizationContext(owner);

                callback.Invoke(state);
                Interlocked.Decrement(ref owner._runningActionCount);

                lock (_locker)
                {
                    // Publish this runner as idle while still holding _locker; Run cannot pulse
                    // until Monitor.Wait below has released the lock.
                    lock (s_pool)
                    {
                        s_pool.Push(this);
                    }
                    Monitor.Wait(_locker);
                }
            }
        }
    }

    /// <summary>
    /// Spins until every callback posted to this context has finished executing.
    /// </summary>
    public void WaitForAllThreadsToComplete()
    {
        var spinner = new SpinWait();
        while (Volatile.Read(ref _runningActionCount) > 0)
        {
            spinner.SpinOnce();
        }
    }

    /// <summary>
    /// Returns this same instance rather than a copy.
    /// </summary>
    public override SynchronizationContext CreateCopy()
    {
        return this;
    }

    /// <summary>
    /// Queues <paramref name="d"/> to run on one of this context's background threads.
    /// </summary>
    /// <param name="d">The delegate to invoke.</param>
    /// <param name="state">The argument passed to <paramref name="d"/>.</param>
    /// <exception cref="System.ArgumentNullException"><paramref name="d"/> is null.</exception>
    public override void Post(SendOrPostCallback d, object state)
    {
        if (d == null)
        {
            throw new System.ArgumentNullException(nameof(d), "SendOrPostCallback may not be null.");
        }
        ThreadRunner.Run(this, d, state);
    }

    /// <summary>
    /// Synchronous dispatch is not implemented by this context.
    /// </summary>
    /// <exception cref="System.InvalidOperationException">Always thrown.</exception>
    public override void Send(SendOrPostCallback d, object state)
    {
        throw new System.InvalidOperationException();
    }
}
Regression?
Yes
Data
BenchmarkDotNet v0.15.0, Windows 10 (10.0.19045.5854/22H2/2022Update)
AMD Ryzen 7 9800X3D 4.70GHz, 1 CPU, 16 logical and 8 physical cores
.NET SDK 10.0.100-preview.4.25258.110
[Host] : .NET 8.0.16 (8.0.1625.21506), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI
Job-CXDRYT : .NET 10.0.0 (10.0.25.25910), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI
Job-TIUQAM : .NET 8.0.16 (8.0.1625.21506), X64 RyuJIT AVX-512F+CD+BW+DQ+VL+VBMI
Job-AGYXWA : .NET Framework 4.8.1 (4.8.9310.0), X64 RyuJIT VectorSize=256
Method | Runtime | postCount | Mean | Error | StdDev | Ratio | RatioSD |
---|---|---|---|---|---|---|---|
MultiThreadTest | .NET 10.0 | 2 | 98.871 us | 19.2124 us | 28.1613 us | 1.09 | 0.48 |
MultiThreadTest | .NET 8.0 | 2 | 86.410 us | 16.9564 us | 25.3796 us | 0.96 | 0.42 |
MultiThreadTest | .NET Framework 4.8 | 2 | 6.837 us | 0.0946 us | 0.0838 us | 0.08 | 0.02 |
MultiThreadTest | .NET 10.0 | 7 | 342.402 us | 147.1440 us | 220.2382 us | 1.90 | 2.56 |
MultiThreadTest | .NET 8.0 | 7 | 1,983.185 us | 370.6014 us | 554.6988 us | 11.01 | 11.85 |
MultiThreadTest | .NET Framework 4.8 | 7 | 11.221 us | 0.5488 us | 0.8044 us | 0.06 | 0.06 |
Analysis
It looks like the performance issue is caused by `Monitor` (I suspect `Wait` and `Pulse` are the culprits, but it could be `Enter` and `Exit`), but I haven't dug any deeper to confirm.
These benchmarks have a very high variance, but the results are repeatable. Here are the results of another run:
Method | Runtime | postCount | Mean | Error | StdDev | Ratio | RatioSD |
---|---|---|---|---|---|---|---|
MultiThreadTest | .NET 10.0 | 2 | 248.262 us | 94.0082 us | 140.7070 us | 1.84 | 3.43 |
MultiThreadTest | .NET 8.0 | 2 | 198.853 us | 71.0895 us | 104.2021 us | 1.47 | 2.68 |
MultiThreadTest | .NET Framework 4.8 | 2 | 6.795 us | 0.0658 us | 0.0583 us | 0.05 | 0.08 |
MultiThreadTest | .NET 10.0 | 7 | 629.493 us | 83.4623 us | 124.9225 us | 1.05 | 0.36 |
MultiThreadTest | .NET 8.0 | 7 | 442.401 us | 173.0234 us | 258.9733 us | 0.74 | 0.49 |
MultiThreadTest | .NET Framework 4.8 | 7 | 16.042 us | 0.5617 us | 0.8233 us | 0.03 | 0.01 |