在.Net 8中StringBuilder的Append性能改进

前言

.Net 8马上就要发布正式版本了,我之前还记录了不少感兴趣的提交,比如StringBuilder的Append优化.改动不是很大,但对性能确实有提升.主要有这两个:
  1. Unroll StringBuilder.Append for const string (#85894)
  2. StringBuilder: use Span.Fill in Append repeating char (#86287)

测试代码

using System.Text;
using BenchmarkDotNet.Attributes;

namespace CSharpBenchmarks.StringTest
{
	/// <summary>
	/// BenchmarkDotNet benchmark that appends a constant string to a freshly
	/// created <see cref="StringBuilder"/> and materializes the result.
	/// </summary>
	// The two attributes below enable BenchmarkDotNet diagnosers: one for memory
	// statistics and one for disassembly (printSource: true requests that source
	// lines be printed alongside the generated code).
	[MemoryDiagnoser]
	[DisassemblyDiagnoser(printSource: true)]
	public class StringBuilderTest
	{
		/// <summary>
		/// Number of <see cref="Append"/> calls per benchmark invocation;
		/// the benchmark is run once with 16 and once with 128.
		/// </summary>
		[Params(16, 128)]
		public int Count { get; set; }

		/// <summary>
		/// Benchmark entry point: calls <see cref="Append"/> <see cref="Count"/> times
		/// and discards each returned string.
		/// </summary>
		[Benchmark]
		public void AppendTest()
		{
			for (int i = 0; i < Count; i++)
			{
				// The result is intentionally discarded; only the work is measured.
				_ = Append();
			}
		}

		/// <summary>
		/// Creates a <see cref="StringBuilder"/> with an initial capacity of 16,
		/// appends the 12-character constant "hello world!" and returns the built string.
		/// </summary>
		/// <returns>The string produced by the builder.</returns>
		public string Append()
		{
			// Capacity 16 is larger than the 12 appended characters.
			StringBuilder sb = new StringBuilder(16);
			// Appending a constant string is the case this benchmark targets.
			_ = sb.Append("hello world!");
			return sb.ToString();
		}
	}
}

看BenchmarkDotNet执行的结果,单次Append的结果:

在.Net 8中StringBuilder的Append方法性能提升差不多在15%左右

从结果看,Append方法在.Net 8中比.Net 7性能提升了差不多15%左右.

下面接着看16次和128次执行结果对比:

.Net 8相比.Net 7,StringBuilder的Append方法性能提升差不多在20%和14%左右

如果对比生成的Code Size列,会发现单次执行Append时的代码大小为: .Net 7是855B, .Net 8是930B;而多次执行时,代码经过JIT分层编译(从第0层提升到第1层后,JIT会对代码进行优化),大小为: .Net 7是129B, .Net 8是689B

接下来我们分别看看.Net 7和.Net 8生成的汇编代码:

;;.Net 7
; CSharpBenchmarks.StringTest.StringBuilderTest.AppendTest()
; JIT disassembly in Intel syntax; immediates and offsets are hexadecimal.
; Register roles: rsi = this (arrives in rcx), edi = loop counter i.
; [rsi+8] is read as the loop bound - presumably the backing field of Count.
; Append() is not inlined here: the loop body is a single indirect call.
; 			for (int i = 0; i < Count; i++)
; 			     ^^^^^^^^^
; 				Append();
; 				^^^^^^^^^
; Prologue: save rdi/rsi (restored in the epilogue) and reserve 0x28 bytes of stack.
       push      rdi
       push      rsi
       sub       rsp,28
; rsi = this, i = 0.
       mov       rsi,rcx
       xor       edi,edi
; Skip the loop entirely when the bound is <= 0 (signed compare).
       cmp       dword ptr [rsi+8],0
       jle       short M00_L01
M00_L00:
; Loop body: call Append() with rcx = this; the value returned in rax is not used.
       mov       rcx,rsi
       call      qword ptr [7FF90867D810]; CSharpBenchmarks.StringTest.StringBuilderTest.Append() 
; i++ and repeat while i < bound; the bound is re-read from memory each iteration.
       inc       edi
       cmp       edi,[rsi+8]
       jl        short M00_L00
M00_L01:
; Epilogue: release the stack space and restore the saved registers.
       add       rsp,28
       pop       rsi
       pop       rdi
       ret
; Total bytes of code 40

; 
; CSharpBenchmarks.StringTest.StringBuilderTest.Append()
; .NET 7 code for Append(): three out-of-line calls (constructor, Append, ToString).
; rsi holds the new StringBuilder for the whole method.
; 			StringBuilder sb = new StringBuilder(16);
; 			^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
; 			sb.Append("hello world!");
; 			^^^^^^^^^^^^^^^^^^^^^^^^^^
; Prologue: save rsi and reserve 0x20 bytes of stack.
       push      rsi
       sub       rsp,20
; Allocate the StringBuilder object; the new object is returned in rax.
       mov       rcx,offset MT_System.Text.StringBuilder
       call      CORINFO_HELP_NEWSFAST
       mov       rsi,rax
; Constructor arguments: rcx = sb, edx = 0x10 (the 16 from the source),
; r8d = 0x7FFFFFFF (second Int32 argument - presumably the maximum capacity).
       mov       rcx,rsi
       mov       edx,10
       mov       r8d,7FFFFFFF
; Call the StringBuilder constructor
       call      qword ptr [7FF908675540]; System.Text.StringBuilder..ctor(Int32, Int32) 
; Load the string literal through an indirection cell, then add 0xC so that rdx
; refers to its first character (presumably skipping the string object header).
       mov       rdx,1F0E4C08DB8
       mov       rdx,[rdx]
       add       rdx,0C
; rcx = sb, r8d = 0xC = 12, the number of characters in "hello world!".
       mov       rcx,rsi
       mov       r8d,0C
; Call StringBuilder.Append
       call      qword ptr [7FF908675E10]; System.Text.StringBuilder.Append(Char ByRef, Int32)  
; rcx = sb for ToString; the frame is torn down before the jump below.
       mov       rcx,rsi
       add       rsp,20
       pop       rsi
; Jump (tail call) to ToString, which returns the string to our caller
       jmp       qword ptr [7FF9086817C0]; System.Text.StringBuilder.ToString()
; Total bytes of code 89

.Net 8汇编代码:

; .NET 8 inlines the Append method into the loop, saving a method call.
; Filling via Span.Fill was also optimized; that path is not visible in this listing -
; what is shown below is the unrolled copy of the constant string.
; CSharpBenchmarks.StringTest.StringBuilderTest.AppendTest()
; Register roles: rbx = this, esi = loop counter i, rdi = the StringBuilder created
; in the current iteration. [rbx+8] is read as the loop bound (presumably Count).
; StringBuilder offsets as used below: +8 = char[] buffer, +18 = current length
; (presumably), +20 = the 0x7FFFFFFF limit (presumably the maximum capacity).
; 			for (int i = 0; i < Count; i++)
; 			     ^^^^^^^^^
; 				Append();
; 				^^^^^^^^^
; Prologue: save rdi/rsi/rbx and reserve 0x20 bytes of stack.
       push      rdi
       push      rsi
       push      rbx
       sub       rsp,20
; Emitted ahead of the VEX-encoded vmovdqu copy at M00_L03.
       vzeroupper
; rbx = this, i = 0; skip the loop when the bound is <= 0.
       mov       rbx,rcx
       xor       esi,esi
       cmp       dword ptr [rbx+8],0
       jle       short M00_L02
M00_L00:
; Inlined "new StringBuilder(16)": allocate the object ...
       mov       rcx,offset MT_System.Text.StringBuilder
       call      CORINFO_HELP_NEWSFAST
       mov       rdi,rax
; ... store 0x7FFFFFFF at +20 ...
       mov       dword ptr [rdi+20],7FFFFFFF
; ... allocate a char[] with 0x10 (16) elements ...
       mov       edx,10
       mov       rcx,offset MT_System.Char[]
       call      CORINFO_HELP_NEWARR_1_VC
; ... and store the array reference into [rdi+8] via the reference-assignment helper.
       lea       rcx,[rdi+8]
       mov       rdx,rax
       call      CORINFO_HELP_ASSIGN_REF
; Inlined Append("hello world!"): rdx = address of the source characters
; (it is what gets passed as the Char ByRef argument on the slow path).
       mov       rdx,1F2803086FC
; rcx = buffer, r8d = current length, eax = length + 0xC (12 chars to append).
       mov       rcx,[rdi+8]
       mov       r8d,[rdi+18]
       lea       eax,[r8+0C]
; Fast path when buffer.Length >= length + 12 (unsigned compare).
       cmp       [rcx+8],eax
       jae       short M00_L03
; Slow path: not enough room, call AppendWithExpansion(sb, ref chars, 12).
       mov       rcx,rdi
       mov       r8d,0C
       call      qword ptr [7FF974D8E508]; System.Text.StringBuilder.AppendWithExpansion(Char ByRef, Int32)
M00_L01:
; sb.ToString(); the returned string is not used.
       mov       rcx,rdi
       call      qword ptr [7FF974D992B0]; System.Text.StringBuilder.ToString()
; i++ and repeat while i < bound.
       inc       esi
       cmp       esi,[rbx+8]
       jl        short M00_L00
M00_L02:
; Epilogue.
       add       rsp,20
       pop       rbx
       pop       rsi
       pop       rdi
       ret
M00_L03:
; Fast path: rcx = address of buffer[length] (array data starts at +0x10, 2 bytes per char).
       movsxd    rax,r8d
       lea       rcx,[rcx+rax*2+10]
; Copy 24 bytes (12 chars) with two overlapping 16-byte moves:
; source bytes 0..15 and 8..23 are both loaded before either store.
       vmovdqu   xmm0,xmmword ptr [rdx]
       vmovdqu   xmm1,xmmword ptr [rdx+8]
       vmovdqu   xmmword ptr [rcx],xmm0
       vmovdqu   xmmword ptr [rcx+8],xmm1
; length += 12, then rejoin the common path at ToString.
       add       r8d,0C
       mov       [rdi+18],r8d
       jmp       short M00_L01
; Total bytes of code 180

; AppendWithExpansion, the slow-path callee of the inlined Append (.NET 7 does not have this method)
; System.Text.StringBuilder.AppendWithExpansion(Char ByRef, Int32)
; Register roles: rbx = this, rdi = source char reference, esi = char count,
; ebp = [this+18] (current chunk length, presumably), r14d = free chars left in the
; current buffer. Offsets used: +8 = char[] buffer, +18 and +1C = two Int32 fields
; whose sum plus the count is range-checked (presumably chunk length and chunk
; offset), +20 = upper limit for that sum (presumably the maximum capacity).
; Prologue: save the registers this method overwrites and reserve 0x20 bytes of stack.
       push      r15
       push      r14
       push      r13
       push      rdi
       push      rsi
       push      rbp
       push      rbx
       sub       rsp,20
       mov       rbx,rcx
       mov       rdi,rdx
       mov       esi,r8d
; ecx = [this+1C] + [this+18] + count.
       mov       ecx,[rbx+1C]
       mov       ebp,[rbx+18]
       add       ecx,ebp
       add       ecx,esi
; Throw (M01_L02) when the sum exceeds [this+20] or is smaller than count
; (signed compares; the second one catches a wrapped or negative sum).
       cmp       ecx,[rbx+20]
       jg        short M01_L02
       cmp       ecx,esi
       jl        short M01_L02
; r14d = buffer.Length - ebp = chars still free in the current buffer.
       mov       rcx,[rbx+8]
       mov       r14d,[rcx+8]
       sub       r14d,ebp
; If there is free room, fill it first (M01_L03), then come back to M01_L00.
       test      r14d,r14d
       jg        near ptr M01_L03
M01_L00:
; esi = chars that still have to be copied; ExpandByABlock is called with that count.
       sub       esi,r14d
       mov       rcx,rbx
       mov       edx,esi
       call      qword ptr [7FF974D8E5C8]; System.Text.StringBuilder.ExpandByABlock(Int32)
; rdx = source advanced by r14d chars (2 bytes each).
       movsxd    r8,r14d
       lea       rdx,[rdi+r8*2]
; Re-read the buffer reference after the expansion; a null buffer is handled at M01_L08.
       mov       r8,[rbx+8]
       test      r8,r8
       je        near ptr M01_L08
; rcx = address of buffer[0], r15d = buffer.Length; fail (M01_L09) if count > Length.
       lea       rcx,[r8+10]
       mov       r15d,[r8+8]
       cmp       esi,r15d
       ja        near ptr M01_L09
M01_L01:
; Memmove(dest = rcx, src = rdx, byte count = esi * 2).
       mov       r8d,esi
       add       r8,r8
       call      qword ptr [7FF9749F5E78]; System.Buffer.Memmove(Byte ByRef, Byte ByRef, UIntPtr)
; [this+18] = number of chars just copied into the buffer.
       mov       [rbx+18],esi
; Epilogue.
       add       rsp,20
       pop       rbx
       pop       rbp
       pop       rsi
       pop       rdi
       pop       r13
       pop       r14
       pop       r15
       ret
M01_L02:
; Range check failed: allocate an ArgumentOutOfRangeException, load a string literal,
; call two targets that the listing does not name (presumably a message lookup and
; the exception constructor), then throw.
       mov       rcx,offset MT_System.ArgumentOutOfRangeException
       call      CORINFO_HELP_NEWSFAST
       mov       rbx,rax
       mov       ecx,184C1
       mov       rdx,7FF9747B4000
       call      CORINFO_HELP_STRCNS
       mov       rbp,rax
       call      qword ptr [7FF9749C7D50]
       mov       r8,rax
       mov       rdx,rbp
       mov       rcx,rbx
       call      qword ptr [7FF974A36280]
       mov       rcx,rbx
       call      CORINFO_HELP_THROW
M01_L03:
; Fill the free tail of the current buffer. First form the destination range
; buffer[ebp..]: rcx = start address, r13d = number of chars available.
       mov       r15,[rbx+8]
       test      r15,r15
       jne       short M01_L04
; Null buffer: only a start index of 0 is accepted, giving an empty range.
       test      ebp,ebp
       jne       short M01_L05
       xor       ecx,ecx
       xor       r13d,r13d
       jmp       short M01_L07
M01_L04:
; Start index must not exceed buffer.Length (unsigned compare).
       cmp       [r15+8],ebp
       jae       short M01_L06
M01_L05:
; Unnamed call followed by int 3 - presumably a throw helper that does not return.
       call      qword ptr [7FF9749C4060]
       int       3
M01_L06:
       mov       r8d,ebp
       lea       rcx,[r15+r8*2+10]
       mov       r13d,[r15+8]
       sub       r13d,ebp
M01_L07:
; The r14d chars to copy must fit in the r13d chars available, else fail (M01_L09).
       cmp       r14d,r13d
       ja        short M01_L09
; Memmove(dest = rcx, src = rdi, byte count = r14d * 2).
       mov       r8d,r14d
       add       r8,r8
       mov       rdx,rdi
; Call Memmove
       call      qword ptr [7FF9749F5E78]; System.Buffer.Memmove(Byte ByRef, Byte ByRef, UIntPtr)
; [this+18] = buffer.Length, i.e. the current buffer is now recorded as full.
       mov       rax,[rbx+8]
       mov       eax,[rax+8]
       mov       [rbx+18],eax
       jmp       near ptr M01_L00
M01_L08:
; Null buffer after expansion: destination is an empty range, so only a
; remaining count of 0 may proceed to the copy; anything else falls into M01_L09.
       xor       ecx,ecx
       xor       r15d,r15d
       cmp       esi,r15d
       jbe       near ptr M01_L01
M01_L09:
; Unnamed call followed by int 3 - presumably a throw helper that does not return.
       call      qword ptr [7FF9749C4078]
       int       3
; Total bytes of code 317

; System.Text.StringBuilder.ToString()
; Register roles: rbx = chunk being visited (starts as this), rsi = result string.
; Offsets used: +8 = char[] buffer, +10 = link to the next chunk to visit (presumably
; the previous chunk), +18 = chars used in this chunk, +1C = position of this chunk's
; chars within the result (presumably the chunk offset).
; Prologue: save rsi/rbx and reserve 0x28 bytes of stack.
       push      rsi
       push      rbx
       sub       rsp,28
       mov       rbx,rcx
; Total length = [this+1C] + [this+18]; a zero length takes the shortcut at M02_L03.
       mov       ecx,[rbx+1C]
       add       ecx,[rbx+18]
       je        near ptr M02_L03
; Allocate the result string with that length.
       call      System.String.FastAllocateString(Int32)
       mov       rsi,rax
M02_L00:
; Per-chunk copy; chunks with no chars (<= 0) are skipped.
       mov       r8d,[rbx+18]
       test      r8d,r8d
       jle       short M02_L01
       mov       rdx,[rbx+8]
       mov       ecx,[rbx+1C]
; Bounds checks (unsigned): position + count must fit in the result ([rsi+8] is
; compared as its length), and the chunk buffer must hold at least count chars.
       lea       eax,[r8+rcx]
       cmp       eax,[rsi+8]
       ja        short M02_L02
       cmp       [rdx+8],r8d
       jb        short M02_L02
; rcx = destination: result chars start at +0xC, 2 bytes per char.
       movsxd    rcx,ecx
       lea       rcx,[rsi+rcx*2+0C]
; rdx = source: array data starts at +0x10. r8 = byte count = chars * 2.
       add       rdx,10
       movsxd    r8,r8d
       add       r8,r8
; Call Memmove
       call      qword ptr [7FF9749F5E78]; System.Buffer.Memmove(Byte ByRef, Byte ByRef, UIntPtr)
M02_L01:
; Follow the chunk link; keep looping until it is null.
       mov       rbx,[rbx+10]
       test      rbx,rbx
       jne       short M02_L00
; Return the result string.
       mov       rax,rsi
       add       rsp,28
       pop       rbx
       pop       rsi
       ret
M02_L02:
; A bounds check failed: allocate an ArgumentOutOfRangeException, load a string
; literal, call two targets that the listing does not name (presumably a message
; lookup and the exception constructor), then throw.
       mov       rcx,offset MT_System.ArgumentOutOfRangeException
       call      CORINFO_HELP_NEWSFAST
       mov       rbx,rax
       mov       ecx,18491
       mov       rdx,7FF9747B4000
       call      CORINFO_HELP_STRCNS
       mov       rsi,rax
       call      qword ptr [7FF9749C7C78]
       mov       r8,rax
       mov       rdx,rsi
       mov       rcx,rbx
       call      qword ptr [7FF974A36280]
       mov       rcx,rbx
       call      CORINFO_HELP_THROW
M02_L03:
; Zero total length: return a fixed object address without allocating
; (presumably the empty string).
       mov       rax,1F280300008
       add       rsp,28
       pop       rbx
       pop       rsi
       ret
; Total bytes of code 190


秋风 2023-11-05