在.Net 8中StringBuilder的Append性能改进
前言
马上.Net 8就要发布正式版本了,还有不少当时记录下来、自己感兴趣的提交,比如StringBuilder的Append优化.改动不是很大,但对性能确实有提升.主要有这两个:
- Unroll StringBuilder.Append for const string (#85894)
- StringBuilder: use Span.Fill in Append repeating char (#86287)
测试代码
using System.Text;
using BenchmarkDotNet.Attributes;
namespace CSharpBenchmarks.StringTest
{
/// <summary>
/// BenchmarkDotNet benchmark that creates a StringBuilder, appends one constant
/// string to it and converts the result to a string.
/// </summary>
/// <remarks>
/// The class enables two BenchmarkDotNet diagnosers: MemoryDiagnoser and
/// DisassemblyDiagnoser with printSource set to true.
/// </remarks>
[MemoryDiagnoser]
[DisassemblyDiagnoser(printSource: true)]
public class StringBuilderTest
{
/// <summary>
/// Number of Append() calls made by one AppendTest invocation; parameterized with 16 and 128.
/// </summary>
[Params(16, 128)]
public int Count { get; set; }
/// <summary>
/// Benchmark entry point: calls Append() Count times and discards every result.
/// </summary>
[Benchmark]
public void AppendTest()
{
for (int i = 0; i < Count; i++)
{
// The returned string is intentionally discarded.
_ = Append();
}
}
/// <summary>
/// Creates a StringBuilder with capacity 16, appends the 12-character constant
/// "hello world!" and returns the built string.
/// </summary>
/// <returns>The string produced by the builder's ToString().</returns>
public string Append()
{
StringBuilder sb = new StringBuilder(16);
// Append returns the builder; the discard makes that explicit.
_ = sb.Append("hello world!");
return sb.ToString();
}
}
}
看BenchmarkDotNet执行的结果,单次Append的结果:
从结果看,Append方法在.Net 8中比.Net 7性能提升了15%左右.
下面接着看16次和128次执行结果对比:
如果对比生成的Code Size列,会发现单次执行Append时的代码大小为: .Net 7是855B, .Net 8是930B;而多次执行后,JIT经过分层编译(从第0层提升到第1层时,JIT会对代码进行优化),代码大小为: .Net 7是129B, .Net 8是689B
接下来我们分别看看.Net 7和.Net 8生成的汇编代码:
;;.Net 7
; CSharpBenchmarks.StringTest.StringBuilderTest.AppendTest()
; JIT output for the benchmark loop: Append() stays an out-of-line call on every iteration.
; for (int i = 0; i < Count; i++)
; ^^^^^^^^^
; Append();
; ^^^^^^^^^
; Prologue: save rdi and rsi, reserve stack space.
push rdi
push rsi
sub rsp,28
; rsi = incoming rcx (passed back in rcx to Append() below); edi = loop counter i = 0.
mov rsi,rcx
xor edi,edi
; Skip the loop entirely when the dword at [rsi+8] is <= 0
; (presumably the Count backing field, per the source line above - TODO confirm).
cmp dword ptr [rsi+8],0
jle short M00_L01
M00_L00:
; Loop body: indirect call to Append(); rax is not read afterwards, so the result is dropped.
mov rcx,rsi
call qword ptr [7FF90867D810]; CSharpBenchmarks.StringTest.StringBuilderTest.Append()
; i++ and repeat while i < [rsi+8] (signed compare).
inc edi
cmp edi,[rsi+8]
jl short M00_L00
M00_L01:
; Epilogue: release the stack space and restore the saved registers.
add rsp,28
pop rsi
pop rdi
ret
; Total bytes of code 40
;
; CSharpBenchmarks.StringTest.StringBuilderTest.Append()
; JIT output for Append(): the constructor, the append and ToString are all separate call targets.
; Immediates appear to be printed in hexadecimal without a prefix (the 10 below matches the 16 in the source line).
; StringBuilder sb = new StringBuilder(16);
; ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
; sb.Append("hello world!");
; ^^^^^^^^^^^^^^^^^^^^^^^^^^
push rsi
sub rsp,20
; Allocate the StringBuilder object; rsi keeps the new instance across the calls below.
mov rcx,offset MT_System.Text.StringBuilder
call CORINFO_HELP_NEWSFAST
mov rsi,rax
; Constructor arguments: rcx = instance, edx = 0x10, r8d = 0x7FFFFFFF.
mov rcx,rsi
mov edx,10
mov r8d,7FFFFFFF
; Call the StringBuilder constructor.
call qword ptr [7FF908675540]; System.Text.StringBuilder..ctor(Int32, Int32)
; rdx = pointer loaded from 1F0E4C08DB8, advanced by 0x0C
; (presumably the "hello world!" string object moved to its first char - TODO confirm).
mov rdx,1F0E4C08DB8
mov rdx,[rdx]
add rdx,0C
; rcx = instance, r8d = 0x0C (12, which matches the length of "hello world!").
mov rcx,rsi
mov r8d,0C
; Call StringBuilder.Append.
call qword ptr [7FF908675E10]; System.Text.StringBuilder.Append(Char ByRef, Int32)
mov rcx,rsi
add rsp,20
pop rsi
; Tail-jump (jmp, not call) to ToString, whose result becomes this method's return value.
jmp qword ptr [7FF9086817C0]; System.Text.StringBuilder.ToString()
; Total bytes of code 89
.Net 8汇编代码:
; .NET 8 inlines Append() into AppendTest, so the loop body no longer contains a call to Append().
; The original note also mentions an optimization for filling via Span.Fill; no Span.Fill call is visible in this listing.
; CSharpBenchmarks.StringTest.StringBuilderTest.AppendTest()
; for (int i = 0; i < Count; i++)
; ^^^^^^^^^
; Append();
; ^^^^^^^^^
; Prologue: save rdi, rsi and rbx, reserve stack space.
push rdi
push rsi
push rbx
sub rsp,20
vzeroupper
; rbx = incoming rcx; esi = loop counter i = 0.
mov rbx,rcx
xor esi,esi
; Skip the loop when the dword at [rbx+8] is <= 0 (presumably the Count backing field - TODO confirm).
cmp dword ptr [rbx+8],0
jle short M00_L02
M00_L00:
; Allocate the StringBuilder object; rdi = new instance.
mov rcx,offset MT_System.Text.StringBuilder
call CORINFO_HELP_NEWSFAST
mov rdi,rax
; No constructor call here: 0x7FFFFFFF is stored at [rdi+20], a char[] of 0x10 elements is
; allocated, and its reference is stored at [rdi+8] through CORINFO_HELP_ASSIGN_REF.
mov dword ptr [rdi+20],7FFFFFFF
mov edx,10
mov rcx,offset MT_System.Char[]
call CORINFO_HELP_NEWARR_1_VC
lea rcx,[rdi+8]
mov rdx,rax
call CORINFO_HELP_ASSIGN_REF
; rdx = constant address used as the copy source below
; (presumably the first char of "hello world!" - TODO confirm).
mov rdx,1F2803086FC
; rcx = array reference at [rdi+8]; r8d = dword at [rdi+18] (presumably the current chunk length).
mov rcx,[rdi+8]
mov r8d,[rdi+18]
; Take the inline copy path when [rcx+8] (presumably the array length) >= r8d + 0x0C (unsigned compare).
lea eax,[r8+0C]
cmp [rcx+8],eax
jae short M00_L03
; Otherwise call AppendWithExpansion with rcx = instance, rdx = source, r8d = 0x0C.
mov rcx,rdi
mov r8d,0C
call qword ptr [7FF974D8E508]; System.Text.StringBuilder.AppendWithExpansion(Char ByRef, Int32)
M00_L01:
; Build the result string; rax is not read afterwards, so the result is dropped.
mov rcx,rdi
call qword ptr [7FF974D992B0]; System.Text.StringBuilder.ToString()
; i++ and repeat while i < [rbx+8] (signed compare).
inc esi
cmp esi,[rbx+8]
jl short M00_L00
M00_L02:
; Epilogue: release the stack space and restore the saved registers.
add rsp,20
pop rbx
pop rsi
pop rdi
ret
M00_L03:
; Inline copy path: rcx = array + r8*2 + 0x10, i.e. the slot at index r8 for 2-byte elements.
movsxd rax,r8d
lea rcx,[rcx+rax*2+10]
; Two overlapping 16-byte moves (source/destination offsets 0 and 8) copy 24 bytes,
; which is 12 two-byte chars, without calling Memmove.
vmovdqu xmm0,xmmword ptr [rdx]
vmovdqu xmm1,xmmword ptr [rdx+8]
vmovdqu xmmword ptr [rcx],xmm0
vmovdqu xmmword ptr [rcx+8],xmm1
; Advance the dword at [rdi+18] by 0x0C and rejoin the main path.
add r8d,0C
mov [rdi+18],r8d
jmp short M00_L01
; Total bytes of code 180
; AppendWithExpansion, the call target used by AppendTest when the inline copy path is not taken.
; The original note states that .NET 7 does not have this method; that cannot be verified from this listing - TODO confirm.
; System.Text.StringBuilder.AppendWithExpansion(Char ByRef, Int32)
; Prologue: save the registers used below and reserve stack space.
push r15
push r14
push r13
push rdi
push rsi
push rbp
push rbx
sub rsp,20
; rbx = instance (rcx), rdi = source reference (rdx), esi = count (r8d).
mov rbx,rcx
mov rdi,rdx
mov esi,r8d
; ecx = [rbx+1C] + [rbx+18] + count; ebp = [rbx+18]
; (presumably chunk offset and chunk length - TODO confirm).
mov ecx,[rbx+1C]
mov ebp,[rbx+18]
add ecx,ebp
add ecx,esi
; Throw path if the sum exceeds [rbx+20] or is smaller than count (signed compares).
cmp ecx,[rbx+20]
jg short M01_L02
cmp ecx,esi
jl short M01_L02
; r14d = [array+8] - ebp, where array = [rbx+8] (presumably the free space left in the current chunk).
mov rcx,[rbx+8]
mov r14d,[rcx+8]
sub r14d,ebp
; If r14d > 0, first copy r14d elements into the current array (M01_L03).
test r14d,r14d
jg near ptr M01_L03
M01_L00:
; esi = count - r14d; call ExpandByABlock(instance, esi).
sub esi,r14d
mov rcx,rbx
mov edx,esi
call qword ptr [7FF974D8E5C8]; System.Text.StringBuilder.ExpandByABlock(Int32)
; rdx = source + r14d*2: the source advanced past r14d two-byte elements.
movsxd r8,r14d
lea rdx,[rdi+r8*2]
; Reload the array reference from [rbx+8]; a null reference is handled at M01_L08.
mov r8,[rbx+8]
test r8,r8
je near ptr M01_L08
; rcx = array + 0x10 (destination), r15d = [array+8]; go to M01_L09 if esi > r15d (unsigned).
lea rcx,[r8+10]
mov r15d,[r8+8]
cmp esi,r15d
ja near ptr M01_L09
M01_L01:
; Byte count = esi * 2; copy with Memmove(rcx = destination, rdx = source, r8 = bytes).
mov r8d,esi
add r8,r8
call qword ptr [7FF9749F5E78]; System.Buffer.Memmove(Byte ByRef, Byte ByRef, UIntPtr)
; Store esi at [rbx+18], then run the epilogue and return.
mov [rbx+18],esi
add rsp,20
pop rbx
pop rbp
pop rsi
pop rdi
pop r13
pop r14
pop r15
ret
M01_L02:
; Throw path: allocate an ArgumentOutOfRangeException, fetch a string constant via CORINFO_HELP_STRCNS,
; call two targets that this listing does not name, then throw the exception object.
mov rcx,offset MT_System.ArgumentOutOfRangeException
call CORINFO_HELP_NEWSFAST
mov rbx,rax
mov ecx,184C1
mov rdx,7FF9747B4000
call CORINFO_HELP_STRCNS
mov rbp,rax
call qword ptr [7FF9749C7D50]
mov r8,rax
mov rdx,rbp
mov rcx,rbx
call qword ptr [7FF974A36280]
mov rcx,rbx
call CORINFO_HELP_THROW
M01_L03:
; Compute the destination inside the current array: start index ebp, remaining length in r13d.
mov r15,[rbx+8]
test r15,r15
jne short M01_L04
; Null array: only a zero start index is accepted; destination and length are both zeroed.
test ebp,ebp
jne short M01_L05
xor ecx,ecx
xor r13d,r13d
jmp short M01_L07
M01_L04:
; Non-null array: require [r15+8] >= ebp (unsigned), otherwise fall into M01_L05.
cmp [r15+8],ebp
jae short M01_L06
M01_L05:
; Call to a target this listing does not name, followed by int 3 (presumably a non-returning throw helper - TODO confirm).
call qword ptr [7FF9749C4060]
int 3
M01_L06:
; rcx = array + ebp*2 + 0x10 (destination), r13d = [r15+8] - ebp.
mov r8d,ebp
lea rcx,[r15+r8*2+10]
mov r13d,[r15+8]
sub r13d,ebp
M01_L07:
; Go to M01_L09 if r14d > r13d (unsigned); otherwise copy r14d*2 bytes from the source.
cmp r14d,r13d
ja short M01_L09
mov r8d,r14d
add r8,r8
mov rdx,rdi
; Call Memmove.
call qword ptr [7FF9749F5E78]; System.Buffer.Memmove(Byte ByRef, Byte ByRef, UIntPtr)
; Store [array+8] at [rbx+18], then continue at M01_L00 with the remainder.
mov rax,[rbx+8]
mov eax,[rax+8]
mov [rbx+18],eax
jmp near ptr M01_L00
M01_L08:
; Null array after ExpandByABlock: destination and length are zero, so only esi == 0 reaches M01_L01.
xor ecx,ecx
xor r15d,r15d
cmp esi,r15d
jbe near ptr M01_L01
M01_L09:
; Call to a target this listing does not name, followed by int 3 (presumably a non-returning throw helper - TODO confirm).
call qword ptr [7FF9749C4078]
int 3
; Total bytes of code 317
; System.Text.StringBuilder.ToString()
; Allocates the result string, then walks the objects linked through [rbx+10] and copies each one's chars into it.
push rsi
push rbx
sub rsp,28
; rbx = instance; ecx = [rbx+1C] + [rbx+18]
; (presumably chunk offset + chunk length, i.e. the total length - TODO confirm).
mov rbx,rcx
mov ecx,[rbx+1C]
add ecx,[rbx+18]
; A sum of zero returns a constant at M02_L03 without allocating.
je near ptr M02_L03
; rsi = newly allocated string of ecx chars.
call System.String.FastAllocateString(Int32)
mov rsi,rax
M02_L00:
; r8d = [rbx+18]; nothing to copy for this object when it is <= 0.
mov r8d,[rbx+18]
test r8d,r8d
jle short M02_L01
; rdx = array at [rbx+8], ecx = [rbx+1C].
mov rdx,[rbx+8]
mov ecx,[rbx+1C]
; Throw path if ecx + r8d > [rsi+8] or if [rdx+8] < r8d (unsigned compares).
lea eax,[r8+rcx]
cmp eax,[rsi+8]
ja short M02_L02
cmp [rdx+8],r8d
jb short M02_L02
; Destination = rsi + ecx*2 + 0x0C, source = array + 0x10, byte count = r8d * 2.
movsxd rcx,ecx
lea rcx,[rsi+rcx*2+0C]
add rdx,10
movsxd r8,r8d
add r8,r8
; Call Memmove.
call qword ptr [7FF9749F5E78]; System.Buffer.Memmove(Byte ByRef, Byte ByRef, UIntPtr)
M02_L01:
; Follow the reference at [rbx+10] and loop while it is non-null.
mov rbx,[rbx+10]
test rbx,rbx
jne short M02_L00
; Return the filled string.
mov rax,rsi
add rsp,28
pop rbx
pop rsi
ret
M02_L02:
; Throw path: allocate an ArgumentOutOfRangeException, fetch a string constant via CORINFO_HELP_STRCNS,
; call two targets that this listing does not name, then throw the exception object.
mov rcx,offset MT_System.ArgumentOutOfRangeException
call CORINFO_HELP_NEWSFAST
mov rbx,rax
mov ecx,18491
mov rdx,7FF9747B4000
call CORINFO_HELP_STRCNS
mov rsi,rax
call qword ptr [7FF9749C7C78]
mov r8,rax
mov rdx,rsi
mov rcx,rbx
call qword ptr [7FF974A36280]
mov rcx,rbx
call CORINFO_HELP_THROW
M02_L03:
; Zero-length case: return the constant 1F280300008 (presumably the empty string instance - TODO confirm).
mov rax,1F280300008
add rsp,28
pop rbx
pop rsi
ret
; Total bytes of code 190
秋风
2023-11-05