String.Create真的快吗?
起因
本篇博文是《在.Net 中使用正则表达式注意事项》的后续.在那篇博文中,由于.Net Framework 4.8不支持String.Create,所以没有拿String.Create和字符数组创建新字符串的方式进行对比.后来还是补做了基准测试,发现String.Create在性能上相比字符数组加String构造函数没有任何优势.String.Create是一个相对较新的API,在.Net Core 2.1及之后的版本才支持.我是在博客园的博文中知道String.Create的,文中说String.Create性能很好,但只是和StringBuilder/String.Concat对比,而且没看到具体的示例代码,所以对这个结论我持保留意见.性能测试代码不一定通用(每个人用法不同,硬件不同,得出的结果自然也不同),我只是想针对这一块自己做一次性能测试.
String.Create 使用
我们先看如何使用String.Create.在Create函数上转到定义,可以看到以下内容:
//
// Summary:
// Creates a new string with a specific length and initializes it after creation
// by using the specified callback.
// The callback receives a writable span over the new string's characters together with state.
// Parameters:
// length:
// The length of the string to create.
//
// state:
// The element to pass to action.
//
// action:
// A callback to initialize the string.
//
// Type parameters:
// TState:
// The type of the element to pass to action.
//
// Returns:
// The created string.
public static string Create<TState>(int length, TState state, SpanAction<char, TState> action);
String.Create文档地址: https://learn.microsoft.com/dotnet/api/system.string.create
下面看如何使用:
/// <summary>
/// Builds a copy of <paramref name="input"/> in which the first character that is not an
/// uppercase ASCII letter ('A'-'Z') is replaced by 'R'. All other characters are copied as-is.
/// </summary>
/// <param name="input">The text to transform.</param>
/// <returns>A new string with the same length as <paramref name="input"/>.</returns>
public string CreateReplace(string input)
{
    int len = input.Length;

    // string.Create receives: the length of the string to create, a state value (the source
    // string) and a callback that fills the freshly allocated character buffer.
    // NOTE: the lambda reads the local `len`, so the compiler has to allocate a closure object
    // and a new delegate on every call.
    return string.Create(len, input, (target, src) =>
    {
        // Copy the leading run of 'A'-'Z' and remember the index where it ends.
        int position = -1;
        for (int i = 0; i < len; i++)
        {
            char current = src[i];
            if (current < 'A' || current > 'Z')
            {
                position = i;
                break;
            }

            target[i] = current;
        }

        // When no such character exists, the loop above already copied everything.
        // ">= 0" (not "> 0"): a match at index 0 must be replaced as well, otherwise
        // slot 0 would be left as the default '\0'.
        if (position >= 0)
        {
            target[position] = 'R';

            // Copy everything after the replaced character unchanged.
            for (int i = position + 1; i < len; i++)
            {
                target[i] = src[i];
            }
        }
    });
}
进行String.Create和字符数组通过String构造函数创建新字符串性能对比
using BenchmarkDotNet.Attributes;
namespace dotnet_perf
{
    /// <summary>
    /// Benchmark that compares two ways of producing a modified copy of a string:
    /// filling a char array and passing it to the string constructor, versus string.Create
    /// with a lambda callback.
    /// </summary>
    [MemoryDiagnoser]
    [DisassemblyDiagnoser(printSource: true)]
    public class StringCreateComment
    {
        /// <summary>Number of conversions performed per benchmark invocation.</summary>
        [Params(4096)]
        public int Count { get; set; }

        /// <summary>Sample input passed to both implementations.</summary>
        public static string OrderNum = "FA210609";

        /// <summary>Baseline: char array + string constructor, repeated <see cref="Count"/> times.</summary>
        [Benchmark(Baseline = true)]
        public void Replace3()
        {
            for (int i = 0; i < Count; i++)
            {
                ArrayReplace(OrderNum);
            }
        }

        /// <summary>string.Create with a lambda callback, repeated <see cref="Count"/> times.</summary>
        [Benchmark]
        public void Replace4()
        {
            for (int i = 0; i < Count; i++)
            {
                CreateReplace(OrderNum);
            }
        }

        /// <summary>
        /// Builds a copy of <paramref name="input"/> in which the first character that is not an
        /// uppercase ASCII letter ('A'-'Z') is replaced by 'R', using a temporary char array.
        /// </summary>
        /// <param name="input">The text to transform.</param>
        /// <returns>A new string with the same length as <paramref name="input"/>.</returns>
        public string ArrayReplace(string input)
        {
            int len = input.Length;
            char[] arr = new char[len];

            // Copy the leading run of 'A'-'Z' and remember the index where it ends.
            int position = -1;
            for (int i = 0; i < len; i++)
            {
                char current = input[i];
                if (current < 'A' || current > 'Z')
                {
                    position = i;
                    break;
                }

                arr[i] = current;
            }

            // ">= 0" (not "> 0"): a match at index 0 must be replaced as well, otherwise
            // slot 0 would be left as the default '\0'.
            if (position >= 0)
            {
                arr[position] = 'R';

                // Copy everything after the replaced character unchanged.
                for (int i = position + 1; i < len; i++)
                {
                    arr[i] = input[i];
                }
            }

            // The string constructor allocates the string and copies the array into it.
            return new string(arr, 0, len);
        }

        /// <summary>
        /// Same transformation as <see cref="ArrayReplace"/>, written with string.Create.
        /// </summary>
        /// <param name="input">The text to transform.</param>
        /// <returns>A new string with the same length as <paramref name="input"/>.</returns>
        public string CreateReplace(string input)
        {
            int len = input.Length;

            // string.Create receives: the length of the string to create, a state value (the
            // source string) and a callback that fills the freshly allocated character buffer.
            // The lambda reads the local `len`, i.e. it is a capturing lambda; this is the
            // variant being measured, so the capture is kept on purpose.
            return string.Create(len, input, (target, src) =>
            {
                int position = -1;
                for (int i = 0; i < len; i++)
                {
                    char current = src[i];
                    if (current < 'A' || current > 'Z')
                    {
                        position = i;
                        break;
                    }

                    target[i] = current;
                }

                // See ArrayReplace: index 0 has to be handled too.
                if (position >= 0)
                {
                    target[position] = 'R';
                    for (int i = position + 1; i < len; i++)
                    {
                        target[i] = src[i];
                    }
                }
            });
        }
    }
}
从上图看出,在.Net Core 3.1/.Net 5及.Net 6上,String.Create相比字符数组加String构造函数创建新字符串的方式,在耗时/内存使用及0代GC上都是完败.具体来说,String.Create在耗时上落后10%左右,在内存使用和0代GC回收上落后37.5%左右.
使用String.Create为什么会没有直接使用字符数组加构造函数性能好呢?带着这个疑问,我们去看看String.Create和String构造函数的源码.
先看看String.Create的源码:
// Excerpt of String.Create as quoted from the .NET runtime source.
// Validates the arguments, allocates the string, then lets the callback fill its characters.
public static string Create<TState>(int length, TState state, SpanAction<char, TState> action)
{
if (action == null)
throw new ArgumentNullException(nameof(action));
if (length <= 0)
{
// A zero length yields the shared empty string; a negative length is an error.
if (length == 0)
return Empty;
throw new ArgumentOutOfRangeException(nameof(length));
}
string result = FastAllocateString(length); // allocate the string for the requested length first (FastAllocateString lives in CoreCLR, so its source is not shown here)
action(new Span<char>(ref result.GetRawStringData(), length), state); // invoke the caller-supplied callback over the new string's character data
return result;
}
再看看String的2个构造函数:
// Excerpt of the String(char[]) constructor and the Ctor method named in its
// DynamicDependency attribute, as quoted from the .NET runtime source.
[MethodImpl(MethodImplOptions.InternalCall)]
[DynamicDependency("Ctor(System.Char[])")]
public extern String(char[]? value);
#pragma warning disable CA1822 // Mark members as static
private
#if !CORECLR
static
#endif
string Ctor(char[]? value)
{
// A null or empty array yields the shared empty string.
if (value == null || value.Length == 0)
return Empty;
string result = FastAllocateString(value.Length); // 1. allocate the string for the array's length
Buffer.Memmove(
elementCount: (uint)result.Length, // derefing Length now allows JIT to prove 'result' not null below
destination: ref result._firstChar,
source: ref MemoryMarshal.GetArrayDataReference(value)); // 2. copy the array contents into the newly allocated string
return result; // 3. return the new string
}
// Excerpt of the String(char[], int, int) constructor and the Ctor method named in its
// DynamicDependency attribute, as quoted from the .NET runtime source.
[MethodImpl(MethodImplOptions.InternalCall)]
[DynamicDependency("Ctor(System.Char[],System.Int32,System.Int32)")]
public extern String(char[] value, int startIndex, int length);
private
#if !CORECLR
static
#endif
string Ctor(char[] value, int startIndex, int length)
{
// Argument validation: non-null array, non-negative start and length, range inside the array.
if (value == null)
throw new ArgumentNullException(nameof(value));
if (startIndex < 0)
throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_StartIndex);
if (length < 0)
throw new ArgumentOutOfRangeException(nameof(length), SR.ArgumentOutOfRange_NegativeLength);
if (startIndex > value.Length - length)
throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
if (length == 0)
return Empty;
string result = FastAllocateString(length); // 1. allocate the string for the requested length
Buffer.Memmove(
elementCount: (uint)result.Length, // derefing Length now allows JIT to prove 'result' not null below
destination: ref result._firstChar,
source: ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(value), startIndex)); // 2. copy the array contents, starting at startIndex, into the newly allocated string
return result; // 3. return the new string
}
在看完String.Create和String构造函数的源码后,我们发现两者的性能应该相差不大,外部调用的代码也相差不大.区别在于String.Create在使用时需要传入委托,那开销是不是就出在委托上呢?
那我们先看看调用String.Create的Create方法生成的IL代码:
.method public hidebysig
    instance string Create (
        string input
    ) cil managed
{
    // Method begins at RVA 0x2440
    // Code size 43 (0x2b)
    .maxstack 4
    .locals init (
        [0] class dotnet_perf.StringCreateTest/'<>c__DisplayClass9_0' 'CS$<>8__locals0'
    )
    // A compiler-generated closure object is instantiated; input.Length is stored into its `len` field.
    IL_0000: newobj instance void dotnet_perf.StringCreateTest/'<>c__DisplayClass9_0'::.ctor()
    IL_0005: stloc.0
    IL_0006: ldloc.0
    IL_0007: ldarg.1
    IL_0008: callvirt instance int32 [System.Runtime]System.String::get_Length()
    IL_000d: stfld int32 dotnet_perf.StringCreateTest/'<>c__DisplayClass9_0'::len
    IL_0012: ldloc.0
    IL_0013: ldfld int32 dotnet_perf.StringCreateTest/'<>c__DisplayClass9_0'::len
    IL_0018: ldarg.1
    IL_0019: ldloc.0
    IL_001a: ldftn instance void dotnet_perf.StringCreateTest/'<>c__DisplayClass9_0'::'<Create>b__0'(valuetype [System.Runtime]System.Span`1<char>, string)
    IL_0020: newobj instance void class [System.Runtime]System.Buffers.SpanAction`2<char, string>::.ctor(object, native int) // *** a SpanAction delegate instance is indeed created here
    IL_0025: call string [System.Runtime]System.String::Create<string>(int32, !!0, class [System.Runtime]System.Buffers.SpanAction`2<char, !!0>)
    IL_002a: ret
} // end of method StringCreateTest::Create
知道了String.Create的瓶颈在哪里,我们对代码进行改进,给Create函数所需的委托加上缓存,再进行一次基准测试.
using System;
using System.Buffers;
using BenchmarkDotNet.Attributes;
namespace dotnet_perf
{
    /// <summary>
    /// Benchmark that compares three ways of producing a modified copy of a string:
    /// char array + string constructor, string.Create with a capturing lambda, and
    /// string.Create with a delegate instance that is created once and reused.
    /// </summary>
    [MemoryDiagnoser]
    [DisassemblyDiagnoser(printSource: true)]
    public class StringCreateTest
    {
        /// <summary>Number of conversions performed per benchmark invocation.</summary>
        [Params(4096)]
        public int Count { get; set; }

        /// <summary>Sample input passed to all implementations.</summary>
        public static string OrderNum = "FA210609";

        /// <summary>Baseline: char array + string constructor, repeated <see cref="Count"/> times.</summary>
        [Benchmark(Baseline = true)]
        public void ArrayTest()
        {
            for (int i = 0; i < Count; i++)
            {
                Array(OrderNum);
            }
        }

        /// <summary>string.Create with a capturing lambda, repeated <see cref="Count"/> times.</summary>
        [Benchmark]
        public void CreateTest()
        {
            for (int i = 0; i < Count; i++)
            {
                Create(OrderNum);
            }
        }

        /// <summary>string.Create with the cached delegate, repeated <see cref="Count"/> times.</summary>
        [Benchmark]
        public void CreateCaheTest()
        {
            for (int i = 0; i < Count; i++)
            {
                CreateCache(OrderNum);
            }
        }

        /// <summary>
        /// Builds a copy of <paramref name="input"/> in which the first character that is not an
        /// uppercase ASCII letter ('A'-'Z') is replaced by 'R', using a temporary char array.
        /// </summary>
        /// <param name="input">The text to transform.</param>
        /// <returns>A new string with the same length as <paramref name="input"/>.</returns>
        public string Array(string input)
        {
            int len = input.Length;
            char[] arr = new char[len];

            // Copy the leading run of 'A'-'Z' and remember the index where it ends.
            int position = -1;
            for (int i = 0; i < len; i++)
            {
                char current = input[i];
                if (current < 'A' || current > 'Z')
                {
                    position = i;
                    break;
                }

                arr[i] = current;
            }

            // ">= 0" (not "> 0"): a match at index 0 must be replaced as well, otherwise
            // slot 0 would be left as the default '\0'.
            if (position >= 0)
            {
                arr[position] = 'R';

                // Copy everything after the replaced character unchanged.
                for (int i = position + 1; i < len; i++)
                {
                    arr[i] = input[i];
                }
            }

            return new string(arr, 0, len);
        }

        /// <summary>
        /// Same transformation as <see cref="Array"/>, written with string.Create and a lambda.
        /// </summary>
        /// <param name="input">The text to transform.</param>
        /// <returns>A new string with the same length as <paramref name="input"/>.</returns>
        public string Create(string input)
        {
            int len = input.Length;

            // The lambda reads the local `len`, i.e. it is a capturing lambda; this is the
            // variant being measured, so the capture is kept on purpose.
            return string.Create(len, input, (target, src) =>
            {
                int position = -1;
                for (int i = 0; i < len; i++)
                {
                    char current = src[i];
                    if (current < 'A' || current > 'Z')
                    {
                        position = i;
                        break;
                    }

                    target[i] = current;
                }

                // Index 0 has to be handled too, hence ">= 0".
                if (position >= 0)
                {
                    target[position] = 'R';
                    for (int i = position + 1; i < len; i++)
                    {
                        target[i] = src[i];
                    }
                }
            });
        }

        /// <summary>
        /// Callback for string.Create: fills <paramref name="target"/> with the characters of
        /// <paramref name="src"/>, replacing the first character outside 'A'-'Z' with 'R'.
        /// It uses src.Length rather than a captured local, so no closure is needed.
        /// </summary>
        /// <param name="target">Character buffer to fill; indexed up to src.Length - 1.</param>
        /// <param name="src">The source text.</param>
        public void CreateAction(Span<char> target, string src)
        {
            int position = -1;
            for (int i = 0; i < src.Length; i++)
            {
                char current = src[i];
                if (current < 'A' || current > 'Z')
                {
                    position = i;
                    break;
                }

                target[i] = current;
            }

            // Index 0 has to be handled too, hence ">= 0".
            if (position >= 0)
            {
                target[position] = 'R';
                for (int i = position + 1; i < src.Length; i++)
                {
                    target[i] = src[i];
                }
            }
        }

        /// <summary>
        /// Cached delegate for <see cref="CreateAction"/>, so that a new delegate instance is
        /// not created on every call. Assigned by the first call to <see cref="CreateCache"/>.
        /// </summary>
        public SpanAction<char, string> spanAction = null;

        /// <summary>
        /// Same transformation as <see cref="Create"/>, but passes the cached
        /// <see cref="spanAction"/> delegate to string.Create.
        /// </summary>
        /// <param name="input">The text to transform.</param>
        /// <returns>A new string with the same length as <paramref name="input"/>.</returns>
        public string CreateCache(string input)
        {
            int len = input.Length;

            // Create the delegate only once; later calls reuse the stored instance.
            if (spanAction == null)
            {
                spanAction = CreateAction;
            }

            return string.Create(len, input, spanAction);
        }
    }
}
从上图看出,缓存委托之后的String.Create在各项指标上都提升不少:在耗时上竟然比字符数组加构造函数快了47%,内存使用和0代GC回收也减少了一半(相差2倍).尤其是在.Net 6上提升尤为明显.难道是之前测试的姿势不对吗?
性能优化是有必要的,但不能过度优化.代码是给人看的,所以优化和易读要进行平衡取舍.
秋风
2021-06-19