.NET 7 中的正则表达式性能改进
起因
前天 .NET 7 发布了 Preview 2,昨天我卸载了 Preview 1 并安装了 Preview 2。今天看到微软博客上有关于 .NET 7 Preview 2 改进的相关介绍,便使用 BenchmarkDotNet 进行了基准测试。测试代码如下:
using System.Text.RegularExpressions;
using BenchmarkDotNet.Attributes;
namespace net6perf.Synax
{
    /// <summary>
    /// BenchmarkDotNet benchmark comparing a <see cref="Regex"/> constructed at run time
    /// with the source-generated regex returned by <see cref="MyRegex"/>. Both use the
    /// pattern <c>abc|def</c> with <see cref="RegexOptions.IgnoreCase"/>.
    /// </summary>
    [DisassemblyDiagnoser(printSource: true, maxDepth: 3)]
    [MemoryDiagnoser]
    public partial class RegexTest
    {
        /// <summary>Number of <c>IsMatch</c> calls made per benchmark invocation.</summary>
        [Params(1024, 2048, 4096)]
        public int Count { get; set; }

        /// <summary>
        /// Text matched on every iteration. Of the configured values only "abc"
        /// matches the pattern; "qwe" and "xyz" exercise the no-match path.
        /// </summary>
        [Params("qwe", "abc", "xyz")]
        public string? Input { get; set; }

        /// <summary>Baseline: a regex built through the regular constructor.</summary>
        public Regex regex = new Regex(@"abc|def", RegexOptions.IgnoreCase);

        // .NET 7 adds regex source generation. Usage, in short:
        // 1. Annotate the method with the RegexGenerator attribute.
        // 2. Declare the method as static and partial; the generator supplies the body.
        // NOTE(review): RegexGenerator is the preview-era attribute name; released
        // .NET 7 SDKs expose it as [GeneratedRegex] - confirm against the target SDK.
        [RegexGenerator(@"abc|def", RegexOptions.IgnoreCase)]
        public static partial Regex MyRegex();

        /// <summary>Runs <see cref="Count"/> matches with the runtime-constructed regex.</summary>
        /// <returns>
        /// The sum of the loop indices whose match succeeded. It is returned so the
        /// benchmark harness consumes it and the accumulation cannot be removed as dead code.
        /// </returns>
        [Benchmark(Baseline = true)]
        public int Before()
        {
            int sum = 0;
            for (int i = 0; i < Count; i++)
            {
                if (regex.IsMatch(Input!))
                {
                    sum += i;
                }
            }

            return sum;
        }

        /// <summary>Runs <see cref="Count"/> matches with the source-generated regex.</summary>
        /// <returns>
        /// The sum of the loop indices whose match succeeded. It is returned so the
        /// benchmark harness consumes it and the accumulation cannot be removed as dead code.
        /// </returns>
        [Benchmark]
        public int After()
        {
            int sum = 0;
            for (int i = 0; i < Count; i++)
            {
                // MyRegex() stays inside the loop, as in the original measurement.
                if (MyRegex().IsMatch(Input!))
                {
                    sum += i;
                }
            }

            return sum;
        }
    }
}
基准测试结果:
在另外一台机器上测试,得出的结果也差不多。可以看到,使用新方式后,性能最高提升 54%,最低也有 20% 的提升。通过代码生成的方式带来的性能提升还是可以的。
为什么是代码生成呢?
反编译一下程序集(代码):public class Program
{
// Decompiled view of the program after the regex source generator has run.
// MyRegex() returns a singleton of the nested GeneratedRegex_MyRegex_D859DD68 class,
// whose Runner contains matching code specialized for the pattern "abc|def".
[GeneratedCode("System.Text.RegularExpressions.Generator", "7.0.6.15202")]
[EditorBrowsable(EditorBrowsableState.Never)]
private sealed class GeneratedRegex_MyRegex_D859DD68 : Regex // 1. Derives from Regex
{
private sealed class RunnerFactory : RegexRunnerFactory // 2. Derives from RegexRunnerFactory
{
private sealed class Runner : RegexRunner // 3. Derives from RegexRunner
{
protected override void InitTrackCount() // 4. Overrides InitTrackCount
{
// NOTE(review): the article described this value as "the length of the expression"
// ("abc|def" is 7 characters long); runtrackcount is presumably a backtracking
// track-count setting rather than a length - confirm against the RegexRunner docs.
runtrackcount = 7;
}
// Searches the remaining input for a position where a match could begin.
// On success stores that position in runtextpos and returns true; otherwise
// sets runtextpos to the end of the input and returns false.
protected override bool FindFirstChar() // 5. Overrides FindFirstChar
{
int pos = runtextpos;
int end = runtextend;
ReadOnlySpan<char> inputSpan = runtext;
// Both alternatives are three characters long; this check skips the scan
// when fewer than three characters remain.
if (pos < end - 2)
{
ReadOnlySpan<char> span = inputSpan.Slice(pos, end - pos);
int i;
for (i = 0; i < span.Length - 2; i++)
{
// "CFcf" is emitted by the generator from the pattern: it is the third character
// of each alternative (ab[c] | de[f]) in both cases, searched for two positions
// ahead of the candidate start i.
int indexOfPos = span.Slice(i + 2).IndexOfAny("CFcf");
if (indexOfPos < 0)
{
break;
}
i += indexOfPos;
if (i >= span.Length - 2)
{
break;
}
char ch;
// ASCII bitmap lookups: the string is indexed by the high bits (ch >> 4) and
// tested with the bit selected by the low nibble (ch & 0xF).
// First table (\u0012 at indices 4 and 6) accepts 'A', 'D', 'a', 'd' for span[i];
// second table ('$' = 0x24 at indices 4 and 6) accepts 'B', 'E', 'b', 'e' for span[i + 1].
if ((ch = span[i]) < '\u0080' && ("\0\0\0\0\u0012\0\u0012\0"[(int)ch >> 4] & (1 << (ch & 0xF))) != 0 && (ch = span[i + 1]) < '\u0080' && ("\0\0\0\0$\0$\0"[(int)ch >> 4] & (1 << (ch & 0xF))) != 0)
{
runtextpos = pos + i;
return true;
}
}
}
runtextpos = end;
return false;
}
// Attempts the match at runtextpos. On success advances runtextpos by three
// characters and records capture group 0; on any mismatch returns without capturing.
[SkipLocalsInit] // Optimization: SkipLocalsInit skips zero-initialization of this method's locals
protected override void Go()
{
ReadOnlySpan<char> inputSpan = runtext;
int pos = runtextpos;
int end = runtextend;
int original_pos = pos;
// textInfo is not referenced again in the body shown here.
TextInfo textInfo = CultureInfo.CurrentCulture.TextInfo;
ReadOnlySpan<char> slice = inputSpan.Slice(pos, end - pos);
if (slice.IsEmpty)
{
return;
}
switch (slice[0]) // Generated at compile time from the pattern
{
default:
return;
case 'A':
case 'a':
// OR-ing with 0x20 folds an ASCII letter to lower case; 98 = 'b', 99 = 'c'.
if ((uint)slice.Length < 3u || (slice[1] | 0x20) != 98 || (slice[2] | 0x20) != 99)
{
return;
}
pos += 3;
slice = slice.Slice(3);
break;
case 'D':
case 'd':
// Same folding as above; 101 = 'e', 102 = 'f'.
if ((uint)slice.Length < 3u || (slice[1] | 0x20) != 101 || (slice[2] | 0x20) != 102)
{
return;
}
pos += 3;
slice = slice.Slice(3);
break;
}
runtextpos = pos;
Capture(0, original_pos, pos);
}
}
// Returns a new Runner on every call.
protected override RegexRunner CreateInstance()
{
return new Runner();
}
}
// Singleton instance
public static Regex Instance { get; } = new GeneratedRegex_MyRegex_D859DD68();
// Private constructor: instances are only created through the Instance initializer above.
private GeneratedRegex_MyRegex_D859DD68()
{
pattern = "abc|def"; // The pattern being matched
roptions = RegexOptions.IgnoreCase | RegexOptions.Compiled; // Options recorded for this regex
internalMatchTimeout = Timeout.InfiniteTimeSpan;
factory = new RunnerFactory();
// NOTE(review): presumably the number of capture groups (group 0 only) - confirm.
capsize = 1;
}
}
// The partial method declared by the user; the generated body returns the singleton.
[RegexGenerator("abc|def", RegexOptions.IgnoreCase)]
[GeneratedCode("System.Text.RegularExpressions.Generator", "7.0.6.15202")]
public static Regex MyRegex()
{
return GeneratedRegex_MyRegex_D859DD68.Instance;
}
// Entry point: prints whether "xyz" matches the generated regex.
private static void Main(string[] args)
{
Console.WriteLine(MyRegex().IsMatch("xyz"));
}
}
调用流程图

秋风
2022-03-18