在.Net 7中bool类型源码优化

起因

代码总是从简单到复杂,从易读到晦涩:有的是因为业务逻辑复杂,导致代码也跟着变复杂;有的则是为了性能优化,导致代码不再那么易读易懂.写这篇文章,主要是因为看到了最近.Net中bool源码的两处变化:
  1. Streamline bool.TryParse/Format (#64782)
  2. Fix bool.TryParse/Format on big-endian systems (#65078)

在.Net 6中TryParse

// .NET 6 version: tries to parse "true"/"false" (any casing) from value.
// Returns true and sets result on a match; otherwise result is false and the return value is false.
public static bool TryParse(ReadOnlySpan<char> value, out bool result)
{
    // First pass: compare the input exactly as given. IsTrueStringIgnoreCase and
    // IsFalseStringIgnoreCase are the helpers to pay attention to here.
    bool matchedTrue = IsTrueStringIgnoreCase(value);
    if (matchedTrue || IsFalseStringIgnoreCase(value))
    {
        result = matchedTrue;
        return true;
    }

    // Second pass: run the input through TrimWhiteSpaceAndNull and compare once more.
    ReadOnlySpan<char> trimmed = TrimWhiteSpaceAndNull(value);
    matchedTrue = IsTrueStringIgnoreCase(trimmed);
    result = matchedTrue;
    return matchedTrue || IsFalseStringIgnoreCase(trimmed);
}

在.Net 6的TryParse中,调用了IsTrueStringIgnoreCase和IsFalseStringIgnoreCase这两个函数.这时候它们的实现还是易读易懂的:

// .NET 6 version: returns true when value is exactly "false" in any mix of upper/lower case.
internal static bool IsFalseStringIgnoreCase(ReadOnlySpan<char> value)
{
    // Anything that is not exactly five characters long cannot be "false".
    if (value.Length != 5)
    {
        return false;
    }

    // Compare character by character, accepting either casing at each position.
    return (value[0] is 'f' or 'F')
        && (value[1] is 'a' or 'A')
        && (value[2] is 'l' or 'L')
        && (value[3] is 's' or 'S')
        && (value[4] is 'e' or 'E');
}

// .NET 6 version: returns true when value is exactly "true" in any mix of upper/lower case.
internal static bool IsTrueStringIgnoreCase(ReadOnlySpan<char> value)
{
    // Anything that is not exactly four characters long cannot be "true".
    if (value.Length != 4)
    {
        return false;
    }

    // Compare character by character, accepting either casing at each position.
    return (value[0] is 't' or 'T')
        && (value[1] is 'r' or 'R')
        && (value[2] is 'u' or 'U')
        && (value[3] is 'e' or 'E');
}

在.Net 7中TryParse

// .NET 7 version: returns true when value is exactly "true" in any mix of upper/lower case,
// using a single 64-bit comparison instead of four per-character checks.
internal static bool IsTrueStringIgnoreCase(ReadOnlySpan<char> value)
{
    if (value.Length != 4)
    {
        return false;
    }

    // One 0x0020 per UTF-16 char: OR-ing it in turns 'A'..'Z' into 'a'..'z' (e.g. 'A' | 0x20 == 'a').
    // All four 16-bit lanes hold the same value, so the mask does not depend on endianness.
    const ulong LowercaseMask = 0x0020002000200020;

    // "true" packed into a ulong: 0x74 't', 0x72 'r', 0x75 'u', 0x65 'e'.
    // The lane order depends on the byte order of the machine.
    ulong expected = BitConverter.IsLittleEndian ? 0x65007500720074ul : 0x74007200750065ul;

    // Reinterpret the four chars as bytes and read them back as one ulong.
    ulong packed = MemoryMarshal.Read<ulong>(MemoryMarshal.AsBytes(value));

    return (packed | LowercaseMask) == expected;
}

// .NET 7 version: returns true when value is exactly "false" in any mix of upper/lower case,
// using one 64-bit comparison for "fals" plus one comparison for the trailing 'e'.
internal static bool IsFalseStringIgnoreCase(ReadOnlySpan<char> value)
{
    if (value.Length != 5)
    {
        return false;
    }

    // One 0x0020 per UTF-16 char: OR-ing it in lower-cases each ASCII letter.
    const ulong LowercaseMask = 0x0020002000200020;

    // A ulong only holds four chars, so only "fals" is packed: 0x66 'f', 0x61 'a', 0x6C 'l', 0x73 's'.
    // The lane order depends on the byte order of the machine.
    ulong expectedPrefix = BitConverter.IsLittleEndian ? 0x73006C00610066ul : 0x660061006C0073ul;

    // Reinterpret the chars as bytes and read the first four of them back as one ulong.
    ulong prefix = MemoryMarshal.Read<ulong>(MemoryMarshal.AsBytes(value)) | LowercaseMask;

    // The fifth char is lower-cased with the same OR trick and compared against 'e'.
    int lastLowered = value[4] | 0x20;

    // Non-short-circuit '&' on purpose: both comparisons are always evaluated.
    return (prefix == expectedPrefix) & (lastLowered == 'e');
}

// String overload: views the (possibly null) string as a span and delegates to the span-based TryParse.
public static bool TryParse([NotNullWhen(true)] string? value, out bool result)
{
    ReadOnlySpan<char> chars = value.AsSpan();
    return TryParse(chars, out result);
}

// .NET 7 version: tries to parse "true"/"false" (any casing) from value.
// Returns true and sets result on a match; otherwise result is false and the return value is false.
public static bool TryParse(ReadOnlySpan<char> value, out bool result)
{
    // Common path: the input is exactly "true" or "false" with nothing around it.
    bool matchedTrue = IsTrueStringIgnoreCase(value);
    if (matchedTrue || IsFalseStringIgnoreCase(value))
    {
        result = matchedTrue;
        return true;
    }

    // Uncommon path: the input may carry surrounding characters that need trimming.
    return TryParseUncommon(value, out result);

    static bool TryParseUncommon(ReadOnlySpan<char> text, out bool parsed)
    {
        parsed = false;

        // "true" is 4 characters long, so an input of 4 or fewer characters cannot
        // match "true" or "false" once something has been trimmed from it.
        int lengthBeforeTrim = text.Length;
        if (lengthBeforeTrim < 5)
        {
            return false;
        }

        text = TrimWhiteSpaceAndNull(text);
        if (text.Length == lengthBeforeTrim)
        {
            // Nothing was trimmed, so the outcome would be the same as the first attempt.
            return false;
        }

        // Something was trimmed: try matching again.
        if (IsTrueStringIgnoreCase(text))
        {
            parsed = true;
            return true;
        }

        return IsFalseStringIgnoreCase(text);
    }
}

// Returns the slice of value that remains after dropping leading and trailing
// characters that are either white space (per char.IsWhiteSpace) or '\0'.
private static ReadOnlySpan<char> TrimWhiteSpaceAndNull(ReadOnlySpan<char> value)
{
    // Advance past every trimmable character at the front.
    int first = 0;
    while (first < value.Length && IsTrimmable(value[first]))
    {
        first++;
    }

    // Walk back over every trimmable character at the end, never crossing 'first'.
    int last = value.Length - 1;
    while (last >= first && IsTrimmable(value[last]))
    {
        last--;
    }

    // When everything was trimmed, last == first - 1 and the slice is empty.
    return value.Slice(first, last - first + 1);

    static bool IsTrimmable(char c) => char.IsWhiteSpace(c) || c == '\0';
}


在.Net 6中TryFormat

// .NET 6 version: writes "True" or "False" into destination one character at a time.
// Returns false and sets charsWritten to 0 when destination is too short.
public bool TryFormat(Span<char> destination, out int charsWritten)
{
    bool current = m_value;

    if (current && (uint)destination.Length > 3u)
    {
        // True: four separate character stores.
        destination[0] = 'T';
        destination[1] = 'r';
        destination[2] = 'u';
        destination[3] = 'e';
        charsWritten = 4;
        return true;
    }

    if (!current && (uint)destination.Length > 4u)
    {
        // False: five separate character stores.
        destination[0] = 'F';
        destination[1] = 'a';
        destination[2] = 'l';
        destination[3] = 's';
        destination[4] = 'e';
        charsWritten = 5;
        return true;
    }

    charsWritten = 0;
    return false;
}


在.Net 7中TryFormat

// .NET 7 version: writes "True" or "False" into destination using one 64-bit store
// for the first four characters. Returns false and sets charsWritten to 0 when
// destination is too short.
public bool TryFormat(Span<char> destination, out int charsWritten)
{
    bool current = m_value;

    if (current && (uint)destination.Length > 3) // uint cast, per https://github.com/dotnet/runtime/issues/10596
    {
        // "True" packed into a ulong: 0x54 'T', 0x72 'r', 0x75 'u', 0x65 'e'; the lane order
        // depends on the byte order of the machine. Viewing the Span<char> as bytes lets the
        // four characters be written with one store instead of the four used in .NET 6.
        ulong packedTrue = BitConverter.IsLittleEndian ? 0x65007500720054ul : 0x54007200750065ul; // "True"
        MemoryMarshal.Write<ulong>(MemoryMarshal.AsBytes(destination), ref packedTrue);
        charsWritten = 4;
        return true;
    }

    if (!current && (uint)destination.Length > 4)
    {
        // A ulong only holds four characters, so "Fals" goes out in one store
        // and the trailing 'e' is written separately by index: two stores in total.
        ulong packedFals = BitConverter.IsLittleEndian ? 0x73006C00610046ul : 0x460061006C0073ul; // "Fals"
        MemoryMarshal.Write<ulong>(MemoryMarshal.AsBytes(destination), ref packedFals);
        destination[4] = 'e';
        charsWritten = 5;
        return true;
    }

    charsWritten = 0;
    return false;
}

在.Net 7中对bool的改进,主要是减少赋值和比较的次数:把4个字符打包成1个ulong,从而一次完成4个字符的写入或比较.另外还通过与0x20做按位或运算,巧妙地把大写ASCII字母统一转换为小写,从而实现不区分大小写的比较.这种写法不是第一次见到,曾在wrk(压力测试工具)的源码中看到过:

/* Sets bit 0x20 of c and casts the result to unsigned char; for an ASCII letter
 * this yields its lower-case form (e.g. 'A' | 0x20 == 'a').
 * NOTE(review): c is not parenthesized inside the expansion, so pass a simple expression. */
#define LOWER(c)            (unsigned char)(c | 0x20)


秋风 2022-02-14