源码学习.Net 6程序启动的过程

起因

在前面的博文《如何编译.Net 6 Runtime源码》中,我将代码切换到6.0-rc2分支,完成了.Net CoreCLR源码的编译.本文主要通过调试corerun项目,了解和学习.Net 6程序的运行过程.CLR内部源码很复杂,这里只学习如何加载coreclr.dll这一部分.
下面我们先看图.

.Net 6程序运行过程,先加载coreclr.dll
通过上图知道:
  1. 先加载coreclr.dll和依赖系统的动态库
  2. 加载System.Private.CoreLib.dll,在.Net Core和.Net 5/6中,核心库从.Net Framework的mscorlib.dll改为System.Private.CoreLib.dll(.Net核心库)
  3. 加载clrjit.dll,负责将IL代码转为汇编代码或者机器码
  4. 到这一步加载我们程序(main所在的程序)
  5. 后续加载System.Runtime.dll/System.Console.dll/System.Threading.dll等动态库

在.Net 6中,程序启动时的加载流程如上所述;而在.Net 7中会多加载一个clrgc.dll.从源码的提交信息中可以看到,这是要将GC独立出来,这样Mono也可以使用,并且还可以选择使用哪一种GC.我本地最初是在main分支上编译的.

学习源码

在学习源码之前,先看一下我整理的代码流程图:
corerun加载coreclr.dll运行流程
// Entry point of the corerun host: parses the command line, then either runs
// the built-in self test or loads CoreCLR and executes the managed program.
// Returns EXIT_FAILURE when argument parsing fails; otherwise the result of
// self_test() or run().
int MAIN(const int argc, const char_t* argv[])
{
    configuration config{};

    // Parse the command line into the run configuration.
    const bool parsed = parse_args(argc, argv, config);
    if (!parsed)
    {
        return EXIT_FAILURE;
    }

    // Self-test mode, when requested on the command line, replaces the normal run.
    if (config.self_test)
    {
        return self_test();
    }

    // Load CoreCLR and execute the .NET program described by the parsed arguments.
    return run(config);
}

下面是从源码中精简的部分源码:

// Returns the full path of the current process's executable.
//
// Windows is used as the example platform: the path is queried with
// GetModuleFileNameW. The buffer starts at 1024 characters and is doubled
// until the whole path fits, so a long path is never returned truncated
// (the previous check was an assert, which disappears in NDEBUG builds).
// Returns an empty string if the API call itself fails.
string_t get_exe_path()
{
	string_t file_name(1024, char_t{});
	for (;;)
	{
		const DWORD capacity = static_cast<DWORD>(file_name.size());
		const DWORD count = ::GetModuleFileNameW(nullptr, &file_name[0], capacity);

		// Zero means the call failed; do not read the buffer in that case.
		if (count == 0)
			return {};

		// On success the returned length excludes the terminator and is
		// therefore strictly smaller than the buffer size.
		if (count < capacity)
		{
			file_name.resize(count);
			return file_name;
		}

		// count == capacity: the path was truncated, retry with a larger buffer.
		file_name.resize(file_name.size() * 2);
	}
}

// Builds the trusted-platform-assemblies (TPA) list as one long string.
//
// Every supplied directory (core_libraries first, then core_root) is scanned
// once per extension; a file is added only the first time its name, with the
// extension stripped, is seen.
static string_t build_tpa(const string_t& core_root, const string_t& core_libraries)
{
	// Extensions to probe, in priority order.
	static const char_t* const tpa_extensions[] =
	{
		W(".ni.dll"),  // Probe for .ni.dll first so that it's preferred if ni and il coexist in the same dir
		W(".dll"),
		W(".ni.exe"),
		W(".exe"),
	};

	// std::set stores unique keys (it is not a key/value map), so the result
	// of insert() tells us whether a name has been seen before.
	std::set<string_t> seen_names;
	pal::stringstream_t tpa_stream;

	for (const char_t* const extension : tpa_extensions)
	{
		const size_t extension_length = pal::strlen(extension);

		// Callback handed to build_file_list: accepts a file only if its
		// extension-less name has not been recorded yet.
		const auto is_new_file = [&](const char_t* file)
		{
			string_t bare_name{ file };

			// Strip the extension.
			if (pal::string_ends_with(bare_name, extension_length, extension))
				bare_name = bare_name.substr(0, bare_name.length() - extension_length);

			// Return true if the file is new.
			return seen_names.insert(bare_name).second;
		};

		// Iterate over all supplied directories.
		for (const string_t* directory : { &core_libraries, &core_root })
		{
			if (directory->empty())
				continue;

			assert(directory->back() == pal::dir_delim);

			// build_file_list looks for files with this extension in the
			// directory and invokes the callback for each one (per the
			// original author's note it enumerates with FindFirstFileW).
			const string_t found = pal::build_file_list(*directory, extension, is_new_file);

			// Add to the TPA.
			tpa_stream << found;
		}
	}

	// The accumulated entries are returned as a single string.
	return tpa_stream.str();
}

// Base file name of the CoreCLR library, without extension; try_load_coreclr
// appends it to the CORE_ROOT directory.
const char_t coreclr_lib[] = W("coreclr");
// Native library extension that try_load_coreclr appends after coreclr_lib.
const char_t nativelib_ext[] = W(".dll");

// Loads coreclr.dll from the given directory and pins it in the process.
//
// core_root: directory that should contain the CoreCLR library.
// hMod:      receives the module handle returned by LoadLibraryExW.
// Returns true when the library was both loaded and pinned; otherwise the
// failing step and GetLastError() are printed to stderr and false is returned.
bool try_load_coreclr(const pal::string_t& core_root, pal::mod_t& hMod)
{
	// Compose the library path: core_root with a trailing delimiter ensured,
	// followed by "coreclr" and then ".dll".
	pal::string_t library_path(core_root);
	pal::ensure_trailing_delimiter(library_path);
	library_path += pal::coreclr_lib;
	library_path += pal::nativelib_ext;

	// System API: load the DLL into the process.
	hMod = (pal::mod_t)::LoadLibraryExW(library_path.c_str(), nullptr, 0);
	const bool loaded = (hMod != nullptr);
	if (!loaded)
	{
		pal::fprintf(stderr, W("Failed to load: '%s'. Error: 0x%08x\n"), library_path.c_str(), ::GetLastError());
		return false;
	}

	// Pin the module (GET_MODULE_HANDLE_EX_FLAG_PIN) so it stays loaded; the
	// handle written by this call is not needed afterwards.
	HMODULE pinned_handle;
	const bool pinned = ::GetModuleHandleExW(GET_MODULE_HANDLE_EX_FLAG_PIN, library_path.c_str(), &pinned_handle) != 0;
	if (!pinned)
	{
		pal::fprintf(stderr, W("Failed to pin: '%s'. Error: 0x%08x\n"), library_path.c_str(), ::GetLastError());
		return false;
	}

	return true;
}
}


// Loads CoreCLR and runs the managed entry assembly described by `config`.
//
// Steps, in order: optionally wait for a debugger, work out the application
// and CORE_ROOT directories, build the TPA list, optionally load a mock
// hostpolicy, load coreclr.dll, resolve its hosting exports, initialize the
// runtime, execute the entry assembly and finally shut the runtime down.
//
// Returns -1 when loading, export resolution, initialization or execution
// fails. Otherwise returns the latched exit code reported by
// coreclr_shutdown_2, unless exit_code is -1 at that point (shutdown failed
// or the assembly itself produced -1), in which case -1 is returned.
//
// NOTE(review): this is an abridged excerpt. Names such as exe_path_utf8,
// entry_assembly_utf8, argv_utf8, propertyCount, propertyKeys, propertyValues,
// CurrentClrInstance and CurrentAppDomainId are used below but not declared
// in the visible code; presumably they are set up in the omitted property
// handling section.
static int run(const configuration& config)
{
	platform_specific_actions actions;

	// Check if debugger attach scenario was requested.
	if (config.wait_to_debug)
		wait_for_debugger();

	string_t exe_path = pal::get_exe_path();

	// Determine the managed application's path.
	string_t app_path;
	{
		string_t file;
		pal::split_path_to_dir_filename(config.entry_assembly_fullpath, app_path, file);
		pal::ensure_trailing_delimiter(app_path);
	}

	// Define the NI app_path.
	string_t app_path_ni = app_path + W("NI");
	pal::ensure_trailing_delimiter(app_path_ni);
	app_path_ni.append(1, pal::env_path_delim);
	app_path_ni.append(app_path);

	// Accumulate path for native search path.
	pal::stringstream_t native_search_dirs;
	native_search_dirs << app_path << pal::env_path_delim;

	// CORE_LIBRARIES
	string_t core_libs = pal::getenv(envvar::coreLibraries);
	if (!core_libs.empty() && core_libs != app_path)
	{
		pal::ensure_trailing_delimiter(core_libs);
		native_search_dirs << core_libs << pal::env_path_delim;
	}

	// Determine CORE_ROOT.
	// Check if the path is user supplied and if not try
	// the CORE_ROOT environment variable.
	string_t core_root = !config.clr_path.empty()
		? config.clr_path
		: pal::getenv(envvar::coreRoot);

	// If CORE_ROOT wasn't supplied use the exe binary path, otherwise
	// ensure path is valid and add to native search path.
	if (core_root.empty())
	{
		string_t file;
		pal::split_path_to_dir_filename(exe_path, core_root, file);
		pal::ensure_trailing_delimiter(core_root);
	}
	else
	{
		pal::ensure_trailing_delimiter(core_root);
		native_search_dirs << core_root << pal::env_path_delim;
	}

	string_t tpa_list = build_tpa(core_root, core_libs);
	{
		// Load hostpolicy if requested.
		string_t mock_hostpolicy = pal::getenv(envvar::mockHostPolicy);
		if (!mock_hostpolicy.empty()
			&& !pal::try_load_hostpolicy(mock_hostpolicy))
		{
			return -1;
		}
	}

	actions.before_coreclr_load();

	// Attempt to load CoreCLR.
	pal::mod_t coreclr_mod;
	// Load coreclr.dll from CORE_ROOT.
	if (!pal::try_load_coreclr(core_root, coreclr_mod))
	{
		return -1;
	}

	// 1. Declare the function pointers for the CoreCLR hosting exports.
	coreclr_initialize_ptr coreclr_init_func = nullptr;
	coreclr_execute_assembly_ptr coreclr_execute_func = nullptr;
	coreclr_shutdown_2_ptr coreclr_shutdown2_func = nullptr;

	// 2. Look up each export by name in the loaded module and store its
	//    address in the matching function pointer (per the original author's
	//    note, on Windows the lookup goes through GetProcAddress).
	if (!try_get_export(coreclr_mod, "coreclr_initialize", (void**)&coreclr_init_func)
		|| !try_get_export(coreclr_mod, "coreclr_execute_assembly", (void**)&coreclr_execute_func)
		|| !try_get_export(coreclr_mod, "coreclr_shutdown_2", (void**)&coreclr_shutdown2_func))
	{
		return -1;
	}

	// Prepare the properties CoreCLR requires.
	// Read the system environment variables.
	// NOTE(review): the code for these two steps is omitted from this excerpt.

	logger_t logger{
		exe_path_utf8.c_str(),
		propertyCount, propertyKeys.data(), propertyValues.data(),
		entry_assembly_utf8.c_str(), config.entry_assembly_argc, argv_utf8.get() };

	int result;
	// Initialize the CoreCLR runtime environment.
	result = coreclr_init_func(
		exe_path_utf8.c_str(),
		"corerun",
		propertyCount,
		propertyKeys.data(),
		propertyValues.data(),
		&CurrentClrInstance,
		&CurrentAppDomainId);
	if (FAILED(result))
	{
		pal::fprintf(stderr, W("BEGIN: coreclr_initialize failed - Error: 0x%08x\n"), result);
		logger.dump_details();
		pal::fprintf(stderr, W("END: coreclr_initialize failed - Error: 0x%08x\n"), result);
		return -1;
	}

	int exit_code;
	{

		actions.before_execute_assembly(config.entry_assembly_fullpath);
		// From here on the managed .NET program is executed; its exit code
		// is written to exit_code.
		result = coreclr_execute_func(
			CurrentClrInstance,
			CurrentAppDomainId,
			config.entry_assembly_argc,
			argv_utf8.get(),
			entry_assembly_utf8.c_str(),
			(uint32_t*)&exit_code);
		if (FAILED(result))
		{
			pal::fprintf(stderr, W("BEGIN: coreclr_execute_assembly failed - Error: 0x%08x\n"), result);
			logger.dump_details();
			pal::fprintf(stderr, W("END: coreclr_execute_assembly failed - Error: 0x%08x\n"), result);
			return -1;
		}

		actions.after_execute_assembly();
	}

	int latched_exit_code = 0;
	// Shut CoreCLR down and release its resources.
	result = coreclr_shutdown2_func(CurrentClrInstance, CurrentAppDomainId, &latched_exit_code);
	if (FAILED(result))
	{
		pal::fprintf(stderr, W("coreclr_shutdown_2 failed - Error: 0x%08x\n"), result);
		exit_code = -1;
	}

	// Prefer the exit code latched at shutdown unless exit_code is already -1.
	if (exit_code != -1)
		exit_code = latched_exit_code;

	return exit_code;
}

后面我们再继续深入学习.Net源码.

秋风 2021-11-07