Linux内核模块的加载过程

文章由LinuxBoy分享于2019-03-31 01:03:34热评（622）

Linux内核模块的加载过程

前段时间为了解决内核模块无法卸载的问题，对模块的加载过程详细地学习了一番。加载模块时常用的命令是insmod和modprobe，这两个命令最终都是通过系统调用sys_init_module()来完成核心工作的；用户层做的更多的是对参数的处理，以及将待插入的模块文件读入到内存中。系统调用sys_init_module()将大部分工作委托给load_module()函数来完成，而load_module()中的操作大部分是围绕着ELF文件的格式展开的，所以如果对ELF文件格式比较了解，load_module()的过程就很容易看懂。下面将我对load_module()的一些理解贴出来和大家分享一下，注释比较详细，就不多说了：

/* Allocate and load the module: note that size of section 0 is always
   zero, and we rely on this for optional sections. */
/*
 * load_module() does the heavy lifting of module loading on behalf of
 * sys_init_module().
 *
 * It vmalloc()s a kernel buffer of len bytes and copies the user-space
 * module image into it with copy_from_user(), producing an in-kernel
 * view of the module's ELF file (called the "HDR view" below).  Every
 * later step works from that view.  The HDR view is released with
 * vfree() before the function returns, on the success path as well as
 * on every error path.
 *
 * @umod:  user-space pointer to the module's ELF image
 * @len:   size of that image in bytes
 * @uargs: user-space pointer to the module argument string
 *
 * Returns the loaded module on success, or ERR_PTR(-errno) on failure.
 */
static noinline struct module *load_module(void __user *umod,
				  unsigned long len,
				  const char __user *uargs)
{
	/*
	 * Address of the ELF file header (also the start of the HDR view).
	 */
	Elf_Ehdr *hdr;
	/*
	 * Address of the section header table.
	 */
	Elf_Shdr *sechdrs;
	char *secstrings, *args, *modmagic, *strtab = NULL;
	char *staging;
	unsigned int i;
	unsigned int symindex = 0;
	unsigned int strindex = 0;
	unsigned int modindex, versindex, infoindex, pcpuindex;
	struct module *mod;
	long err = 0;
	void *percpu = NULL, *ptr = NULL; /* Stops spurious gcc warning */
	unsigned long symoffs, stroffs, *strmap;

	mm_segment_t old_fs;

	DEBUGP("load_module: umod=%p, len=%lu, uargs=%p\n",
	       umod, len, uargs);
	/*
	 * An image shorter than an ELF file header cannot be valid.
	 */
	if (len < sizeof(*hdr))
		return ERR_PTR(-ENOEXEC);

	/* Suck in entire file: we'll want most of it. */
	/* vmalloc barfs on "unusual" numbers.  Check here */
	/*
	 * Images larger than 64 MiB are rejected with -ENOMEM, as is a
	 * failed vmalloc().
	 */
	if (len > 64 * 1024 * 1024 || (hdr = vmalloc(len)) == NULL)
		return ERR_PTR(-ENOMEM);

	/*
	 * Copy the module image from user space into the buffer at hdr.
	 */
	if (copy_from_user(hdr, umod, len) != 0) {
		err = -EFAULT;
		goto free_hdr;
	}

	/* Sanity checks against insmoding binaries or wrong arch,
	   weird elf version */
	/*
	 * Require the ELF magic, a relocatable object (ET_REL), an
	 * architecture accepted by elf_check_arch(), and a section header
	 * entry size equal to sizeof(Elf_Shdr).  Any mismatch yields
	 * -ENOEXEC.
	 */
	if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
	    || hdr->e_type != ET_REL
	    || !elf_check_arch(hdr)
	    || hdr->e_shentsize != sizeof(*sechdrs)) {
		err = -ENOEXEC;
		goto free_hdr;
	}

	/*
	 * The section header table (e_shnum entries starting at e_shoff)
	 * must lie inside the image.  The test is written as a subtraction
	 * so that a huge e_shoff cannot wrap the unsigned sum and slip
	 * past the check.
	 */
	if (hdr->e_shoff >= len
	    || hdr->e_shnum * sizeof(Elf_Shdr) > len - hdr->e_shoff)
		goto truncated;

	/* Convenience variables */
	/*
	 * Address of the section header table inside the HDR view.
	 */
	sechdrs = (void *)hdr + hdr->e_shoff;
	/*
	 * Address of the section-name string table: e_shstrndx is its index
	 * in the section header table and sh_offset its offset from the
	 * start of the file.
	 * NOTE(review): e_shstrndx is not range-checked against e_shnum here.
	 */
	secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
	/*
	 * Entry 0 of the section header table is not used as a real
	 * section; give it address 0.
	 */
	sechdrs[0].sh_addr = 0;

	/*
	 * Walk the section header table (e_shnum entries, skipping entry 0).
	 */
	for (i = 1; i < hdr->e_shnum; i++) {
		/*
		 * A section that occupies file space (anything but
		 * SHT_NOBITS) must fit inside the image; otherwise the
		 * file is truncated.  Written overflow-safely, as above.
		 */
		if (sechdrs[i].sh_type != SHT_NOBITS
		    && (sechdrs[i].sh_offset > len
			|| sechdrs[i].sh_size > len - sechdrs[i].sh_offset))
			goto truncated;

		/* Mark all sections sh_addr with their address in the
		   temporary image. */
		sechdrs[i].sh_addr = (size_t)hdr + sechdrs[i].sh_offset;

		/* Internal symbols and strings. */
		/*
		 * Remember the symbol table and its associated string
		 * table.  If the object held more than one SHT_SYMTAB
		 * section, the last one seen would win.
		 */
		if (sechdrs[i].sh_type == SHT_SYMTAB) {
			/* Index of the symbol table section. */
			symindex = i;
			/* Index of the string table linked to it (sh_link). */
			strindex = sechdrs[i].sh_link;
			/* Address of that string table in the HDR view. */
			strtab = (char *)hdr + sechdrs[strindex].sh_offset;
		}
#ifndef CONFIG_MODULE_UNLOAD
		/* Don't load .exit sections */
		/*
		 * Without unload support, sections whose name starts with
		 * ".exit" lose SHF_ALLOC so they are not copied into the
		 * final image.
		 */
		if (strstarts(secstrings+sechdrs[i].sh_name, ".exit"))
			sechdrs[i].sh_flags &= ~(unsigned long)SHF_ALLOC;
#endif
	}
	/*
	 * Locate the ".gnu.linkonce.this_module" section; index 0 means it
	 * was not found.
	 */
	modindex = find_sec(hdr, sechdrs, secstrings,
			    ".gnu.linkonce.this_module");
	if (!modindex) {
		printk(KERN_WARNING "No module found in object\n");
		err = -ENOEXEC;
		goto free_hdr;
	}
	/* This is temporary: point mod into copy of data. */
	mod = (void *)sechdrs[modindex].sh_addr;

	/*
	 * No SHT_SYMTAB section was seen in the loop above.
	 */
	if (symindex == 0) {
		printk(KERN_WARNING "%s: module has no symbols (stripped?)\n",
		       mod->name);
		err = -ENOEXEC;
		goto free_hdr;
	}

	/*
	 * Index of the "__versions" section.
	 */
	versindex = find_sec(hdr, sechdrs, secstrings, "__versions");
	/*
	 * Index of the ".modinfo" section.
	 */
	infoindex = find_sec(hdr, sechdrs, secstrings, ".modinfo");
	/*
	 * Index of the per-cpu data section.
	 */
	pcpuindex = find_pcpusec(hdr, sechdrs, secstrings);

	/* Don't keep modinfo and version sections. */
	/*
	 * Clearing SHF_ALLOC keeps them out of the final image.  If either
	 * index is 0 (section presumably absent), this only touches the
	 * unused entry 0.
	 */
	sechdrs[infoindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
	sechdrs[versindex].sh_flags &= ~(unsigned long)SHF_ALLOC;

	/* Check module struct version now, before we try to use module. */
	if (!check_modstruct_version(sechdrs, versindex, mod)) {
		err = -ENOEXEC;
		goto free_hdr;
	}

	/*
	 * Look up the "vermagic" entry in .modinfo.
	 */
	modmagic = get_modinfo(sechdrs, infoindex, "vermagic");
	/* This is allowed: modprobe --force will invalidate it. */
	if (!modmagic) {
		/*
		 * No vermagic: try a forced load.  A non-zero result aborts
		 * the load.  (Presumably this succeeds only when forced
		 * loading is enabled in the kernel configuration -- confirm
		 * against try_to_force_load().)
		 */
		err = try_to_force_load(mod, "bad vermagic");
		if (err)
			goto free_hdr;
	} else if (!same_magic(modmagic, vermagic, versindex)) {
		printk(KERN_ERR "%s: version magic '%s' should be '%s'\n",
		       mod->name, modmagic, vermagic);
		err = -ENOEXEC;
		goto free_hdr;
	}

	/*
	 * Look up the "staging" entry in .modinfo.
	 */
	staging = get_modinfo(sechdrs, infoindex, "staging");
	if (staging) {
		/*
		 * A module carrying a "staging" entry is tainted with
		 * TAINT_CRAP and a warning is logged.  Per the original
		 * annotation, such modules come from drivers/staging, the
		 * tree for drivers that are not yet fully tested or ready
		 * for the main driver directories.
		 */
		add_taint_module(mod, TAINT_CRAP);
		printk(KERN_WARNING "%s: module is from the staging directory,"
		       " the quality is unknown, you have been warned.\n",
		       mod->name);
	}

	/* Now copy in args */
	/*
	 * Duplicate the module argument string from user space into args.
	 */
	args = strndup_user(uargs, ~0UL >> 1);
	if (IS_ERR(args)) {
		err = PTR_ERR(args);
		goto free_hdr;
	}

	/*
	 * Allocate a zeroed bitmap with one bit per byte of the symbol
	 * string table (sechdrs[strindex].sh_size bytes).  It is handed to
	 * layout_symtab() and add_kallsyms() below.
	 */
	strmap = kzalloc(BITS_TO_LONGS(sechdrs[strindex].sh_size)
			 * sizeof(long), GFP_KERNEL);
	if (!strmap) {
		err = -ENOMEM;
		goto free_mod;
	}

	/*
	 * Refuse to load a module whose name is already registered.
	 */
	if (find_module(mod->name)) {
		err = -EEXIST;
		goto free_mod;
	}

	mod->state = MODULE_STATE_COMING;

	/* Allow arches to frob section contents and sizes. */
	/*
	 * Architecture hook; a negative return value aborts the load.
	 */
	err = module_frob_arch_sections(hdr, sechdrs, secstrings, mod);
	if (err < 0)
		goto free_mod;

	/*
	 * If a per-cpu section exists, allocate its storage separately,
	 * clear SHF_ALLOC so the generic copy loop skips it, and record the
	 * allocation in mod->percpu.
	 */
	if (pcpuindex) {
		/* We have a special allocation for this section. */
		percpu = percpu_modalloc(sechdrs[pcpuindex].sh_size,
					 sechdrs[pcpuindex].sh_addralign,
					 mod->name);
		if (!percpu) {
			err = -ENOMEM;
			goto free_mod;
		}
		sechdrs[pcpuindex].sh_flags &= ~(unsigned long)SHF_ALLOC;
		mod->percpu = percpu;
	}

	/* Determine total sizes, and put offsets in sh_entsize.  For now
	   this is done generically; there doesn't appear to be any
	   special cases for the architectures. */
	layout_sections(mod, hdr, sechdrs, secstrings);
	/*
	 * Lay out the symbol table; symoffs and stroffs are passed on to
	 * add_kallsyms() later.
	 */
	symoffs = layout_symtab(mod, sechdrs, symindex, strindex, hdr,
				secstrings, &stroffs, strmap);

	/* Do the allocs. */
	/*
	 * Allocate and zero the core region (mod->core_size bytes) and
	 * store it in mod->module_core.
	 */
	ptr = module_alloc_update_bounds(mod->core_size);
	/*
	 * The pointer to this block is stored in the module structure
	 * which is inside the block. Just mark it as not being a
	 * leak.
	 */
	kmemleak_not_leak(ptr);
	if (!ptr) {
		err = -ENOMEM;
		goto free_percpu;
	}
	memset(ptr, 0, mod->core_size);
	mod->module_core = ptr;

	/*
	 * Allocate and zero the init region (mod->init_size bytes) and
	 * store it in mod->module_init.  A NULL result is only an error
	 * when init_size is non-zero.
	 */
	ptr = module_alloc_update_bounds(mod->init_size);
	/*
	 * The pointer to this block is stored in the module structure
	 * which is inside the block. This block doesn't need to be
	 * scanned as it contains data and code that will be freed
	 * after the module is initialized.
	 */
	kmemleak_ignore(ptr);
	if (!ptr && mod->init_size) {
		err = -ENOMEM;
		goto free_core;
	}
	memset(ptr, 0, mod->init_size);
	mod->module_init = ptr;

	/* Transfer each section which specifies SHF_ALLOC */
	DEBUGP("final section addresses:\n");
	/*
	 * Copy every SHF_ALLOC section into the init or core region and
	 * update its sh_addr to the final run-time address.
	 */
	for (i = 0; i < hdr->e_shnum; i++) {
		void *dest;

		/*
		 * Sections without SHF_ALLOC take no run-time memory.
		 */
		if (!(sechdrs[i].sh_flags & SHF_ALLOC))
			continue;

		/*
		 * sh_entsize holds the section's offset within its region.
		 * If the INIT_OFFSET_MASK bit is set the section belongs to
		 * the init region, otherwise to the core region.
		 */
		if (sechdrs[i].sh_entsize & INIT_OFFSET_MASK)
			dest = mod->module_init
				+ (sechdrs[i].sh_entsize & ~INIT_OFFSET_MASK);
		else
			dest = mod->module_core + sechdrs[i].sh_entsize;

		/*
		 * Copy the section contents out of the HDR view; SHT_NOBITS
		 * sections have no file contents and stay zeroed.
		 */
		if (sechdrs[i].sh_type != SHT_NOBITS)
			memcpy(dest, (void *)sechdrs[i].sh_addr,
			       sechdrs[i].sh_size);
		/* Update sh_addr to point to copy in image. */
		sechdrs[i].sh_addr = (unsigned long)dest;
		DEBUGP("\t0x%lx %s\n", sechdrs[i].sh_addr, secstrings + sechdrs[i].sh_name);
	}
	/* Module has been moved. */
	mod = (void *)sechdrs[modindex].sh_addr;
	kmemleak_load_module(mod, hdr, sechdrs, secstrings);

#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
	/*
	 * Allocate the per-cpu reference counter used on SMP kernels with
	 * unload support.
	 */
	mod->refptr = percpu_modalloc(sizeof(local_t), __alignof__(local_t),
				      mod->name);
	if (!mod->refptr) {
		err = -ENOMEM;
		goto free_init;
	}
#endif
	/* Now we've moved module, initialize linked lists, etc. */
	module_unload_init(mod);

	/* add kobject, so we can reference it. */
	/*
	 * Per the original annotation, the module then shows up under
	 * /sys/module/<module_name>.
	 */
	err = mod_sysfs_init(mod);
	if (err)
		goto free_unload;

	/* Set up license info based on the info section */
	set_license(mod, get_modinfo(sechdrs, infoindex, "license"));

	/*
	 * ndiswrapper is under GPL by itself, but loads proprietary modules.
	 * Don't use add_taint_module(), as it would prevent ndiswrapper from
	 * using GPL-only symbols it needs.
	 */
	if (strcmp(mod->name, "ndiswrapper") == 0)
		add_taint(TAINT_PROPRIETARY_MODULE);

	/* driverloader was caught wrongly pretending to be under GPL */
	if (strcmp(mod->name, "driverloader") == 0)
		add_taint_module(mod, TAINT_PROPRIETARY_MODULE);

	/* Set up MODINFO_ATTR fields */
	setup_modinfo(mod, sechdrs, infoindex);

	/* Fix up syms, so that st_value is a pointer to location. */
	/*
	 * This is where the module's symbol references are resolved; a
	 * negative return value aborts the load.
	 */
	err = simplify_symbols(sechdrs, symindex, strtab, versindex, pcpuindex,
			       mod);
	if (err < 0)
		goto cleanup;

	/* Now we've got everything in the final locations, we can
	 * find optional sections. */
	/*
	 * "__param": run-time address and number of entries.
	 */
	mod->kp = section_objs(hdr, sechdrs, secstrings, "__param",
			       sizeof(*mod->kp), &mod->num_kp);
	/*
	 * "__ksymtab": run-time address and number of entries.
	 */
	mod->syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab",
				 sizeof(*mod->syms), &mod->num_syms);
	/*
	 * "__kcrctab": run-time address.
	 */
	mod->crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab");
	/*
	 * "__ksymtab_gpl": run-time address and number of entries.
	 */
	mod->gpl_syms = section_objs(hdr, sechdrs, secstrings, "__ksymtab_gpl",
				     sizeof(*mod->gpl_syms),
				     &mod->num_gpl_syms);
	/*
	 * "__kcrctab_gpl": run-time address.
	 */
	mod->gpl_crcs = section_addr(hdr, sechdrs, secstrings, "__kcrctab_gpl");
	/*
	 * "__ksymtab_gpl_future": run-time address and number of entries.
	 */
	mod->gpl_future_syms = section_objs(hdr, sechdrs, secstrings,
					    "__ksymtab_gpl_future",
					    sizeof(*mod->gpl_future_syms),
					    &mod->num_gpl_future_syms);
	/*
	 * "__kcrctab_gpl_future": run-time address.
	 */
	mod->gpl_future_crcs = section_addr(hdr, sechdrs, secstrings,
					    "__kcrctab_gpl_future");

#ifdef CONFIG_UNUSED_SYMBOLS
	/*
	 * "__ksymtab_unused": run-time address and number of entries.
	 */
	mod->unused_syms = section_objs(hdr, sechdrs, secstrings,
					"__ksymtab_unused",
					sizeof(*mod->unused_syms),
					&mod->num_unused_syms);
	/*
	 * "__kcrctab_unused": run-time address.
	 */
	mod->unused_crcs = section_addr(hdr, sechdrs, secstrings,
					"__kcrctab_unused");
	/*
	 * "__ksymtab_unused_gpl": run-time address and number of entries.
	 */
	mod->unused_gpl_syms = section_objs(hdr, sechdrs, secstrings,
					    "__ksymtab_unused_gpl",
					    sizeof(*mod->unused_gpl_syms),
					    &mod->num_unused_gpl_syms);
	/*
	 * "__kcrctab_unused_gpl": run-time address.
	 */
	mod->unused_gpl_crcs = section_addr(hdr, sechdrs, secstrings,
					    "__kcrctab_unused_gpl");
#endif
#ifdef CONFIG_CONSTRUCTORS
	/*
	 * ".ctors": run-time address and number of entries.
	 */
	mod->ctors = section_objs(hdr, sechdrs, secstrings, ".ctors",
				  sizeof(*mod->ctors), &mod->num_ctors);
#endif

#ifdef CONFIG_TRACEPOINTS
	/*
	 * "__tracepoints": run-time address and number of entries.
	 */
	mod->tracepoints = section_objs(hdr, sechdrs, secstrings,
					"__tracepoints",
					sizeof(*mod->tracepoints),
					&mod->num_tracepoints);
#endif
#ifdef CONFIG_EVENT_TRACING
	/*
	 * "_ftrace_events": run-time address and number of entries.
	 */
	mod->trace_events = section_objs(hdr, sechdrs, secstrings,
					 "_ftrace_events",
					 sizeof(*mod->trace_events),
					 &mod->num_trace_events);
#endif
#ifdef CONFIG_FTRACE_MCOUNT_RECORD
	/* sechdrs[0].sh_size is always zero */
	/*
	 * "__mcount_loc": run-time address and number of entries.
	 */
	mod->ftrace_callsites = section_objs(hdr, sechdrs, secstrings,
					     "__mcount_loc",
					     sizeof(*mod->ftrace_callsites),
					     &mod->num_ftrace_callsites);
#endif
#ifdef CONFIG_MODVERSIONS
	/*
	 * Exported symbols without a matching CRC table: only a forced
	 * load may continue.
	 */
	if ((mod->num_syms && !mod->crcs)
	    || (mod->num_gpl_syms && !mod->gpl_crcs)
	    || (mod->num_gpl_future_syms && !mod->gpl_future_crcs)
#ifdef CONFIG_UNUSED_SYMBOLS
	    || (mod->num_unused_syms && !mod->unused_crcs)
	    || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs)
#endif
		) {
		err = try_to_force_load(mod,
					"no versions for exported symbols");
		if (err)
			goto cleanup;
	}
#endif

	/* Now do relocations. */
	for (i = 1; i < hdr->e_shnum; i++) {
		const char *strtab = (char *)sechdrs[strindex].sh_addr;
		unsigned int info = sechdrs[i].sh_info;

		/* Not a valid relocation section? */
		/*
		 * sh_info is used as a section index here, so it must be
		 * within the section header table.
		 */
		if (info >= hdr->e_shnum)
			continue;

		/* Don't bother with non-allocated sections */
		if (!(sechdrs[info].sh_flags & SHF_ALLOC))
			continue;

		/*
		 * SHT_REL: relocation entries without explicit addends.
		 */
		if (sechdrs[i].sh_type == SHT_REL)
			err = apply_relocate(sechdrs, strtab, symindex, i,mod);
		/*
		 * SHT_RELA: relocation entries with explicit addends.
		 */
		else if (sechdrs[i].sh_type == SHT_RELA)
			err = apply_relocate_add(sechdrs, strtab, symindex, i,
						 mod);
		if (err < 0)
			goto cleanup;
	}

	/* Find duplicate symbols */
	err = verify_export_symbols(mod);
	if (err < 0)
		goto cleanup;

	/* Set up and sort exception table */
	/*
	 * "__ex_table": run-time address and number of entries.
	 */
	mod->extable = section_objs(hdr, sechdrs, secstrings, "__ex_table",
				    sizeof(*mod->extable), &mod->num_exentries);
	sort_extable(mod->extable, mod->extable + mod->num_exentries);

	/* Finally, copy percpu area over. */
	percpu_modcopy(mod->percpu, (void *)sechdrs[pcpuindex].sh_addr,
		       sechdrs[pcpuindex].sh_size);

	/*
	 * Set up the module's symbol and string table information from the
	 * layout computed by layout_symtab().
	 */
	add_kallsyms(mod, sechdrs, hdr->e_shnum, symindex, strindex,
		     symoffs, stroffs, secstrings, strmap);
	/*
	 * The string-table bitmap is no longer needed.  NULL the pointer so
	 * the error path's kfree(strmap) is harmless.
	 */
	kfree(strmap);
	strmap = NULL;

	if (!mod->taints) {
		/*
		 * Register the "__verbose" section with dynamic debug, for
		 * untainted modules only.
		 */
		struct _ddebug *debug;
		unsigned int num_debug;

		debug = section_objs(hdr, sechdrs, secstrings, "__verbose",
				     sizeof(*debug), &num_debug);
		if (debug)
			dynamic_debug_setup(debug, num_debug);
	}

	err = module_finalize(hdr, sechdrs, mod);
	if (err < 0)
		goto cleanup;

	/* flush the icache in correct context */
	/*
	 * Save the current address limit and switch to KERNEL_DS for the
	 * cache flush; it is restored right after.
	 */
	old_fs = get_fs();
	set_fs(KERNEL_DS);

	/*
	 * Flush the instruction cache, since we've played with text.
	 * Do it before processing of module parameters, so the module
	 * can provide parameter accessor functions of its own.
	 */
	/*
	 * NOTE(review): flush_icache_range() may be a no-op on some
	 * architectures; that is architecture-specific.
	 */
	if (mod->module_init)
		flush_icache_range((unsigned long)mod->module_init,
				   (unsigned long)mod->module_init
				   + mod->init_size);
	flush_icache_range((unsigned long)mod->module_core,
			   (unsigned long)mod->module_core + mod->core_size);

	set_fs(old_fs);

	mod->args = args;
	if (section_addr(hdr, sechdrs, secstrings, "__obsparm"))
		printk(KERN_WARNING "%s: Ignoring obsolete parameters\n",
		       mod->name);

	/* Now sew it into the lists so we can get lockdep and oops
	 * info during argument parsing.  Noone should access us, since
	 * strong_try_module_get() will fail.
	 * lockdep/oops can run asynchronous, so use the RCU list insertion
	 * function to insert in a way safe to concurrent readers.
	 * The mutex protects against concurrent writers.
	 */
	list_add_rcu(&mod->list, &modules);

	/*
	 * Parse the arguments supplied when the module was inserted.
	 */
	err = parse_args(mod->name, mod->args, mod->kp, mod->num_kp, NULL);
	if (err < 0)
		goto unlink;

	/*
	 * Create the module's sysfs entries.
	 */
	err = mod_sysfs_setup(mod, mod->kp, mod->num_kp);
	if (err < 0)
		goto unlink;
	/*
	 * Add section attributes.
	 */
	add_sect_attrs(mod, hdr->e_shnum, secstrings, sechdrs);
	/*
	 * Add notes attributes.
	 */
	add_notes_attrs(mod, hdr->e_shnum, secstrings, sechdrs);

	/* Get rid of temporary copy */
	vfree(hdr);

	trace_module_load(mod);

	/* Done! */
	return mod;

	/*
	 * Error unwinding: each label undoes one more step, and control
	 * falls through the later labels down to free_hdr.
	 */
 unlink:
	/* Unlink carefully: kallsyms could be walking list. */
	list_del_rcu(&mod->list);
	synchronize_sched();
	module_arch_cleanup(mod);
 cleanup:
	free_modinfo(mod);
	kobject_del(&mod->mkobj.kobj);
	kobject_put(&mod->mkobj.kobj);
 free_unload:
	module_unload_free(mod);
#if defined(CONFIG_MODULE_UNLOAD) && defined(CONFIG_SMP)
	percpu_modfree(mod->refptr);
 free_init:
#endif
	module_free(mod, mod->module_init);
 free_core:
	module_free(mod, mod->module_core);
	/* mod will be freed with core. Don't access it beyond this line! */
 free_percpu:
	if (percpu)
		percpu_modfree(percpu);
 free_mod:
	kfree(args);
	kfree(strmap);
 free_hdr:
	vfree(hdr);
	return ERR_PTR(err);

 truncated:
	printk(KERN_ERR "Module len %lu truncated\n", len);
	err = -ENOEXEC;
	goto free_hdr;
}

推荐文章：

Linux内核模块的加载过程