`
peng_wp
  • 浏览: 42102 次
社区版块
存档分类
最新评论

Linux 多核启动过程

阅读更多
以这篇博文来纪念自己与“Linux kernel多核启动”相处的两个多月。
本文章以2.6.33.1的linux内核在x86_64平台上为例进行说明。
本文参考了http://tldp.org/HOWTO/Linux-i386-Boot-Code-HOWTO/smpboot.html

Linux kernel启动的过程概览
init/main.c:start_kernel()
    |
   \|/
init/main.c:rest_init
{
……
kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND)
……
cpu_idle()
}
    |
   \|/
init/main.c:kernel_init//从上面代码可以看出,kernel_init是一个内核线程 
    |
   \|/
init/main.c:init_post  //会在最后调用启动脚本
{
……
823         /*
824          * We try each of these until one succeeds.
825          *
826          * The Bourne shell can be used instead of init if we are
827          * trying to recover a really broken machine.
828          */
829         if (execute_command) {
830                 run_init_process(execute_command);
831                 printk(KERN_WARNING "Failed to execute %s.  Attempting "
832                                         "defaults...\n", execute_command);
833         }
834         run_init_process("/sbin/init");
835         run_init_process("/etc/init");
836         run_init_process("/bin/init");
837         run_init_process("/bin/sh");
838
839         panic("No init found.  Try passing init= option to kernel.");
……
}


我们再来看看内核启动多核的详细过程。

init/main.c:start_kernel()
    |
   \|/
init/main.c:rest_init
{
……
kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND)
……
}
    |
   \|/
kernel_init    
    |
   \|/
/* called by boot processor to activate the rest */
init/main.c: smp_init()
{
……
for_each_present_cpu(cpu) {
          if (num_online_cpus() >= setup_max_cpus)
               break;
          if ( !cpu_online(cpu))     
               cpu_up(cpu);
}
/* Any cleanup work */
printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus());
smp_cpu_done(setup_max_cpus);
……
}
--------------------------------------------------------------
cpu_up = native_cpu_up是一个回调函数。
注册地方是在:arch/x86/kernel/smp.c

struct smp_ops smp_ops = {
   ……
  .cpu_up = native_cpu_up,
   ……
}
--------------------------------------------------------------
    |
   \|/
arch/x86/kernel/smpboot.c:native_cpu_up(unsigned int cpu)
    |
   \|/
arch/x86/kernel/smpboot.c: do_boot_cpu(int apicid, int cpu)
    |
   \|/
wakeup_secondary_cpu_via_init(apicid, start_ip)


在启动多核的过程中有两个bitmap很重要,一个是cpu_callin_mask,另一个是cpu_callout_mask。
cpu_callin_mask代表某个cpu是否已经启动,它的某个bit被与之对应的cpu在启动后置位,标记已经启动。
cpu_callout_mask在do_boot_cpu中被置位(在IPI发送成功之后),用来允许对应的AP继续初始化;只有当AP启动失败时,do_boot_cpu才会把该位重新清零。

我们下面来详细看看do_boot_cpu(int apicid, int cpu)与wakeup_secondary_cpu_via_init(apicid, start_ip)

/*
 * Boot one secondary CPU: obtain (or fork) its idle task, publish the
 * GDT / entry point / initial stack for it, copy the trampoline, kick
 * the CPU through the APIC and then poll cpu_callin_mask for up to 5s.
 *
 * @apicid: APIC ID of the CPU to wake.
 * @cpu:    logical CPU number used for per-cpu data and the cpumasks.
 *
 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
 * Returns zero if CPU booted OK, else error code from
 * ->wakeup_secondary_cpu.
 *
 * Two further non-zero returns are visible in the body: PTR_ERR() of the
 * idle task when forking it failed, and 1 when the IPIs were sent without
 * error but the CPU never set its bit in cpu_callin_mask.
 * Note that boot_error is an unsigned long returned through an int.
 */
static int __cpuinit do_boot_cpu(int apicid, int cpu)
{
	unsigned long boot_error = 0;
	unsigned long start_ip;
	int timeout;
	struct create_idle c_idle = {
		.cpu	= cpu,
		.done	= COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
	};

	/* On-stack work item bound to do_fork_idle; only used if no idle task exists yet. */
	INIT_WORK_ON_STACK(&c_idle.work, do_fork_idle);

	alternatives_smp_switch(1);

	/* Look up an idle task previously recorded for this CPU (see set_idle_for_cpu() below). */
	c_idle.idle = get_idle_for_cpu(cpu);

	/*
	 * We can't use kernel_thread since we must avoid to
	 * reschedule the child.
	 */
	if (c_idle.idle) {
		/*
		 * Reuse the existing idle task: point its saved sp just below
		 * one struct pt_regs at the top of its stack, re-init it as the
		 * idle task for this cpu, and skip the fork path.
		 */
		c_idle.idle->thread.sp = (unsigned long) (((struct pt_regs *)
			(THREAD_SIZE +  task_stack_page(c_idle.idle))) - 1);
		init_idle(c_idle.idle, cpu);
		goto do_rest;
	}

	/*
	 * No idle task yet. Call the work function directly when keventd is
	 * not up or we are keventd ourselves; otherwise queue the work and
	 * block until c_idle.done is completed.
	 */
	if (!keventd_up() || current_is_keventd())
		c_idle.work.func(&c_idle.work);
	else {
		schedule_work(&c_idle.work);
		wait_for_completion(&c_idle.done);
	}

	if (IS_ERR(c_idle.idle)) {
		printk("failed fork for CPU %d\n", cpu);
		destroy_work_on_stack(&c_idle.work);
		return PTR_ERR(c_idle.idle);
	}

	/* Remember the new idle task so a later boot of this CPU takes the reuse path above. */
	set_idle_for_cpu(cpu, c_idle.idle);
do_rest:
	/* Record the idle task as the target CPU's current_task. */
	per_cpu(current_task, cpu) = c_idle.idle;
#ifdef CONFIG_X86_32
	/* Stack for startup_32 can be just as for start_secondary onwards */
	irq_ctx_init(cpu);
#else
	clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
	initial_gs = per_cpu_offset(cpu);
	per_cpu(kernel_stack, cpu) =
		(unsigned long)task_stack_page(c_idle.idle) -
		KERNEL_STACK_OFFSET + THREAD_SIZE;
#endif
	/*
	 * Publish the GDT, the entry point (start_secondary) and the initial
	 * stack pointer for the new CPU.
	 * NOTE(review): presumably consumed by the AP's early startup code,
	 * which is not shown here - confirm against the trampoline/head code.
	 */
	early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
	initial_code = (unsigned long)start_secondary;
	stack_start.sp = (void *) c_idle.idle->thread.sp;

	/* start_ip had better be page-aligned! */
	start_ip = setup_trampoline();

	/* So we see what's up */
	announce_cpu(cpu, apicid);

	/*
	 * This grunge runs the startup process for
	 * the targeted processor.
	 */

	/* Cleared here; wakeup_secondary_cpu_via_init() sets it to 1 after the INIT de-assert. */
	atomic_set(&init_deasserted, 0);

	if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {

		pr_debug("Setting warm reset code and vector.\n");

		smpboot_setup_warm_reset_vector(start_ip);
		/*
		 * Be paranoid about clearing APIC errors.
		*/
		if (APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
			apic_write(APIC_ESR, 0);
			apic_read(APIC_ESR);
		}
	}

	/*
	 * Kick the secondary CPU. Use the method in the APIC driver
	 * if it's defined - or use an INIT boot APIC message otherwise:
	 */
	if (apic->wakeup_secondary_cpu)
		boot_error = apic->wakeup_secondary_cpu(apicid, start_ip);
	else
		boot_error = wakeup_secondary_cpu_via_init(apicid, start_ip);

	if (!boot_error) {
		/*
		 * allow APs to start initializing.
		 */
		pr_debug("Before Callout %d.\n", cpu);
		cpumask_set_cpu(cpu, cpu_callout_mask);
		pr_debug("After Callout %d.\n", cpu);

		/*
		 * Wait 5s total for a response
		 * (50000 polls of cpu_callin_mask, 100us apart).
		 */
		for (timeout = 0; timeout < 50000; timeout++) {
			if (cpumask_test_cpu(cpu, cpu_callin_mask))
				break;	/* It has booted */
			udelay(100);
		}

		/* Re-test the mask: the loop above exits on success and on timeout alike. */
		if (cpumask_test_cpu(cpu, cpu_callin_mask))
			pr_debug("CPU%d: has booted.\n", cpu);
		else {
			boot_error = 1;
			/* First byte of the trampoline page == 0xA5 is treated as "trampoline ran". */
			if (*((volatile unsigned char *)trampoline_base)
					== 0xA5)
				/* trampoline started but...? */
				pr_err("CPU%d: Stuck ??\n", cpu);
			else
				/* trampoline code not run */
				pr_err("CPU%d: Not responding.\n", cpu);
			if (apic->inquire_remote_apic)
				apic->inquire_remote_apic(apicid);
		}
	}

	/* Failure path: this is the only place the callout bit is cleared again. */
	if (boot_error) {
		/* Try to put things back the way they were before ... */
		numa_remove_cpu(cpu); /* was set by numa_add_cpu */

		/* was set by do_boot_cpu() */
		cpumask_clear_cpu(cpu, cpu_callout_mask);

		/* was set by cpu_init() */
		cpumask_clear_cpu(cpu, cpu_initialized_mask);

		set_cpu_present(cpu, false);
		per_cpu(x86_cpu_to_apicid, cpu) = BAD_APICID;
	}

	/* mark "stuck" area as not stuck */
	*((volatile unsigned long *)trampoline_base) = 0;

	if (get_uv_system_type() != UV_NON_UNIQUE_APIC) {
		/*
		 * Cleanup possible dangling ends...
		 */
		smpboot_restore_warm_reset_vector();
	}

	destroy_work_on_stack(&c_idle.work);
	return boot_error;
}



/*
 * Currently trivial. Write the real->protected mode
 * bootstrap into the page concerned. The caller
 * has made sure it's suitably aligned.
 *
 * Copies TRAMPOLINE_SIZE bytes from trampoline_data to trampoline_base
 * and returns the physical address of trampoline_base (via
 * virt_to_phys()). do_boot_cpu() uses that return value as start_ip.
 */
unsigned long __trampinit setup_trampoline(void)
{
        memcpy(trampoline_base, trampoline_data, TRAMPOLINE_SIZE);
        return virt_to_phys(trampoline_base);
}


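可以从上面代码中看出,do_boot_cpu会为编号为apicid的AP设定好它将要使用的stack以及它将要执行的代码(入口函数start_secondary,以及trampoline的物理地址start_ip),在完成这些后,通过发送IPI序列来启动AP,
并在IPI发送成功后将cpu_callout_mask中代表相应AP的位置位;如果AP在5秒内没有在cpu_callin_mask中置位,则认为启动失败,并把cpu_callout_mask中的该位重新清零。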


/*
 * Wake a secondary CPU with an IPI sequence: INIT assert, a 10ms delay,
 * INIT de-assert, then num_starts STARTUP IPIs (2 if the target has an
 * integrated APIC, otherwise 0).
 *
 * @phys_apicid: APIC ID the IPIs are addressed to.
 * @start_eip:   address the CPU should start at; it is passed in the
 *               STARTUP IPI as (start_eip >> 12).
 *
 * Returns send_status | accept_status, i.e. zero when neither the last
 * ICR-idle wait nor the last ESR read reported a problem.
 */
static int __cpuinit
wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
{
	unsigned long send_status, accept_status = 0;
	int maxlvt, num_starts, j;

	maxlvt = lapic_get_maxlvt();

	/*
	 * Be paranoid about clearing APIC errors.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid])) {
		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  */
			apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
	}

	pr_debug("Asserting INIT.\n");

	/*
	 * Turn INIT on target chip
	 */
	/*
	 * Send IPI
	 */
	apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
		       phys_apicid);

	pr_debug("Waiting for send to finish...\n");
	/* This status is not checked; it is overwritten by the wait after the de-assert. */
	send_status = safe_apic_wait_icr_idle();

	/* Hold INIT asserted for 10ms before de-asserting. */
	mdelay(10);

	pr_debug("Deasserting INIT.\n");

	/* Target chip */
	/* Send IPI */
	apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);

	pr_debug("Waiting for send to finish...\n");
	/* If no STARTUP IPI is sent (num_starts == 0) this is the send_status that gets returned. */
	send_status = safe_apic_wait_icr_idle();

	/* Full barrier, then flag that INIT has been de-asserted (do_boot_cpu() cleared the flag). */
	mb();
	atomic_set(&init_deasserted, 1);

	/*
	 * Should we send STARTUP IPIs ?
	 *
	 * Determine this based on the APIC version.
	 * If we don't have an integrated APIC, don't send the STARTUP IPIs.
	 */
	if (APIC_INTEGRATED(apic_version[phys_apicid]))
		num_starts = 2;
	else
		num_starts = 0;

	/*
	 * Paravirt / VMI wants a startup IPI hook here to set up the
	 * target processor state.
	 */
	startup_ipi_hook(phys_apicid, (unsigned long) start_secondary,
			 (unsigned long)stack_start.sp);

	/*
	 * Run STARTUP IPI loop.
	 */
	pr_debug("#startup loops: %d.\n", num_starts);

	for (j = 1; j <= num_starts; j++) {
		pr_debug("Sending STARTUP #%d.\n", j);
		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  */
			apic_write(APIC_ESR, 0);
		apic_read(APIC_ESR);
		pr_debug("After apic_write.\n");

		/*
		 * STARTUP IPI
		 */

		/* Target chip */
		/* Boot on the stack */
		/* Kick the second */
		/* The start address travels in the IPI as start_eip >> 12. */
		apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
			       phys_apicid);

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(300);

		pr_debug("Startup point 1.\n");

		pr_debug("Waiting for send to finish...\n");
		send_status = safe_apic_wait_icr_idle();

		/*
		 * Give the other CPU some time to accept the IPI.
		 */
		udelay(200);
		if (maxlvt > 3)		/* Due to the Pentium erratum 3AP.  */
			apic_write(APIC_ESR, 0);
		/*
		 * Mask 0xEF drops bit 4 of the ESR value.
		 * NOTE(review): the meaning of that bit is not visible here -
		 * check the APIC ESR bit layout.
		 */
		accept_status = (apic_read(APIC_ESR) & 0xEF);
		/* Stop sending further STARTUP IPIs as soon as either status is non-zero. */
		if (send_status || accept_status)
			break;
	}
	pr_debug("After Startup.\n");

	if (send_status)
		printk(KERN_ERR "APIC never delivered???\n");
	if (accept_status)
		printk(KERN_ERR "APIC delivery error (%lx).\n", accept_status);

	return (send_status | accept_status);
}



一段wakeup_secondary_cpu_via_init执行的log
656 CPU17: has booted.
657 WP output: cpu :18
658 ------native_cpu_up cpu:18, apicid:18----------
659 ------------in 3 do_boot_cpu------- #18
660 Asserting INIT.
661 Waiting for send to finish...
662 Deasserting INIT.
663 Waiting for send to finish...
664 #startup loops: 2.
665 Sending STARTUP #1.
666 After apic_write.
667 Startup point 1.
668 Waiting for send to finish...
669 Sending STARTUP #2.
670 After apic_write.
671 Startup point 1.
672 Waiting for send to finish...
673 in the cpu_init())
674 After Startup.
675 Before Callout 18.
676 After Callout 18.
677 cpu is: 12
678 in the enable_x2apic()
679 ------in x2apic_phys_get_apic_id-----
680 CPU#18 (phys ID: 18) waiting for CALLOUT
681 CALLIN, before setup_local_APIC().
682 ------3------
683 Stack at about ffff88021f953f44
684 ------in x2apic_phys_get_apic_id-----
685 CPU18: has booted.

wakeup_secondary_cpu_via_init是与硬件相关的代码,它的主要作用是通过依次发送INIT(assert)、INIT(de-assert)以及最多两次STARTUP IPI,将AP从halted的状态唤醒,并让它从start_eip所指向的地址开始执行。
Startup IPI会有一个域来指定需要执行代码的地址:apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12), phys_apicid);
如果想彻底搞清楚这段代码,请去看Intel文档。


start_secondary是AP会执行的代码,这段代码通过smp_callin设置cpu_callin_mask中与自己对应的位,以此告诉BSP它已经启动。start_secondary最后进入idle循环。
/*
 * Activate a secondary processor.
 *
 * This is the function do_boot_cpu() stores in initial_code for the CPU
 * being booted. It initialises the CPU, calls smp_callin(), marks the
 * CPU online under the IPI-call and vector locks, enables local
 * interrupts and finally calls cpu_idle().
 *
 * @unused: not referenced in the body.
 */
notrace static void __cpuinit start_secondary(void *unused)
{
	/*
	 * Don't put *anything* before cpu_init(), SMP booting is too
	 * fragile that we want to limit the things done here to the
	 * most necessary things.
	 */
	vmi_bringup();
	cpu_init();
	preempt_disable();
	smp_callin();

	/* otherwise gcc will move up smp_processor_id before the cpu_init */
	barrier();
	/*
	 * Check TSC synchronization with the BP:
	 */
	check_tsc_sync_target();

	/* With the IO-APIC NMI watchdog, IRQ 0 on the 8259A is disabled while NMI is routed through LVT0. */
	if (nmi_watchdog == NMI_IO_APIC) {
		disable_8259A_irq(0);
		enable_NMI_through_LVT0();
		enable_8259A_irq(0);
	}

#ifdef CONFIG_X86_32
	/* 32-bit only: spin until low_mappings reads zero, then flush the whole TLB. */
	while (low_mappings)
		cpu_relax();
	__flush_tlb_all();
#endif

	/* This must be done before setting cpu_online_mask */
	set_cpu_sibling_map(raw_smp_processor_id());
	wmb();

	/*
	 * We need to hold call_lock, so there is no inconsistency
	 * between the time smp_call_function() determines number of
	 * IPI recipients, and the time when the determination is made
	 * for which cpus receive the IPI. Holding this
	 * lock helps us to not include this cpu in a currently in progress
	 * smp_call_function().
	 *
	 * We need to hold vector_lock so there the set of online cpus
	 * does not change while we are assigning vectors to cpus.  Holding
	 * this lock ensures we don't half assign or remove an irq from a cpu.
	 */
	ipi_call_lock();
	lock_vector_lock();
	__setup_vector_irq(smp_processor_id());
	set_cpu_online(smp_processor_id(), true);
	unlock_vector_lock();
	ipi_call_unlock();
	/* The per-cpu state is updated only after both locks have been dropped. */
	per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;

	/* enable local interrupts */
	local_irq_enable();

	x86_cpuinit.setup_percpu_clockev();

	wmb();
	/* Last call in this function. NOTE(review): presumably never returns - confirm in cpu_idle(). */
	cpu_idle();
}
分享到:
评论

相关推荐

    基于smp模式的linux多核启动分析

    ### 基于SMP模式的Linux多核启动分析 #### 概述 本文旨在深入探讨基于SMP(Symmetric Multi-Processing)架构下的Linux操作系统如何实现多核启动及相应设备初始化的过程。特别针对Xilinx Zynq平台,通过对Linux...

    linux多核CPU启动流程图

    综上所述,Linux多核CPU的启动流程涉及了固件、引导加载程序、内核初始化、SMP配置以及PSCI电源管理等多个环节,确保每个CPU都能够正确启动并参与到系统的工作中。这一过程的高效和正确性对于系统的稳定性和性能至关...

    Linux多核CPU控制启动核数.pdf

    在`kernel`行的末尾,我们看到`ro root=/dev/VolGroup00/LogVol00 rhgb quiet`这些参数,它们分别表示以只读模式挂载根文件系统、启用图形化启动界面和保持启动过程安静。为了限制启动核数,我们需要添加`maxcpus=n`...

    ARM Linux启动过程分析

    ARM Linux启动过程分析 1. ARM Linux 启动过程概述 ARM Linux 启动过程可以分为四个部分:引导加载程序(bootloader),Linux 内核,文件系统,应用程序。其中 bootloader 是系统启动或复位以后执行的第一段代码,...

    剖析Linux系统启动过程

    剖析Linux系统启动过程 本文将从用户打开电源直到屏幕出现命令行提示符的整个Linux启动过程进行剖析,并介绍了启动中涉及到的各种文件。 一、BIOS开机自检和引导程序 当用户打开PC的电源,BIOS开机自检,按BIOS中...

    多处理器平台下Linux 2.6启动过程中的位图分析.pdf

    在多处理器平台上,Linux 2.6 操作系统的启动过程涉及到复杂的位图管理机制,这是为了有效地管理和调度多个处理器资源。位图在Linux内核中扮演着关键角色,尤其是在对称多处理器(Symmetric Multi-Processor,SMP)...

    linux内核启动地址修改

    总结来说,修改Linux内核启动地址是一个涉及到内存管理的重要过程,它可以帮助我们在多核系统或者需要特定硬件资源分配的场合中,更高效地使用有限的内存资源。通过上述步骤,可以灵活地调整内核与DSP在内存中的位置...

    The Linux 2.4内核启动过程.pdf

    《Linux 2.4内核启动过程》一文由William Gatliff撰写,详细解析了Linux 2.4内核自获取主机硬件控制权至准备就绪运行用户进程的整个启动流程。文章不仅覆盖了Linux在启动时所期待的编程环境、外设初始化方式,还阐述...

    linux-让多核CPU达到指定的CPU使用率脚本

    在Linux操作系统中,多核CPU的使用率管理是优化系统性能的关键环节。有时,我们可能需要手动调整CPU的使用率,以满足特定的工作负载需求。本文将深入探讨如何通过脚本实现这一目标,并结合相关技术原理进行讲解。 ...

    基于扁平设备树的Linux内核启动方式.pdf

    Linux操作系统的启动过程中,需要由U-Boot(bootloader)向内核传递一些必要的参数,例如内存大小、initrd地址、命令行、外围设备信息等内容。以前的传递方式是通过struct bd_info这个结构体来实现,但这种方式缺乏...

    linux面试问题及回答方案.doc

    Linux 是一个开放源代码的操作系统,它的面试问题涵盖了 Linux 操作系统的各个方面,本文将从 Linux 的启动过程、文件删除原理、软连接与硬连接的区别等方面总结出相关的知识点。 Linux 启动过程 Linux 的启动过程...

    linux内核编译详细过程

    ### Linux内核编译详细过程解析 #### 编译前准备 在开始编译Linux内核之前,确保完成了一系列的准备工作。首先,需要从官方源下载内核源代码,例如`linux-2.6.7.tar.bz2`,可以从`...

    Zynq启动、AMP加载、uCos、Linux等基本介绍

    文档共60页。主要向初学者提供了Zynq开发的技术方向,针对不同应用给出了基本的参考文档;同时对Zynq双核AMP加载方式做了详细描述,对Zynq的fsbl启动流程做了简单介绍。章节如下: ...8 Linux系统启动指南 59

    Zynq-7000&ZynqMP;启动配置和启动文件.pdf

    在介绍Zynq-7000和Zynq UltraScale+ MPSoC的启动配置和启动文件之前,我们需要了解它们的基本结构和启动过程。Zynq系列是Xilinx推出的一款系统级芯片(SoC),它将ARM处理器与FPGA集成在一个芯片上,以此提供强大的...

    hp dl385G7安装Linux 5.4

    然而,在安装过程中,由于硬件特性与Linux内核的交互,有时会出现启动问题,表现为“红屏”,即系统无法正常进入。这个问题通常与服务器的多核处理能力和APIC(高级可编程中断控制器)设置有关。 首先,我们需要在...

    linux_android_uefi_overview

    UEFI是一种在启动过程中用来初始化系统硬件的软件,并且加载操作系统。它替代了传统的BIOS(基本输入输出系统),带来许多改进,比如更强大的驱动支持,更快的启动速度,以及更为强大的安全特性。UEFI在Android设备...

    linux 内存映射机制

    这一机制能够确保系统在启动过程中正确地管理内存映射,并在系统稳定运行时维持高效的内存管理。 为了更好地理解内存映射机制,可以通过工具来浏览Linux内核源代码。例如,使用vim结合ctags和cscope可以有效地查阅...

    Professional Linux Kernel Architecture

    - **附录D:系统启动**:介绍了Linux系统启动过程中的各个阶段,有助于理解系统的初始化流程。 - **附录E:ELF二进制格式**:介绍了Linux常用的ELF文件格式,这对于理解程序的链接和装载过程非常重要。 - **附录F:...

    基于多核DSP处理器DM8168的视频处理方法

    在软件的启动过程方面,DM8168的主处理器ARMCortexA8在上电后由UBoot引导Linux操作系统启动,完成电源管理、重启控制,并将可执行文件入口设置到相应寄存器中。这一系列操作使得DM8168能够在多核环境下高效地进行...

    NVIDIA_Jetson_Linux_Driver_Package-32.4.3.zip

    本篇将深入解析"32.4.3"版本的NVIDIA Jetson Linux Driver Package,并探讨其关键组成部分及其在开发过程中的作用。 1. **BSP(Board Support Package)** BSP是嵌入式系统中至关重要的部分,它包含了一组特定硬件...

Global site tag (gtag.js) - Google Analytics