asm {
                ALIGN   16, OC_NOP
USE16
//See ZealOS MultiCore.

//This code gets copied to MP_VECT_ADDR.
//See MemCopy(MP_VECT_ADDR.
//
//Application-processor (AP) entry path.  Three stages, in order:
//  1) 16-bit stub (COREAP_16BIT_INIT .. COREAP_16BIT_INIT_END): loads the
//     GDT pointer held in the copied stub, loads CR0/CR4 and far-jumps to
//     the 32-bit stage.
//  2) 32-bit stage (AP_32BIT_INIT): claims a CPU index under
//     SYS_MP_COUNT_LOCK, points ESP at that CCPU's start_stack and calls
//     SYS_ENTER_LONG_MODE.
//  3) 64-bit stage: waits for CCPU.executive_task to become non-zero and
//     jumps to _TASK_CONTEXT_RESTORE.
//Instruction order is significant throughout; do not reorder.
COREAP_16BIT_INIT::
                JMP         @@05

                ALIGN   4, OC_NOP
//Placeholder GDT pointer (limit, then 64-bit base).
AP_GDT_PTR:     DU16        sizeof(CGDT) - 1;
                DU64        0;

@@05:           CLI
                WBINVD
                MOV         AX, MP_VECT_ADDR / 16   //DS = segment of the copied stub
                MOV         DS, AX
                LGDT        U32 [CAP16BitInit.ap_gdt_ptr] //See mp->ap_gdt_ptr

                MOV         EAX, SYS_START_CR0
                MOV_CR0_EAX
                MOV_EAX_CR4
                BTS         EAX, CR4f_OSFXSR        // enable SSE
                MOV_CR4_EAX
                DU8         0x66, 0xEA;             //JMP CGDT.cs32:AP_32BIT_INIT
                DU32        AP_32BIT_INIT;
                DU16        CGDT.cs32;
COREAP_16BIT_INIT_END::

USE32
AP_32BIT_INIT:
                MOV         AX, CGDT.ds             //Load every data segment register
                MOV         DS, AX
                MOV         ES, AX
                MOV         FS, AX
                MOV         GS, AX
                MOV         SS, AX

//Spin until bit 0 of SYS_MP_COUNT_LOCK is acquired.
@@05:           LOCK
                BTS         U32 [SYS_MP_COUNT_LOCK], 0
                JC          @@05

//ESI = this core's index (value of the counter before the increment).
                MOV         ESI, U32 [SYS_MP_COUNT_INITIAL]
                LOCK
                INC         U32 [SYS_MP_COUNT_INITIAL]
                LOCK
                BTR         U32 [SYS_MP_COUNT_LOCK], 0

//Cores beyond MP_PROCESSORS_NUM go to _SYS_HLT.
                CMP         ESI, MP_PROCESSORS_NUM
                JAE         I32 _SYS_HLT

//ESI = address of this core's CCPU inside the array at [SYS_CPU_STRUCTS].
                IMUL2       ESI, sizeof(CCPU)
                ADD         ESI, U32 [SYS_CPU_STRUCTS]

//Stack pointer = one past the end of CCPU.start_stack.
                LEA         ESP, U32 CCPU.start_stack + sizeof(CCPU.start_stack)[ESI]

                PUSH        U32 RFLAGG_START
                POPFD
                PUSH        U32 0 //Return from next call will be 64-bit
                CALL        SYS_ENTER_LONG_MODE
USE64
                FNINIT
                MOV         RAX, RSI                //RAX = CCPU address for SET_GS_BASE
                CALL        SET_GS_BASE
//Wait for CCPU.executive_task to be filled in (see CPUStructInit).
@@10:           MOV         RAX, U64 CCPU.executive_task[RSI]
                TEST        RAX, RAX
                JZ          @@10
                MOV         U64 CTask.gs[RAX], RSI  //task->gs = this CCPU
                CALL        SET_FS_BASE             //RAX still holds the executive task

                JMP         I32 _TASK_CONTEXT_RESTORE
}

U0 TSSBusy(I64 tr, Bool val=OFF)
{//See ::/Demo/Lectures/Ring3.CC.
//Sets bit 9 of the U32 at byte offset tr+4 inside sys_gdt to val.
//tr:  byte offset of a descriptor within sys_gdt (as stored in CTSS.tr).
//val: new value for the bit; defaults to OFF.
    LBEqual((&sys_gdt)(U8 *) + tr + 4, 9, val);
}

CTSS *TSSNew(I64 cpu_num)
{//Allocate a CTSS for core cpu_num and write its two descriptors into sys_gdt.
//Allocates three stacks of MEM_INTERRUPT_STACK bytes (st0..st2) and sets
//rsp0..rsp2 to the end of each.  Returns the new CTSS.
    U32     *d, *d1;
    CTSS    *tss = CAlloc(sizeof(CTSS));

    tss->io_map_offset = offset(CTSS.io_map);
    MemSet(tss->io_map, 0xFF, 0x10000 / 8); //One bit per port, all bits set

    tss->st0    = MAlloc(MEM_INTERRUPT_STACK);
    tss->rsp0   = tss->st0(U8 *) + MSize(tss->st0);
    tss->st1    = MAlloc(MEM_INTERRUPT_STACK);
    tss->rsp1   = tss->st1(U8 *) + MSize(tss->st1);
    tss->st2    = MAlloc(MEM_INTERRUPT_STACK);
    tss->rsp2   = tss->st2(U8 *) + MSize(tss->st2);

    //Each core owns one 16-byte slot per descriptor table entry.
    tss->tr         = offset(CGDT.tr) + cpu_num * 16;
    tss->tr_ring3   = offset(CGDT.tr_ring3) + cpu_num * 16;

    //Descriptor at tss->tr.
    d   = (&sys_gdt)(U8 *) + tss->tr;
    d1  = d(U8 *) + 4;
    *d  = 0x0000FFFF;               //Bytes 0-3: low word 0xFFFF, high word 0
    *d1 = 0x008F8900;               //Bytes 4-7: attribute bits
    d(U8 *) += 2;                   //d now overlays bytes 2-5
    *d      |= tss & 0x00FFFFFF;    //Address bits 0-23; byte 5 is preserved
    *d1++   |= tss & 0xFF000000;    //Address bits 24-31 into byte 7
    *d1++   = tss >> 32;            //Bytes 8-11: address bits 32-63
    *d1     = 0;                    //Bytes 12-15

    //Descriptor at tss->tr_ring3: same layout, attribute byte 0xE9 instead of 0x89.
    d   = (&sys_gdt)(U8 *) + tss->tr_ring3;
    d1  = d(U8 *) + 4;
    *d  = 0x0000FFFF;
    *d1 = 0x008FE900;
    d(U8 *) += 2;
    *d      |= tss & 0x00FFFFFF;
    *d1++   |= tss & 0xFF000000;
    *d1++   = tss >> 32;
    *d1     = 0;

    return tss;
}

CCPU *CPUStructInit(I64 num, CCPU *c, CTask *executive_task)
{//Executive is null when called by sys_task on CSysFixedArea.boot_cpu0
//Zeroes *c, then fills in addr, num, idle_factor and the next_dying queue.
//When RLf_16MEG_SYSTEM_HEAP_CTRL is set in sys_run_level it also spawns the
//idle task and creates the core's TSS.  Returns c.
    MemSet(c, 0, sizeof(CCPU));
    c->addr         = c;
    c->num          = num;
    c->idle_factor  = 0.01;
    QueueInit(&c->next_dying);
    if (Bt(&sys_run_level, RLf_16MEG_SYSTEM_HEAP_CTRL))
    {
        c->idle_task = Spawn(0, NULL, "Idle Task",, Fs,, 0);
        LBts(&c->idle_task->task_flags, TASKf_IDLE);
        c->tss = TSSNew(num);
    }
    //Must be the last store: the AP boot code spins on this field.
    c->executive_task = executive_task;// It waits for this to be filled-in: executive_task

    return c;
}

U0 MPInt(U8 num, I64 cpu_num=1)
{//Generate interrupt for specified core.
//num:     interrupt vector to deliver.
//cpu_num: target core index; must satisfy 0 <= cpu_num < mp_count.
//An out-of-range cpu_num returns silently while RLf_MP is not yet set in
//sys_run_level, and throws 'MultCore' afterwards.
    //Same range test as JobQueue(): a negative index would otherwise read
    //dev.mp_apic_ids[] out of bounds.
    if (!(0 <= cpu_num < mp_count))
    {
        if (!Bt(&sys_run_level, RLf_MP))
            return;
        else
            throw('MultCore');
    }

    PUSHFD
    CLI //Multitasking safe because each core has a local apic and IRQs are off
    //Wait while bit 12 (0x1000) of ICR low is set.
    //NOTE(review): bit 12 is the delivery-status (send pending) bit per the Intel SDM.
    while (*(dev.uncached_alias + LAPIC_ICR_LOW)(U32 *) & 0x1000)
        PAUSE
    //Destination APIC id goes in bits 24-31 of ICR high; writing ICR low sends.
    *(dev.uncached_alias + LAPIC_ICR_HIGH)(U32 *) = dev.mp_apic_ids[cpu_num] << 24;
    *(dev.uncached_alias + LAPIC_ICR_LOW)(U32 *)  = 0x4000 + num;
    POPFD
}

U0 MPIntAll(U8 num)
{//Generate interrupt for all but own core.
//num: interrupt vector to deliver.
    PUSHFD
    CLI //Multitasking safe because each core has a local apic and IRQs are off
    //Wait while bit 12 (0x1000) of ICR low is set, as in MPInt().
    while (*(dev.uncached_alias + LAPIC_ICR_LOW)(U32 *) & 0x1000)
        PAUSE
    *(dev.uncached_alias + LAPIC_ICR_LOW)(U32 *) = 0xC4800 + num;
    POPFD
}

U0 MPNMInt()
{//Generate nonmaskable interrupt.
//Writes ICR low directly: no wait on a pending send and no CLI/POPFD
//bracket, unlike MPInt() and MPIntAll().
    *(dev.uncached_alias + LAPIC_ICR_LOW)(U32 *) = 0xC4400;
}

U0 MPHalt()
{//Halt all other cores.
//Sets mp_count to 1, sends the NMI via MPNMInt() and then calls Busy(10000).
    mp_count = 1;
    MPNMInt;
    Busy(10000);
}

U0 MPAPICInit()
{//Called by sys_task during start-up
//and other cores during initialization
//after Core0StartMP().
    *(dev.uncached_alias + LAPIC_SVR)(U32 *) |= LAPICF_APIC_ENABLED;
    //Record this core's APIC id (bits 24-31 of the id register) under Gs->num.
    dev.mp_apic_ids[Gs->num] = *(dev.uncached_alias + LAPIC_APIC_ID)(U32 *) >> 24;
    *(dev.uncached_alias + LAPIC_LDR)(U32 *) = dev.mp_apic_ids[Gs->num] << 24;
    *(dev.uncached_alias + LAPIC_DFR)(U32 *) = 0xF0000000;

    //  MemSet(dev.uncached_alias + LAPIC_IRR, 0, 0x20);
    //  MemSet(dev.uncached_alias + LAPIC_ISR, 0, 0x20);
    //  MemSet(dev.uncached_alias + LAPIC_TMR, 0, 0x20);

    //Load the task register with this core's selector from TSSNew().
    RAXSet(Gs->tss->tr);
    LTR     AX
    //Cores other than 0 also run IntInit1 and switch to RFLAGG_NORMAL.
    if (Gs->num)
    {
        IntInit1;
        RFlagsSet(RFLAGG_NORMAL);
    }
}

//next_waiting must be the first member of CJobCtrl.
#assert !offset(CJobCtrl.next_waiting)

U0 CoreAPExecutiveTask()
{//Main loop of an AP's executive task; never returns.
//Repeats: reap dying tasks, take the JOBCf_LOCKED bit of this task's
//server_ctrl, and run one job via JobRunOne() while the waiting queue is
//non-empty.  When the inner loop ends it sets TASKf_AWAITING_MESSAGE,
//clears the lock bit and yields with TASKf_IDLE set.
    CJobCtrl *ctrl = &Fs->server_ctrl;

    while (TRUE)
    {
        STI
        do
        {
            TaskKillDying;
            do PAUSE
            while (LBts(&ctrl->flags, JOBCf_LOCKED)); //Spin until the lock bit was clear
        }
        while (ctrl->next_waiting != ctrl && JobRunOne(RFlagsGet, ctrl));

        CLI
        //Order matters: mark as awaiting before dropping the lock so that
        //JobQueue(), which tests this flag under the lock, sees it.
        LBts(&Fs->task_flags, TASKf_AWAITING_MESSAGE);
        LBtr(&ctrl->flags, JOBCf_LOCKED);
        LBts(&Fs->task_flags, TASKf_IDLE);
        Yield;
        LBtr(&Fs->task_flags, TASKf_IDLE);
    }
}

CJob *JobQueue(I64 (*fp_addr)(U8 *data), U8 *data=NULL, I64 target_cpu=1, I64 flags=1<<JOBf_FREE_ON_COMPLETE,
               I64 job_code=JOBT_CALL, U8 *aux_str=NULL, I64 aux1=0, I64 aux2=0)
{//Queue multicore jobs, handled by Executive tasks.
//Set flags to zero if you wish to get the res.
//See ::/Demo/MultiCore/Lock.CC
//fp_addr/data: function and argument stored in the job.
//target_cpu:   core index; throws 'MultCore' unless 0 <= target_cpu < mp_count.
//aux_str:      copied with SysStrNew() when non-NULL.
//Returns the newly allocated CJob, appended to the target executive's
//waiting queue.
    CJobCtrl    *ctrl;
    CJob        *tmpc;
    CTask       *executive;

    if (!(0 <= target_cpu < mp_count))
        throw('MultCore');

    tmpc = SysCAlloc(sizeof(CJob));
    if (aux_str)
        tmpc->aux_str = SysStrNew(aux_str);
    tmpc->job_code  = job_code;
    tmpc->addr      = fp_addr;
    tmpc->fun_arg   = data;
    tmpc->flags     = flags;
    tmpc->aux1      = aux1;
    tmpc->aux2      = aux2;

    executive = cpu_structs[target_cpu].executive_task;
    tmpc->ctrl = ctrl = &executive->server_ctrl;

    PUSHFD
    CLI
    while (LBts(&ctrl->flags, JOBCf_LOCKED))
        Yield;

    //Wake the target only if its queue is empty and it had flagged itself
    //as awaiting a message (the flag is cleared by this test).
    if (ctrl->next_waiting == ctrl && LBtr(&executive->task_flags, TASKf_AWAITING_MESSAGE))
        MPInt(I_WAKE, target_cpu);

    QueueInsert(tmpc, ctrl->last_waiting);
    LBtr(&ctrl->flags, JOBCf_LOCKED);
    POPFD

    return tmpc;
}

CTask *SpawnQueue(U0 (*fp_addr)(U8 *data), U8 *data=NULL, U8 *task_name=NULL, I64 target_cpu,
                  CTask *parent=NULL, //NULL means sys_task
                  I64 stack_size=0, I64 flags=1 << JOBf_ADD_TO_QUE)
{//Queue a JOBT_SPAWN_TASK job on target_cpu and wait for it to finish.
//task_name, parent and stack_size travel in the job's aux_str, aux1 and
//aux2.  Yields with TASKf_IDLE set until JOBf_DONE appears in the job's
//flags, then unlinks and deletes the job.  Returns the job's spawned_task.
    CTask       *res;
    CJob        *tmpc = JobQueue(fp_addr, data, target_cpu, flags, JOBT_SPAWN_TASK, task_name, parent, stack_size);
    CJobCtrl    *ctrl;

    while (!Bt(&tmpc->flags, JOBf_DONE))
    {
        LBts(&Fs->task_flags, TASKf_IDLE);
        Yield;
    }
    LBtr(&Fs->task_flags, TASKf_IDLE);

    res = tmpc->spawned_task;
    ctrl = tmpc->ctrl;

    //Remove the finished job from its queue under the ctrl lock.
    PUSHFD
    CLI
    while (LBts(&ctrl->flags, JOBCf_LOCKED))
        Yield;
    QueueRemove(tmpc);
    LBtr(&ctrl->flags, JOBCf_LOCKED);
    POPFD

    JobDel(tmpc);

    return res;
}

U0 CoreAPExecutiveInit()
{//Called by multicore's executive task after Core0StartMP()
//as the first thing a CPU does before waiting for jobs.
    MPAPICInit;
    //Continue in CoreAPExecutiveTask by restoring context with a new rip.
    Fs->rip = &CoreAPExecutiveTask;
    TaskContextRestore;
}

U0 Core0StartMP()
{//Called by sys_task during start-up.
//Starts the other cores and gives each an executive task.
//Runs with interrupts disabled (PUSHFD/CLI .. POPFD).  If cores were
//already running (mp_count > 1) they are halted and their job queues
//deleted first.  On exit mp_count holds the number of cores that checked
//in, capped at MP_PROCESSORS_NUM.
    CTask           *task;
    U8               buf[STR_LEN];
    CAP16BitInit    *mp = MP_VECT_ADDR;
    CCPU            *c;
    I64              i, my_mp_count;

    PUSHFD
    CLI
    if (mp_count > 1)
    {
        my_mp_count = mp_count;
        MPHalt; //sets mp_count to 1
        for (i = 1; i < my_mp_count; i++)
        {
            c = &cpu_structs[i];
            JobQueueDel(&c->executive_task->server_ctrl.next_waiting);
            JobQueueDel(&c->executive_task->server_ctrl.next_done);
        }
    }
    //Clear every CCPU except core 0's.
    MemSet(&cpu_structs[1], 0, sizeof(CCPU) * (MP_PROCESSORS_NUM - 1));

    //When you start-up other cores, they jump to an address
    //specified by a byte vect number, MPN_VECT which corresponds
    //to a location 4096*vect number, MP_VECT_ADDR.
    MemCopy(mp, COREAP_16BIT_INIT, COREAP_16BIT_INIT_END-COREAP_16BIT_INIT);
    //Overwrite the stub's placeholder GDT pointer with the live one.
    MemCopy(&mp->ap_gdt_ptr, SYS_GDT_PTR, sizeof(CSysLimitBase));
    mp_count_initial = mp_count = 1;
    mp_count_lock = 0;

    //Replace the low byte of the register with MPN_VECT.
    //NOTE(review): relies on HolyC giving & higher precedence than +,
    //i.e. (old & 0xFFFFFF00) + MPN_VECT -- confirm against the HolyC operator table.
    *(dev.uncached_alias + LAPIC_LVT_ERR)(U32 *) = *(dev.uncached_alias + LAPIC_LVT_ERR)(U32 *) & 0xFFFFFF00 + MPN_VECT;
    WBINVD //Not sure why this is needed. Might just need delay. MemCopy above?

    *(dev.uncached_alias + LAPIC_ICR_LOW)(U32 *) = 0xC4500; //assert init IPI
    Busy(10000);

    //The start-up IPI is sent twice, Busy(200) apart.
    *(dev.uncached_alias + LAPIC_ICR_LOW)(U32 *) = 0xC4600 + MPN_VECT; //start-up
    Busy(200);
    *(dev.uncached_alias + LAPIC_ICR_LOW)(U32 *) = 0xC4600 + MPN_VECT;

    Busy(100000);
    //Take the counter lock and never release it: late cores spin in the
    //boot code instead of claiming an index.
    for (i = 0; i < 10000; i++)
        LBts(&mp_count_lock, 0); //Don't let more through
    my_mp_count = mp_count_initial;

    if (my_mp_count > MP_PROCESSORS_NUM)
        my_mp_count = MP_PROCESSORS_NUM;

    for (i = 1; i < my_mp_count; i++)
    {
        StrPrint(buf, "Executive CPU%02X", i);
        task = Spawn(&CoreAPExecutiveInit, NULL, buf,,, MEM_EXECUTIVE_STACK, 0);
        task->rflags = RFLAGG_START;
        //CTask alloced off this core's executive_task's heap (Which is System task)
        //CPUStructInit stores task in executive_task, which releases the waiting core.
        CPUStructInit(i, &cpu_structs[i], task);
        WBINVD //Not sure why this is needed. Might just need delay.
    }

    //Make sure they're all up-and-running
    for (i = 1; i < my_mp_count; i++)
        while (!Bt(&cpu_structs[i].executive_task->task_flags, TASKf_AWAITING_MESSAGE))
            PAUSE;

    POPFD
    mp_count = my_mp_count; //Finalize count
}

U0 Core0Init()
{//Called by sys_task during start-up
//Resets the core counters, allocates debug.mp_crash and the cpu_structs
//array, initialises core 0's CCPU, sets the GS base and runs MPAPICInit.
    mp_count_initial = mp_count = 1;
    mp_count_lock = 0;

    debug.mp_crash = SysCAlloc(sizeof(CMPCrash));

    //Must be in code heap because init code uses 32 bit address of cpu_struct
    sys_task->gs = cpu_structs = CAlloc(sizeof(CCPU) * MP_PROCESSORS_NUM, Fs->code_heap);
    //Keep the next three lines adjacent: SET_GS_BASE takes its argument in
    //RAX, which holds CPUStructInit's return value.
    CPUStructInit(0, cpu_structs, sys_task);
    //RAX has GS
    IMPORT  SET_GS_BASE;
    CALL    SET_GS_BASE

    MPAPICInit;
}

interrupt U0 IntMPCrash()
{//Entering the debugger from another core causes an interrupt on Core0
//Which calls this routine.
//Writes 0 to the LAPIC EOI register, sets mp_count to 1, switches to raw
//text output, prints the fields of debug.mp_crash and calls Panic().
    *(dev.uncached_alias + LAPIC_EOI)(U32 *) = 0;
    mp_count = 1;
    Raw(ON);
    text.raw_flags |= RAWF_SHOW_DOLLAR;
    "MP Crash CPU%02X Task:%08X\n"
    "RIP:%P\n", debug.mp_crash->cpu_num, debug.mp_crash->task, debug.mp_crash->rip;
    Panic(debug.mp_crash->message, debug.mp_crash->message_num);
}