Virtualbox源码分析22 NEM(Hyper-V兼容)3 Emulation Thread

(157) 2024-03-19 15:01:01

Native execution manager (Emulation Thread )

文章目录

    • Native execution manager (Emulation Thread )
    • 22.1. Emulation循环线程
      • emR3NemExecute
      • nemR3NativeRunGC
      • R0的循环
      • NEMR0RunGuestCode
      • emR3NemForcedActions
    • 22.2 从GuestOS里获取和写入寄存器信息
      • nemR0WinImportState
      • nemR0WinExportState
    • 22.3中断处理
      • nemHCWinHandleInterruptFF

本篇重点介绍NEM模式下执行GuestOS的相关函数,基本框架和HM/Raw-mode完全一样,只是具体实现代码(调用的API)不同而已。

22.1. Emulation循环线程

emR3NemExecute

主循环函数,实现原理和过程和VMX/Raw-mode几乎一样,部分处理action的代码可以参考EM一篇

EMR3Nem.cpp

// Main NEM execution loop (abridged excerpt): alternates between servicing
// forced-action flags and running guest code through NEMR3RunGC until a
// status code requires leaving the loop.
//
// pVM      - VM handle, used for the VM-wide forced-action checks.
// pVCpu    - the virtual CPU being executed.
// pfFFDone - set to true when the loop is left because emR3ForcedActions
//            returned something other than VINF_SUCCESS / VINF_EM_RESCHEDULE_HM.
//
// NOTE(review): the declaration of rcStrict and the function's return
// statement are not shown in this excerpt.
VBOXSTRICTRC emR3NemExecute(PVM pVM, PVMCPU pVCpu, bool *pfFFDone)
{ 
   
    // Loop forever executing guest OS code
    for (;;)
    { 
   
        // First run the pre-execution forced actions to handle pending events
        if (   VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_PRE_RAW_MASK)
            || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_PRE_RAW_MASK))
        { 
   
            rcStrict = emR3NemForcedActions(pVM, pVCpu);
            if (rcStrict != VINF_SUCCESS)
                break;
        }
        // Execute guest code
        if (RT_LIKELY(emR3IsExecutionAllowed(pVM, pVCpu)))
        { 
   
            rcStrict = NEMR3RunGC(pVM, pVCpu);
        }
        else
        { 
   
            // Execution is not allowed right now: sleep 5 ms and try again
            RTThreadSleep(5);
            rcStrict = VINF_SUCCESS;
        }
        // Handle high-priority pending events; this calls an EM function (covered in the EM article)
        VMCPU_FF_CLEAR_MASK(pVCpu, VMCPU_FF_RESUME_GUEST_MASK);
        if (   VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_POST_MASK)
            || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_POST_MASK))
            rcStrict = emR3HighPriorityPostForcedActions(pVM, pVCpu, rcStrict);
        // Status lies in the VINF_EM_FIRST..VINF_EM_LAST range: leave the loop
        if (rcStrict >= VINF_EM_FIRST && rcStrict <= VINF_EM_LAST)
            break;
        // Process the status code; per the author this ends up in emR3HmHandleRC (covered in the EM article).
        // emR3NemHandleRC returning VINF_SUCCESS means execution can continue; any other value leaves the loop for ring-3 handling
        rcStrict = emR3NemHandleRC(pVM, pVCpu, VBOXSTRICTRC_TODO(rcStrict));
        if (rcStrict != VINF_SUCCESS)
            break;
        // Handle any remaining forced actions
        if (   VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK)
            || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_ALL_MASK))
        { 
   
            // This calls an EM function (covered in the EM article)
            rcStrict = emR3ForcedActions(pVM, pVCpu, VBOXSTRICTRC_TODO(rcStrict));
            VBOXVMM_EM_FF_ALL_RET(pVCpu, VBOXSTRICTRC_VAL(rcStrict));
            // Leave the loop on any status other than VINF_SUCCESS or VINF_EM_RESCHEDULE_HM
            // (author: an error, or the execution mode has to be re-decided)
            if (   rcStrict != VINF_SUCCESS
                && rcStrict != VINF_EM_RESCHEDULE_HM)
            { 
   
                *pfFFDone = true;
                break;
            }
        }
	}//end of for
    if (pVCpu->cpum.GstCtx.fExtrn)
    { 
   
        // Some state is still marked external: import the registers selected by fExtrn so later handling can use them
        int rcImport = NEMImportStateOnDemand(pVCpu, pVCpu->cpum.GstCtx.fExtrn);
    }
}

nemR3NativeRunGC

// Ring-3 side of running guest code (abridged excerpt): when the ring-0 run
// loop is enabled, repeatedly calls ring-0 with VMMR0_DO_NEM_RUN and performs
// any TLB flush requested through the returned status or nem.s.rcPending.
//
// Returns the status from the ring-0 call, except that VINF_NEM_FLUSH_TLB is
// turned into VINF_SUCCESS once the flush has been done here.
//
// NOTE(review): the path taken when fUseRing0Runloop is false is not shown
// in this excerpt.
VBOXSTRICTRC nemR3NativeRunGC(PVM pVM, PVMCPU pVCpu)
{ 
   
    // The ring-0 run loop is enabled
    if (pVM->nem.s.fUseRing0Runloop)
    { 
   
        for (;;)
        { 
   
            // Call into ring-0 (VMMR0_DO_NEM_RUN) to execute guest code
            VBOXSTRICTRC rcStrict = VMMR3CallR0EmtFast(pVM, pVCpu, VMMR0_DO_NEM_RUN);
            // Process the returned status
            if (RT_SUCCESS(rcStrict))
            { 
   
                // Fetch and reset the pending status. If a TLB flush was requested, do it before anything else
                // (author: so that later processing does not read stale memory)
                VBOXSTRICTRC rcPending = pVCpu->nem.s.rcPending;
                pVCpu->nem.s.rcPending = VINF_SUCCESS;
                if (rcStrict == VINF_NEM_FLUSH_TLB || rcPending == VINF_NEM_FLUSH_TLB)
                { 
   
                    // Flush the TLB using the current guest CR3
                    int rc = PGMFlushTLB(pVCpu, CPUMGetGuestCR3(pVCpu), true);
                    if (rcStrict == VINF_NEM_FLUSH_TLB)
                    { 
   
                        if (   !VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_POST_MASK | VM_FF_HP_R0_PRE_HM_MASK)
                            && !VMCPU_FF_IS_ANY_SET(pVCpu,   (VMCPU_FF_HIGH_PRIORITY_POST_MASK | VMCPU_FF_HP_R0_PRE_HM_MASK)
                                                           & ~VMCPU_FF_RESUME_GUEST_MASK))
                        { 
   
                            // No other high-priority work is pending: clear VMCPU_FF_RESUME_GUEST_MASK and go straight back to the guest
                            VMCPU_FF_CLEAR_MASK(pVCpu, VMCPU_FF_RESUME_GUEST_MASK);
                            continue;
                        }
                        // Other work is pending: report success to the caller now that the flush is done
                        rcStrict = VINF_SUCCESS;
                    }
                }
            }
            return rcStrict;
        }
    }
}

R0的循环

R3通过IOCTL进入VMMR0EntryFast,通知R0开始循环执行GuestOS代码

// Ring-0 fast-path entry point (fragment): only the VMMR0_DO_NEM_RUN case is
// shown; the surrounding switch and the other operations are elided.
VMMR0DECL(void) VMMR0EntryFast(PGVM pGVM, PVMCC pVMIgnored, VMCPUID idCpu, VMMR0OPERATION enmOperation)
{ 
   
    ....
        case VMMR0_DO_NEM_RUN:
        { 
   
            // Run NEMR0RunGuestCode (the ring-0 loop) through the set-jump wrapper vmmR0CallRing3SetJmp2
            int rc = vmmR0CallRing3SetJmp2(&pGVCpu->vmm.s.CallRing3JmpBufR0, (PFNVMMR0SETJMP2)NEMR0RunGuestCode, pGVM, idCpu);
            // Remember the status in iLastGZRc
            pGVCpu->vmm.s.iLastGZRc = rc;
            VBOXVMM_R0_VMM_RETURN_TO_RING3_NEM(pGVCpu, CPUMQueryGuestCtxPtr(pGVCpu), rc);
            break;
        }
}

NEMR0RunGuestCode

最终调用到nemHCWinRunGC,这个函数是个大杂烩,3种不同的实现都放到这一个函数里,为了简化,只看R0循环的版本

// Inner guest execution loop (abridged excerpt, ring-0 run-loop variant only).
// Each iteration: inject pending interrupts, export modified registers to the
// hypervisor, start the virtual processor, wait for the next message via
// MessageSlotHandleAndGetNext and hand it to nemHCWinHandleMessage.  After the
// loop the VCPU is stopped if a message is still outstanding, and the guest
// registers needed by ring-3 are imported.
//
// NOTE(review): the declaration of rcStrict, several assertions and the
// final return statement are not shown in this excerpt.
NEM_TMPL_STATIC VBOXSTRICTRC nemHCWinRunGC(PVMCC pVM, PVMCPUCC pVCpu)
{ 
   
    // Switch the VCPU state to STARTED_EXEC_NEM (expected current state: STARTED)
    if (VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC_NEM, VMCPUSTATE_STARTED))
    { 
    /* likely */ }
    else
    { 
   
        // The first exchange failed: try again expecting STARTED_EXEC_NEM_CANCELED, then return without running the guest
        VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC_NEM, VMCPUSTATE_STARTED_EXEC_NEM_CANCELED);
        return VINF_SUCCESS;
    }
    // Get the message slot mapping from pvMsgSlotMapping (per the author, the memory holding the VM-exit parameters)
    VID_MESSAGE_MAPPING_HEADER volatile *pMappingHeader = (VID_MESSAGE_MAPPING_HEADER volatile *)pVCpu->nem.s.pvMsgSlotMapping;
    // Whether the guest is being single-stepped
    const bool      fSingleStepping     = DBGFIsStepping(pVCpu);
    // Loop executing guest code
    for (unsigned iLoop = 0;; iLoop++)
    { 
         
        pVCpu->nem.s.fDesiredInterruptWindows = 0;
        // If any interrupt-related forced-action flag is pending
        if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_UPDATE_APIC | VMCPU_FF_INTERRUPT_PIC
                                     | VMCPU_FF_INTERRUPT_NMI  | VMCPU_FF_INTERRUPT_SMI))
        { 
   
            // ...and a message still has to be fetched from the hypervisor (get-next-message flag set)
            if (pVCpu->nem.s.fHandleAndGetFlags == VID_MSHAGN_F_GET_NEXT_MESSAGE)
            { 
   
                pVCpu->nem.s.fHandleAndGetFlags = 0;
                // Stop the guest CPU and handle the VM exit (covered in the next article)
                rcStrict = nemHCWinStopCpu(pVM, pVCpu, rcStrict, pMappingHeader);
                if (rcStrict == VINF_SUCCESS)
                { 
    /* likely */ }
                else
                { 
   
                    break;
                }
            }
            // Deal with the pending interrupts first (injection or interrupt-window request)
            rcStrict = nemHCWinHandleInterruptFF(pVM, pVCpu, &pVCpu->nem.s.fDesiredInterruptWindows);
            if (rcStrict == VINF_SUCCESS)
            { 
    /* likely */ }
            else
            { 
   
                break;
            }
        }
        // Guest registers may have been modified while handling VM exits / forced actions: export the state
        // when not everything is marked external, or when the interrupt windows need updating and no message is outstanding
        if ((pVCpu->cpum.GstCtx.fExtrn & (CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_NEM_WIN_MASK))
               !=                              (CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_NEM_WIN_MASK)
            || (  (   pVCpu->nem.s.fDesiredInterruptWindows
                   || pVCpu->nem.s.fCurrentInterruptWindows != pVCpu->nem.s.fDesiredInterruptWindows)
                && pVCpu->nem.s.fHandleAndGetFlags != VID_MSHAGN_F_GET_NEXT_MESSAGE)
           )
        { 
   
            int rc2 = nemR0WinExportState(pVM, pVCpu, &pVCpu->cpum.GstCtx);
        }
        // Poll the timers; the returned value is used below to bound the wait
        uint64_t       offDeltaIgnored;
        uint64_t const nsNextTimerEvt = TMTimerPollGIP(pVM, pVCpu, &offDeltaIgnored);
        // Do not enter the guest if flags requiring a return to ring-3 are set
        if (   !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
            && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
        { 
   
            if (pVCpu->nem.s.fHandleAndGetFlags)
            { 
    
                // fHandleAndGetFlags is non-zero: the virtual processor is not started again here
                // (author: an event is still outstanding, so do not enter the guest)
            }
            else
            { 
   
                pVCpu->nem.s.uIoCtlBuf.idCpu = pVCpu->idCpu;
                // Start the virtual processor, i.e. run guest code
                NTSTATUS rcNt = nemR0NtPerformIoControl(pVM, pVCpu, pVM->nemr0.s.IoCtlStartVirtualProcessor.uFunction,
                                                        &pVCpu->nem.s.uIoCtlBuf.idCpu, sizeof(pVCpu->nem.s.uIoCtlBuf.idCpu),
                                                        NULL, 0);
                // NOTE(review): the next line is the tail of a statement whose beginning was elided from this excerpt
                                      VERR_NEM_IPE_5);
                // The processor is running: mark that the next VM-exit message has to be fetched
                pVCpu->nem.s.fHandleAndGetFlags = VID_MSHAGN_F_GET_NEXT_MESSAGE;
            }
            // Switch the VCPU state to STARTED_EXEC_NEM_WAIT
            if (VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC_NEM_WAIT, VMCPUSTATE_STARTED_EXEC_NEM))
            { 
   
                // Derive the wait timeout in milliseconds: 0 below 100000 ns, 1 below 2 ms,
                // (value - 100000 ns) / 1 ms below one second, otherwise RT_MS_1SEC.
                // NOTE(review): the delta is computed as nsNow - nsNextTimerEvt; confirm the sign is intended.
                uint64_t const  nsNow           = RTTimeNanoTS();
                int64_t const   cNsNextTimerEvt = nsNow - nsNextTimerEvt;
                uint32_t        cMsWait;
                if (cNsNextTimerEvt < 100000 /* ns */)
                    cMsWait = 0;
                else if ((uint64_t)cNsNextTimerEvt < RT_NS_1SEC)
                { 
   
                    if ((uint32_t)cNsNextTimerEvt < 2*RT_NS_1MS)
                        cMsWait = 1;
                    else
                        cMsWait = ((uint32_t)cNsNextTimerEvt - 100000 /*ns*/) / RT_NS_1MS;
                }
                else
                    cMsWait = RT_MS_1SEC;
                // Call MessageSlotHandleAndGetNext to fetch the VM-exit message
                pVCpu->nem.s.uIoCtlBuf.MsgSlotHandleAndGetNext.iCpu     = pVCpu->idCpu;
                pVCpu->nem.s.uIoCtlBuf.MsgSlotHandleAndGetNext.fFlags   = pVCpu->nem.s.fHandleAndGetFlags;
                pVCpu->nem.s.uIoCtlBuf.MsgSlotHandleAndGetNext.cMillies = cMsWait;
                NTSTATUS rcNt = nemR0NtPerformIoControl(pVM, pVCpu, pVM->nemr0.s.IoCtlMessageSlotHandleAndGetNext.uFunction,
                                                        &pVCpu->nem.s.uIoCtlBuf.MsgSlotHandleAndGetNext,
                                                        pVM->nemr0.s.IoCtlMessageSlotHandleAndGetNext.cbInput,
                                                        NULL, 0);
                // Switch the VCPU state back to STARTED_EXEC_NEM, ready for the next iteration
                VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC_NEM, VMCPUSTATE_STARTED_EXEC_NEM_WAIT);
                if (rcNt == STATUS_SUCCESS)
                { 
   
                    // A VM-exit message was retrieved successfully.
                    // Handle it (this function is covered in the next article)
                    rcStrict = nemHCWinHandleMessage(pVM, pVCpu, pMappingHeader);
                    // OR VID_MSHAGN_F_HANDLE_MESSAGE into the flags passed to the next get-next call
                    // (presumably to acknowledge the message just processed -- confirm)
                    pVCpu->nem.s.fHandleAndGetFlags |= VID_MSHAGN_F_HANDLE_MESSAGE;
                    if (rcStrict == VINF_SUCCESS)
                    { 
    /* hopefully likely */ }
                    else
                    { 
   
                        // Handling did not return VINF_SUCCESS: leave the loop (back to ring-3)
                        break;
                    }
                }
                else
                { 
   
                    // Fetching the message failed: mark that it has to be fetched again
                    // (after leaving the loop the VCPU is stopped first and the message fetched once more)
                    pVCpu->nem.s.fHandleAndGetFlags = VID_MSHAGN_F_GET_NEXT_MESSAGE;
                }
                // Keep running the guest while none of the HP_R0_PRE_HM flags (the *_STEP_MASK variants when single-stepping) are set
                // TODO (author): what exactly are these flags?
                if (   !VM_FF_IS_ANY_SET(   pVM,   !fSingleStepping ? VM_FF_HP_R0_PRE_HM_MASK    : VM_FF_HP_R0_PRE_HM_STEP_MASK)
                    && !VMCPU_FF_IS_ANY_SET(pVCpu, !fSingleStepping ? VMCPU_FF_HP_R0_PRE_HM_MASK : VMCPU_FF_HP_R0_PRE_HM_STEP_MASK) )
                    continue;
		  }
        }    
    }//end of for
    
    if (pVCpu->nem.s.fHandleAndGetFlags == VID_MSHAGN_F_GET_NEXT_MESSAGE)
    { 
   
        // A message is still outstanding: stop the VCPU and process the VM-exit message (covered in the next article)
        pVCpu->nem.s.fHandleAndGetFlags = 0;
        rcStrict = nemHCWinStopCpu(pVM, pVCpu, rcStrict, pMappingHeader);
    }
    // Switch the VCPU state back to STARTED (from EXEC_NEM, or failing that from EXEC_NEM_CANCELED)
    if (!VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC_NEM))
        VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC_NEM_CANCELED);
    // Import guest register state from the hypervisor so ring-3 has what it needs after we return
    if (pVCpu->cpum.GstCtx.fExtrn & (CPUMCTX_EXTRN_ALL | (CPUMCTX_EXTRN_NEM_WIN_MASK & ~CPUMCTX_EXTRN_NEM_WIN_EVENT_INJECT)))
    { 
   
        // Default import mask: the IEM must-have state plus the interrupt/NMI inhibit state
        uint64_t fImport = IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI;
        if (   (rcStrict >= VINF_EM_FIRST && rcStrict <= VINF_EM_LAST)
            || RT_FAILURE(rcStrict))
            // EM status code or failure: import everything (except the event-injection state)
            fImport = CPUMCTX_EXTRN_ALL | (CPUMCTX_EXTRN_NEM_WIN_MASK & ~CPUMCTX_EXTRN_NEM_WIN_EVENT_INJECT);
        else if (   rcStrict == VINF_IOM_R3_IOPORT_COMMIT_WRITE
                 || rcStrict == VINF_EM_PENDING_R3_IOPORT_WRITE)
            // Ring-3 has to complete an I/O port write: import cs/rip/rflags and the interrupt-inhibit state
            fImport = CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT;
        else if (rcStrict == VINF_EM_PENDING_R3_IOPORT_READ)
            // Ring-3 has to complete an I/O port read: import rax/cs/rip/rflags and the interrupt-inhibit state
            fImport = CPUMCTX_EXTRN_RAX | CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT;
        else if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_APIC
                                          | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI))
            // Interrupts are pending: additionally import the state covered by IEM_CPUMCTX_EXTRN_XCPT_MASK
            fImport |= IEM_CPUMCTX_EXTRN_XCPT_MASK;
        
        if (pVCpu->cpum.GstCtx.fExtrn & fImport)
        { 
   
            // Perform the import (the event-injection state is always added to the request)
            int rc2 = nemR0WinImportState(pVM, pVCpu, &pVCpu->cpum.GstCtx, fImport | CPUMCTX_EXTRN_NEM_WIN_EVENT_INJECT,
                                          true /*fCanUpdateCr3*/);
            if (RT_SUCCESS(rc2))
                pVCpu->cpum.GstCtx.fExtrn &= ~fImport;
            else if (rc2 == VERR_NEM_FLUSH_TLB)
            { 
   
                // The import asks for a TLB flush: return it as the status (negated), or park it in rcPending
                // when rcStrict already carries a different status
                pVCpu->cpum.GstCtx.fExtrn &= ~fImport;
                if (rcStrict == VINF_SUCCESS || rcStrict == -rc2)
                    rcStrict = -rc2;
                else
                { 
   
                    pVCpu->nem.s.rcPending = -rc2;
                }
            }
            else if (RT_SUCCESS(rcStrict))
                // Any other import failure replaces a successful status
                rcStrict = rc2;
            // Nothing relevant is external any more: clear fExtrn completely
            if (!(pVCpu->cpum.GstCtx.fExtrn & (CPUMCTX_EXTRN_ALL | (CPUMCTX_EXTRN_NEM_WIN_MASK & ~CPUMCTX_EXTRN_NEM_WIN_EVENT_INJECT))))
                pVCpu->cpum.GstCtx.fExtrn = 0;
        }
    }
}

emR3NemForcedActions

// Pre-execution forced actions for NEM.
//
// Drops any pending CR3-sync request, tops up the handy-page pool when that
// is requested, and reports whether the VM is still out of memory.
//
// Returns the failure status of PGMR3PhysAllocateHandyPages if it fails,
// VINF_EM_NO_MEMORY while VM_FF_PGM_NO_MEMORY is set, otherwise VINF_SUCCESS.
static int emR3NemForcedActions(PVM pVM, PVMCPU pVCpu)
{
    // CR3 sync requests are not acted upon here: the flags are just cleared.
    if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
    {
        VMCPU_FF_CLEAR_MASK(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL);
    }

    // Handy pages are needed (and we are not already flagged as out of memory): allocate some.
    // Per the author, a successful allocation clears VM_FF_PGM_NO_MEMORY.
    if (VM_FF_IS_PENDING_EXCEPT(pVM, VM_FF_PGM_NEED_HANDY_PAGES, VM_FF_PGM_NO_MEMORY))
    {
        int rcAlloc = PGMR3PhysAllocateHandyPages(pVM);
        if (RT_FAILURE(rcAlloc))
        {
            return rcAlloc;
        }
    }

    // Still flagged as out of memory -> tell the caller, otherwise all is well.
    return VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY) ? VINF_EM_NO_MEMORY : VINF_SUCCESS;
}

22.2 从GuestOS里获取和写入寄存器信息

nemR0WinImportState

从Hypervisor里获取寄存器的值

// Reads guest register values from the hypervisor into the CPUMCTX
// (abridged excerpt; "..." marks elided code).
//
// The function fills the hypercall page with the names of the registers
// selected by fWhat, invokes the HvCallGetVpRegisters hypercall and then
// copies the returned values into pCtx in the same order, notifying other
// components (CPUM, PGM, APIC, EM) when a value with side effects changed.
//
// pGVM          - VM handle; supplies the Hyper-V partition id.
// pGVCpu        - virtual CPU whose registers are read.
// pCtx          - guest CPU context receiving the values.
// fWhat         - CPUMCTX_EXTRN_XXX mask selecting what to import.
// fCanUpdateCr3 - not used in the portion shown here.
//
// NOTE(review): the declarations of cRegs, cbInput, paValues and
// fMaybeChangedMode, as well as the return statement, are not shown in
// this excerpt.
NEM_TMPL_STATIC int nemR0WinImportState(PGVM pGVM, PGVMCPU pGVCpu, PCPUMCTX pCtx, uint64_t fWhat, bool fCanUpdateCr3)
{ 
   
    HV_INPUT_GET_VP_REGISTERS *pInput = (HV_INPUT_GET_VP_REGISTERS *)pGVCpu->nemr0.s.HypercallData.pbPage;
    pInput->PartitionId = pGVM->nemr0.s.idHvPartition;
    pInput->VpIndex     = pGVCpu->idCpu;
    pInput->fFlags      = 0;
    
    // Fill in the name of every register to fetch
    uintptr_t iReg = 0;
    if (fWhat & CPUMCTX_EXTRN_GPRS_MASK)
    { 
   
        if (fWhat & CPUMCTX_EXTRN_RAX)
            pInput->Names[iReg++] = HvX64RegisterRax;
        if (fWhat & CPUMCTX_EXTRN_RCX)
            pInput->Names[iReg++] = HvX64RegisterRcx;
        ...
    }
    // Segment registers
    if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
    { 
   
        if (fWhat & CPUMCTX_EXTRN_CS)
            pInput->Names[iReg++] = HvX64RegisterCs;
    }
    ...
    // Control registers
    if (fWhat & CPUMCTX_EXTRN_CR_MASK)
    { 
   
        if (fWhat & CPUMCTX_EXTRN_CR0)
            pInput->Names[iReg++] = HvX64RegisterCr0;
        ...
    }
    // The APIC TPR is fetched via CR8
    if (fWhat & CPUMCTX_EXTRN_APIC_TPR)
        pInput->Names[iReg++] = HvX64RegisterCr8;
    // XMM registers
    if (fWhat & CPUMCTX_EXTRN_SSE_AVX)
    { 
   
        pInput->Names[iReg++] = HvX64RegisterXmm0;
        pInput->Names[iReg++] = HvX64RegisterXmm1;
    }
    ...
    // Invoke the hypercall to fetch the register values; the output area follows the input in the same page
    uint64_t uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallGetVpRegisters, cRegs),
                                               pGVCpu->nemr0.s.HypercallData.HCPhysPage,
                                               pGVCpu->nemr0.s.HypercallData.HCPhysPage + cbInput);
    // Copy the returned values into the CPUMCTX structure, in the same order as the names above
    if (fWhat & CPUMCTX_EXTRN_GPRS_MASK)
    { 
   
        if (fWhat & CPUMCTX_EXTRN_RAX)
        { 
   
            pCtx->rax = paValues[iReg++].Reg64;
        }
    }
    ...
    if (fWhat & CPUMCTX_EXTRN_CR0)
    { 
   
      // Some register changes alter CPU behaviour, so they go through the matching API instead of a plain store
      if (pCtx->cr0 != paValues[iReg].Reg64)
      { 
   
        CPUMSetGuestCR0(pGVCpu, paValues[iReg].Reg64);
        fMaybeChangedMode = true;
      }
      iReg++;
    }
    ...
    if (fWhat & CPUMCTX_EXTRN_EFER)
    { 
   
        // EFER changed
        if (paValues[iReg].Reg64 != pCtx->msrEFER)
        { 
   
            // If the NXE bit flipped, notify PGM about the new NX state
            if ((paValues[iReg].Reg64 ^ pCtx->msrEFER) & MSR_K6_EFER_NXE)
                PGMNotifyNxeChanged(pGVCpu, RT_BOOL(paValues[iReg].Reg64 & MSR_K6_EFER_NXE));
            pCtx->msrEFER = paValues[iReg].Reg64;
            fMaybeChangedMode = true;
        }
        iReg++;
    }
    ...
    if (fWhat & CPUMCTX_EXTRN_OTHER_MSRS)
    { 
   
        // The APIC base changed: update the base in the virtual APIC
        const uint64_t uOldBase = APICGetBaseMsrNoCheck(pGVCpu);
        if (paValues[iReg].Reg64 != uOldBase)
        { 
   
            int rc2 = APICSetBaseMsr(pGVCpu, paValues[iReg].Reg64);
        }
    }
    ...
    // Update the interrupt / NMI inhibit flags from the returned interrupt state
    if (fWhat & (CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI))
    { 
   
        if (!(pCtx->fExtrn & CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT))
        { 
   
            // Remember the interrupt shadow; when set, record the inhibit PC taken from the following value
            pGVCpu->nem.s.fLastInterruptShadow = paValues[iReg].InterruptState.InterruptShadow;
            if (paValues[iReg].InterruptState.InterruptShadow)
                EMSetInhibitInterruptsPC(pGVCpu, paValues[iReg + 1].Reg64);
            else
                VMCPU_FF_CLEAR(pGVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
        }
        if (!(pCtx->fExtrn & CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI))
        { 
   
            if (paValues[iReg].InterruptState.NmiMasked)
                VMCPU_FF_SET(pGVCpu, VMCPU_FF_BLOCK_NMIS);
            else
                VMCPU_FF_CLEAR(pGVCpu, VMCPU_FF_BLOCK_NMIS);
        }
        fWhat |= CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI;
        // Two values were consumed: the interrupt state and the one read at iReg + 1 above
        iReg += 2;
  }
  // The execution mode may have changed: let PGM adapt
  int rc = VINF_SUCCESS;
  if (fMaybeChangedMode)
  { 
   
      rc = PGMChangeMode(pGVCpu, pCtx->cr0, pCtx->cr4, pCtx->msrEFER);
  }
  ...
}

nemR0WinExportState

把CPUMCTX里的寄存器写入GuestOS里,原理和上面ImportState类似:先把所有寄存器的名字/value写入HypercallData.pbPage里,然后调用HvCallSetVpRegisters hypercall写入GuestOS寄存器信息,相关寄存器的定义可以参考下面的文档:

https://docs.microsoft.com/en-us/virtualization/api/hypervisor-platform/funcs/whvvirtualprocessordatatypes

Virtualbox对应的头文件定义在 include\iprt\nt\hyperv.h里

// Writes the guest registers that are not marked external in pCtx->fExtrn
// back to the hypervisor (abridged excerpt; "..." marks elided code).
//
// Name/value pairs are placed in the hypercall page and then written with
// the HvCallSetVpRegisters hypercall.  Afterwards everything is marked as
// external again in pCtx->fExtrn.
//
// NOTE(review): the declaration of iReg, the rest of the input header setup
// and the COPY_OUT_SEG macro are not shown in this excerpt.
NEM_TMPL_STATIC int nemR0WinExportState(PGVM pGVM, PGVMCPU pGVCpu, PCPUMCTX pCtx)
{ 
   
    HV_INPUT_SET_VP_REGISTERS *pInput = (HV_INPUT_SET_VP_REGISTERS *)pGVCpu->nemr0.s.HypercallData.pbPage;
    // fWhat = the state that is NOT external, i.e. what CPUMCTX currently holds
    uint64_t const fWhat = ~pCtx->fExtrn & (CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_NEM_WIN_MASK);
    // For each flag in fWhat, add the name and value of the guest register to write
    if (fWhat & CPUMCTX_EXTRN_GPRS_MASK)
    { 
   
        //rax
        if (fWhat & CPUMCTX_EXTRN_RAX)
        { 
   
            HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
            pInput->Elements[iReg].Name                = HvX64RegisterRax;
            pInput->Elements[iReg].Value.Reg64         = pCtx->rax;
            iReg++;
        }
        ...
    }
    // Segment registers
    if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
    { 
   
        if (fWhat & CPUMCTX_EXTRN_CS)
        { 
   
            COPY_OUT_SEG(iReg, HvX64RegisterCs,   pCtx->cs);
            iReg++;
        }
        ...
    }
    // Event-injection state is to be exported: write HvRegisterPendingInterruption with the value 0
    if (fWhat & CPUMCTX_EXTRN_NEM_WIN_EVENT_INJECT)
    { 
   
        HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
        pInput->Elements[iReg].Name                 = HvRegisterPendingInterruption;
        pInput->Elements[iReg].Value.Reg64          = 0;
        iReg++;
    }
    
    // Both the interrupt-inhibit and the NMI-inhibit state are to be exported:
    // write HvRegisterInterruptState with InterruptShadow / NmiMasked set as appropriate
    if (   (fWhat & (CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI))
        ==          (CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI) )
    { 
   
        HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
        pInput->Elements[iReg].Name                 = HvRegisterInterruptState;
        pInput->Elements[iReg].Value.Reg64          = 0;
        // The shadow only applies while the inhibit PC still equals the current RIP
        if (   VMCPU_FF_IS_SET(pGVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
            && EMGetInhibitInterruptsPC(pGVCpu) == pCtx->rip)
            pInput->Elements[iReg].Value.InterruptState.InterruptShadow = 1;
        if (VMCPU_FF_IS_SET(pGVCpu, VMCPU_FF_BLOCK_NMIS))
            pInput->Elements[iReg].Value.InterruptState.NmiMasked = 1;
        iReg++;
    }
    // Only the interrupt-inhibit state is to be exported: write InterruptShadow,
    // but only if a shadow was seen on the last import or one is active now
    else if (fWhat & CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT)
    { 
   
        if (   pGVCpu->nem.s.fLastInterruptShadow
            || (   VMCPU_FF_IS_SET(pGVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
                && EMGetInhibitInterruptsPC(pGVCpu) == pCtx->rip))
        { 
   
            HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
            pInput->Elements[iReg].Name                 = HvRegisterInterruptState;
            pInput->Elements[iReg].Value.Reg64          = 0;
            if (   VMCPU_FF_IS_SET(pGVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
                && EMGetInhibitInterruptsPC(pGVCpu) == pCtx->rip)
                pInput->Elements[iReg].Value.InterruptState.InterruptShadow = 1;
            iReg++;
        }
    }
    // Interrupt windows are desired, or differ from the current setting: write
    // HvX64RegisterDeliverabilityNotifications (author: tells the hypervisor that interrupts are blocked and a notification is wanted).
    // fDesiredInterruptWindows is assigned through nemHCWinHandleInterruptFF
    uint8_t const fDesiredIntWin = pGVCpu->nem.s.fDesiredInterruptWindows;
    if (   fDesiredIntWin
        || pGVCpu->nem.s.fCurrentInterruptWindows != fDesiredIntWin)
    { 
   
        pGVCpu->nem.s.fCurrentInterruptWindows = pGVCpu->nem.s.fDesiredInterruptWindows;
        HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
        pInput->Elements[iReg].Name                                         = HvX64RegisterDeliverabilityNotifications;
        pInput->Elements[iReg].Value.DeliverabilityNotifications.AsUINT64   = fDesiredIntWin;
        iReg++;
    }
    // All registers are set up: invoke the HvCallSetVpRegisters hypercall to write them to the guest
    uint64_t uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallSetVpRegisters, iReg),
                                               pGVCpu->nemr0.s.HypercallData.HCPhysPage, 0 /*GCPhysOutput*/);
    // Everything now lives in the hypervisor again: mark all state as external, kept by NEM
    pCtx->fExtrn |= CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_NEM_WIN_MASK | CPUMCTX_EXTRN_KEEPER_NEM;
    return VINF_SUCCESS;
}

22.3中断处理

nemHCWinHandleInterruptFF

处理pending中断

// Deals with pending interrupt forced-action flags before entering the guest
// (abridged excerpt).
//
// A pending NMI or APIC/PIC interrupt is injected through IEMInjectTrap when
// it can be delivered right now; otherwise the corresponding interrupt-window
// request bits are ORed into *pfInterruptWindows.
//
// pVM                - VM handle (not used in the portion shown).
// pVCpu              - the virtual CPU.
// pfInterruptWindows - in/out: NEM_WIN_INTW_F_XXX bits describing the
//                      interrupt windows the caller should request.
//
// NOTE(review): the return statement for the fall-through paths is not shown
// in this excerpt.
NEM_TMPL_STATIC VBOXSTRICTRC nemHCWinHandleInterruptFF(PVMCC pVM, PVMCPUCC pVCpu, uint8_t *pfInterruptWindows)
{ 
   
    // If VMCPU_FF_UPDATE_APIC was set (test-and-clear), update the APIC's pending interrupts
    // (author: moves them from the APIC's PIB memory into the IRR queue)
    if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
    { 
   
        // Implementation covered in the earlier APIC article
        APICUpdatePendingInterrupts(pVCpu);
        // No interrupt flag is set after the update: nothing to do
        if (!VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC
                                      | VMCPU_FF_INTERRUPT_NMI  | VMCPU_FF_INTERRUPT_SMI))
            return VINF_SUCCESS;
    }
    // SMIs cannot be handled here: assert and return VERR_NEM_IPE_0
    AssertReturn(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_SMI), VERR_NEM_IPE_0);
    // Is an NMI pending?
    bool const fPendingNmi = VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI);
    // Import the guest state needed for the decisions below if any of it is still external
    uint64_t   fNeedExtrn  = CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS | (fPendingNmi ? CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI : 0);
    if (pVCpu->cpum.GstCtx.fExtrn & fNeedExtrn)
    { 
   
        VBOXSTRICTRC rcStrict = nemHCWinImportStateIfNeededStrict(pVCpu, NEM_WIN_CPUMCTX_EXTRN_MASK_FOR_IEM_XCPT, "IntFF");
        if (rcStrict != VINF_SUCCESS)
            return rcStrict;
    }
    // Interrupts count as inhibited only when the inhibit flag is set and the recorded inhibit PC equals the current RIP
    bool const fInhibitInterrupts = VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
                                 && EMGetInhibitInterruptsPC(pVCpu) == pVCpu->cpum.GstCtx.rip;
    
    // A NMI is pending: inject it through IEM if it is not blocked
    if (fPendingNmi)
    { 
   
        if (   !fInhibitInterrupts
            && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
        { 
   
            VBOXSTRICTRC rcStrict = nemHCWinImportStateIfNeededStrict(pVCpu, NEM_WIN_CPUMCTX_EXTRN_MASK_FOR_IEM_XCPT, "NMI");
            if (rcStrict == VINF_SUCCESS)
            { 
   
                VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI);
                rcStrict = IEMInjectTrap(pVCpu, X86_XCPT_NMI, TRPM_HARDWARE_INT, 0, 0, 0);
            }
            return rcStrict;
        }
        // The NMI is blocked: request an NMI window
        *pfInterruptWindows |= NEM_WIN_INTW_F_NMI;
    }

    // An APIC/PIC interrupt is pending
    if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
    { 
   
        if (   !fInhibitInterrupts
            && pVCpu->cpum.GstCtx.rflags.Bits.u1IF)
        { 
   
            // Import the register state needed for injection
            VBOXSTRICTRC rcStrict = nemHCWinImportStateIfNeededStrict(pVCpu, NEM_WIN_CPUMCTX_EXTRN_MASK_FOR_IEM_XCPT, "NMI");
            if (rcStrict == VINF_SUCCESS)
            { 
   
                // Ask PDM for the interrupt to deliver
                // (author: the highest-priority one, which the APIC device moves to its ISR)
                uint8_t bInterrupt;
                int rc = PDMGetInterrupt(pVCpu, &bInterrupt);
                if (RT_SUCCESS(rc))
                { 
   
                    // Inject the interrupt through IEM
                    rcStrict = IEMInjectTrap(pVCpu, bInterrupt, TRPM_HARDWARE_INT, 0, 0, 0);
                }
                // The interrupt is masked by the TPR: record its priority (upper nibble of the vector) in the window flags
                else if (rc == VERR_APIC_INTR_MASKED_BY_TPR)
                { 
   
                    *pfInterruptWindows |= (bInterrupt >> 4 /*??*/) << NEM_WIN_INTW_F_PRIO_SHIFT;
                }
            }
            return rcStrict;
        }
        // The interrupt cannot be delivered now: request a regular interrupt window
        *pfInterruptWindows |= NEM_WIN_INTW_F_REGULAR;
    }
}

THE END

发表回复