本篇重点介绍NEM模式下执行GuestOS相关的函数,基本框架和HM/Raw-mode完全一样,只是具体实现代码(调用的API)不同而已。
主循环函数的实现原理和过程与VMX/Raw-mode几乎一样,部分处理force action的代码可以参考EM一篇。
EMR3Nem.cpp
/**
 * NEM execution loop: repeatedly services forced actions and runs guest code
 * until a status other than VINF_SUCCESS has to be handed back to the caller.
 *
 * @returns Strict status code produced by the last step of the loop.
 * @param   pVM       The VM handle.
 * @param   pVCpu     The virtual CPU to run.
 * @param   pfFFDone  Set to true when the loop is left from the generic
 *                    forced-action handling at the bottom of the loop;
 *                    not written on any other path.
 */
VBOXSTRICTRC emR3NemExecute(PVM pVM, PVMCPU pVCpu, bool *pfFFDone)
{
    // Every path through the loop assigns rcStrict before it is read; the
    // initial value only gives the variable a defined state.
    VBOXSTRICTRC rcStrict = VINF_SUCCESS;

    for (;;)
    {
        // Service high-priority forced actions before entering the guest.
        if (   VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_PRE_RAW_MASK)
            || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_PRE_RAW_MASK))
        {
            rcStrict = emR3NemForcedActions(pVM, pVCpu);
            if (rcStrict != VINF_SUCCESS)
                break;
        }

        // Run guest code, or back off for 5 ms when execution is currently not allowed.
        if (RT_LIKELY(emR3IsExecutionAllowed(pVM, pVCpu)))
        {
            rcStrict = NEMR3RunGC(pVM, pVCpu);
        }
        else
        {
            RTThreadSleep(5);
            rcStrict = VINF_SUCCESS;
        }

        // Drop the resume-guest flags, then let EM process high-priority post-run actions.
        VMCPU_FF_CLEAR_MASK(pVCpu, VMCPU_FF_RESUME_GUEST_MASK);
        if (   VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_POST_MASK)
            || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HIGH_PRIORITY_POST_MASK))
            rcStrict = emR3HighPriorityPostForcedActions(pVM, pVCpu, rcStrict);

        // Any status in the [VINF_EM_FIRST, VINF_EM_LAST] range ends the loop.
        if (rcStrict >= VINF_EM_FIRST && rcStrict <= VINF_EM_LAST)
            break;

        // Translate the run status; only VINF_SUCCESS lets the loop continue.
        rcStrict = emR3NemHandleRC(pVM, pVCpu, VBOXSTRICTRC_TODO(rcStrict));
        if (rcStrict != VINF_SUCCESS)
            break;

        // Generic forced-action processing (EM code).
        if (   VM_FF_IS_ANY_SET(pVM, VM_FF_ALL_MASK)
            || VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_ALL_MASK))
        {
            rcStrict = emR3ForcedActions(pVM, pVCpu, VBOXSTRICTRC_TODO(rcStrict));
            VBOXVMM_EM_FF_ALL_RET(pVCpu, VBOXSTRICTRC_VAL(rcStrict));
            // Leave unless the result is VINF_SUCCESS or VINF_EM_RESCHEDULE_HM.
            if (   rcStrict != VINF_SUCCESS
                && rcStrict != VINF_EM_RESCHEDULE_HM)
            {
                *pfFFDone = true;
                break;
            }
        }
    }

    // Pull in whatever guest state is still flagged as external in fExtrn
    // before handing control back.
    if (pVCpu->cpum.GstCtx.fExtrn)
    {
        // NOTE(review): the import status is not acted upon in this excerpt;
        // confirm how a failure should be reported.
        int rcImport = NEMImportStateOnDemand(pVCpu, pVCpu->cpum.GstCtx.fExtrn);
    }

    return rcStrict;
}
// Runs guest code for one VCPU by calling into ring-0 (VMMR0_DO_NEM_RUN) and
// post-processes VINF_NEM_FLUSH_TLB before returning the status.
// Only the branch taken when pVM->nem.s.fUseRing0Runloop is set is shown.
// NOTE(review): nothing is returned when fUseRing0Runloop is clear - the
// fallback path appears to have been elided; confirm against the full source.
VBOXSTRICTRC nemR3NativeRunGC(PVM pVM, PVMCPU pVCpu)
{
//The ring-0 run loop is enabled
if (pVM->nem.s.fUseRing0Runloop)
{
for (;;)
{
//Ask ring-0 to run guest code
VBOXSTRICTRC rcStrict = VMMR3CallR0EmtFast(pVM, pVCpu, VMMR0_DO_NEM_RUN);
//Post-process successful statuses
if (RT_SUCCESS(rcStrict))
{
//Fetch and reset the deferred status; a TLB flush can be requested either
//directly through rcStrict or through rcPending
VBOXSTRICTRC rcPending = pVCpu->nem.s.rcPending;
pVCpu->nem.s.rcPending = VINF_SUCCESS;
if (rcStrict == VINF_NEM_FLUSH_TLB || rcPending == VINF_NEM_FLUSH_TLB)
{
//Flush the TLB for the current guest CR3
//NOTE(review): rc is not checked in this excerpt
int rc = PGMFlushTLB(pVCpu, CPUMGetGuestCR3(pVCpu), true);
if (rcStrict == VINF_NEM_FLUSH_TLB)
{
if ( !VM_FF_IS_ANY_SET(pVM, VM_FF_HIGH_PRIORITY_POST_MASK | VM_FF_HP_R0_PRE_HM_MASK)
&& !VMCPU_FF_IS_ANY_SET(pVCpu, (VMCPU_FF_HIGH_PRIORITY_POST_MASK | VMCPU_FF_HP_R0_PRE_HM_MASK)
& ~VMCPU_FF_RESUME_GUEST_MASK))
{
//No other high-priority flag is pending: clear VMCPU_FF_RESUME_GUEST_MASK and go straight back into the guest
VMCPU_FF_CLEAR_MASK(pVCpu, VMCPU_FF_RESUME_GUEST_MASK);
continue;
}
//The flush was the whole request, so report plain success to the caller
rcStrict = VINF_SUCCESS;
}
}
}
return rcStrict;
}
}
}
VMMR0EntryFast:R3通过IOCTL通知R0开始循环执行GuestOS代码
// Excerpt of VMMR0EntryFast showing only the VMMR0_DO_NEM_RUN case; everything
// else in the function (including the enclosing switch) is hidden behind the
// "...." placeholder below.
VMMR0DECL(void) VMMR0EntryFast(PGVM pGVM, PVMCC pVMIgnored, VMCPUID idCpu, VMMR0OPERATION enmOperation)
{
....
case VMMR0_DO_NEM_RUN:
{
//Call NEMR0RunGuestCode (the ring-0 run loop) through the set-jump wrapper vmmR0CallRing3SetJmp2
int rc = vmmR0CallRing3SetJmp2(&pGVCpu->vmm.s.CallRing3JmpBufR0, (PFNVMMR0SETJMP2)NEMR0RunGuestCode, pGVM, idCpu);
//Record the status in iLastGZRc for this VCPU
pGVCpu->vmm.s.iLastGZRc = rc;
VBOXVMM_R0_VMM_RETURN_TO_RING3_NEM(pGVCpu, CPUMQueryGuestCtxPtr(pGVCpu), rc);
break;
}
}
最终调用到nemHCWinRunGC,这个函数是个大杂烩,3种不同的实现都放到了这一个函数里,为了简化,只看R0循环的版本
/**
 * Run loop for one virtual CPU (ring-0 variant of the excerpt): injects
 * pending interrupts, exports modified registers, starts the virtual
 * processor, waits for an exit message and handles it, until something has
 * to be dealt with by the caller.
 *
 * @returns Strict status code of the last step. VINF_SUCCESS is also returned
 *          when the VCPU could not be moved into the EXEC_NEM state.
 *          VERR_NEM_IPE_5 when starting the virtual processor fails.
 * @param   pVM    The VM handle.
 * @param   pVCpu  The virtual CPU to run.
 */
NEM_TMPL_STATIC VBOXSTRICTRC nemHCWinRunGC(PVMCC pVM, PVMCPUCC pVCpu)
{
    // Switch the VCPU from STARTED to STARTED_EXEC_NEM; if that fails, try the
    // transition from the CANCELED state and return immediately.
    if (VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC_NEM, VMCPUSTATE_STARTED))
    { /* likely */ }
    else
    {
        VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC_NEM, VMCPUSTATE_STARTED_EXEC_NEM_CANCELED);
        return VINF_SUCCESS;
    }

    // Running status; it is passed to nemHCWinStopCpu() before anything else
    // may have assigned it, so it must start out defined.
    VBOXSTRICTRC rcStrict = VINF_SUCCESS;

    // Message slot mapping through which the exit message is read.
    VID_MESSAGE_MAPPING_HEADER volatile *pMappingHeader = (VID_MESSAGE_MAPPING_HEADER volatile *)pVCpu->nem.s.pvMsgSlotMapping;

    // Single stepping selects the *_STEP_MASK force-flag masks further down.
    const bool fSingleStepping = DBGFIsStepping(pVCpu);

    for (unsigned iLoop = 0;; iLoop++)
    {
        pVCpu->nem.s.fDesiredInterruptWindows = 0;

        // Pending interrupt force-flags: stop the CPU first if a message fetch
        // is outstanding, then try to inject.
        if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_UPDATE_APIC | VMCPU_FF_INTERRUPT_PIC
                                     | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI))
        {
            if (pVCpu->nem.s.fHandleAndGetFlags == VID_MSHAGN_F_GET_NEXT_MESSAGE)
            {
                pVCpu->nem.s.fHandleAndGetFlags = 0;
                rcStrict = nemHCWinStopCpu(pVM, pVCpu, rcStrict, pMappingHeader);
                if (rcStrict == VINF_SUCCESS)
                { /* likely */ }
                else
                    break;
            }

            rcStrict = nemHCWinHandleInterruptFF(pVM, pVCpu, &pVCpu->nem.s.fDesiredInterruptWindows);
            if (rcStrict == VINF_SUCCESS)
            { /* likely */ }
            else
                break;
        }

        // Export state when some register is no longer marked external in
        // fExtrn, or when the interrupt-window request has to be (re)programmed
        // and no message fetch is outstanding.
        if (   (pVCpu->cpum.GstCtx.fExtrn & (CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_NEM_WIN_MASK))
            != (CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_NEM_WIN_MASK)
            || (   (   pVCpu->nem.s.fDesiredInterruptWindows
                    || pVCpu->nem.s.fCurrentInterruptWindows != pVCpu->nem.s.fDesiredInterruptWindows)
                && pVCpu->nem.s.fHandleAndGetFlags != VID_MSHAGN_F_GET_NEXT_MESSAGE)
           )
        {
            // NOTE(review): rc2 is not checked in this excerpt.
            int rc2 = nemR0WinExportState(pVM, pVCpu, &pVCpu->cpum.GstCtx);
        }

        // Poll the timers; the result bounds the wait time computed below.
        uint64_t offDeltaIgnored;
        uint64_t const nsNextTimerEvt = TMTimerPollGIP(pVM, pVCpu, &offDeltaIgnored);

        // Only enter the guest when none of these force-flags is pending.
        if (   !VM_FF_IS_ANY_SET(pVM, VM_FF_EMT_RENDEZVOUS | VM_FF_TM_VIRTUAL_SYNC)
            && !VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_HM_TO_R3_MASK))
        {
            if (pVCpu->nem.s.fHandleAndGetFlags)
            {
                // Flags are already set: skip starting the virtual processor
                // and go straight to the message wait below.
            }
            else
            {
                pVCpu->nem.s.uIoCtlBuf.idCpu = pVCpu->idCpu;
                // Start the virtual processor.
                NTSTATUS rcNt = nemR0NtPerformIoControl(pVM, pVCpu, pVM->nemr0.s.IoCtlStartVirtualProcessor.uFunction,
                                                        &pVCpu->nem.s.uIoCtlBuf.idCpu, sizeof(pVCpu->nem.s.uIoCtlBuf.idCpu),
                                                        NULL, 0);
                // The excerpt kept only the tail of the check that followed
                // (a dangling "VERR_NEM_IPE_5);"). NTSTATUS failure codes are
                // negative, so this is the !NT_SUCCESS(rcNt) case.
                if (rcNt < 0)
                    return VERR_NEM_IPE_5;
                // The processor is running: the next ioctl has to fetch a message.
                pVCpu->nem.s.fHandleAndGetFlags = VID_MSHAGN_F_GET_NEXT_MESSAGE;
            }

            // Enter the EXEC_NEM_WAIT state for the duration of the wait.
            if (VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC_NEM_WAIT, VMCPUSTATE_STARTED_EXEC_NEM))
            {
                // Derive the wait timeout in milliseconds (0, 1, a computed
                // value, or capped at one second).
                uint64_t const nsNow = RTTimeNanoTS();
                int64_t const cNsNextTimerEvt = nsNow - nsNextTimerEvt;
                uint32_t cMsWait;
                if (cNsNextTimerEvt < 100000 /* ns */)
                    cMsWait = 0;
                else if ((uint64_t)cNsNextTimerEvt < RT_NS_1SEC)
                {
                    if ((uint32_t)cNsNextTimerEvt < 2*RT_NS_1MS)
                        cMsWait = 1;
                    else
                        cMsWait = ((uint32_t)cNsNextTimerEvt - 100000 /*ns*/) / RT_NS_1MS;
                }
                else
                    cMsWait = RT_MS_1SEC;

                // Handle the previous message (if flagged) and wait for the next one.
                pVCpu->nem.s.uIoCtlBuf.MsgSlotHandleAndGetNext.iCpu = pVCpu->idCpu;
                pVCpu->nem.s.uIoCtlBuf.MsgSlotHandleAndGetNext.fFlags = pVCpu->nem.s.fHandleAndGetFlags;
                pVCpu->nem.s.uIoCtlBuf.MsgSlotHandleAndGetNext.cMillies = cMsWait;
                NTSTATUS rcNt = nemR0NtPerformIoControl(pVM, pVCpu, pVM->nemr0.s.IoCtlMessageSlotHandleAndGetNext.uFunction,
                                                        &pVCpu->nem.s.uIoCtlBuf.MsgSlotHandleAndGetNext,
                                                        pVM->nemr0.s.IoCtlMessageSlotHandleAndGetNext.cbInput,
                                                        NULL, 0);

                // Back to EXEC_NEM for the next iteration.
                VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED_EXEC_NEM, VMCPUSTATE_STARTED_EXEC_NEM_WAIT);
                if (rcNt == STATUS_SUCCESS)
                {
                    // A message arrived: process the exit.
                    rcStrict = nemHCWinHandleMessage(pVM, pVCpu, pMappingHeader);
                    // Add HANDLE_MESSAGE to the flags passed to the next
                    // handle-and-get-next ioctl (the flag is set here, not cleared).
                    pVCpu->nem.s.fHandleAndGetFlags |= VID_MSHAGN_F_HANDLE_MESSAGE;
                    if (rcStrict == VINF_SUCCESS)
                    { /* hopefully likely */ }
                    else
                        break;
                }
                else
                {
                    // No message obtained: a fetch is still outstanding, so the
                    // CPU gets stopped and the message collected after the loop.
                    pVCpu->nem.s.fHandleAndGetFlags = VID_MSHAGN_F_GET_NEXT_MESSAGE;
                }

                // Keep running the guest while none of the (step-aware)
                // pre-HM force-flags is pending.
                if (   !VM_FF_IS_ANY_SET(   pVM,   !fSingleStepping ? VM_FF_HP_R0_PRE_HM_MASK    : VM_FF_HP_R0_PRE_HM_STEP_MASK)
                    && !VMCPU_FF_IS_ANY_SET(pVCpu, !fSingleStepping ? VMCPU_FF_HP_R0_PRE_HM_MASK : VMCPU_FF_HP_R0_PRE_HM_STEP_MASK) )
                    continue;
            }
        }

        // Every path that did not 'continue' has something pending for the
        // caller (force-flag before entry, failed state switch, or force-flag
        // after the exit). Without this break the loop would spin forever.
        break;
    }

    // A message fetch is still outstanding: stop the CPU and handle the message.
    if (pVCpu->nem.s.fHandleAndGetFlags == VID_MSHAGN_F_GET_NEXT_MESSAGE)
    {
        pVCpu->nem.s.fHandleAndGetFlags = 0;
        rcStrict = nemHCWinStopCpu(pVM, pVCpu, rcStrict, pMappingHeader);
    }

    // Return the VCPU to the STARTED state (from EXEC_NEM or its CANCELED variant).
    if (!VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC_NEM))
        VMCPU_CMPXCHG_STATE(pVCpu, VMCPUSTATE_STARTED, VMCPUSTATE_STARTED_EXEC_NEM_CANCELED);

    // Import the guest state the caller is going to need, chosen by status code.
    if (pVCpu->cpum.GstCtx.fExtrn & (CPUMCTX_EXTRN_ALL | (CPUMCTX_EXTRN_NEM_WIN_MASK & ~CPUMCTX_EXTRN_NEM_WIN_EVENT_INJECT)))
    {
        // Default set: what IEM must have plus the interrupt/NMI inhibit state.
        uint64_t fImport = IEM_CPUMCTX_EXTRN_MUST_MASK | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI;
        if (   (rcStrict >= VINF_EM_FIRST && rcStrict <= VINF_EM_LAST)
            || RT_FAILURE(rcStrict))
            // EM status or failure: import everything except the event-injection bit.
            fImport = CPUMCTX_EXTRN_ALL | (CPUMCTX_EXTRN_NEM_WIN_MASK & ~CPUMCTX_EXTRN_NEM_WIN_EVENT_INJECT);
        else if (   rcStrict == VINF_IOM_R3_IOPORT_COMMIT_WRITE
                 || rcStrict == VINF_EM_PENDING_R3_IOPORT_WRITE)
            // I/O port write to be completed by the caller: RIP, CS, RFLAGS and interrupt inhibit state.
            fImport = CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT;
        else if (rcStrict == VINF_EM_PENDING_R3_IOPORT_READ)
            // I/O port read: same set plus RAX.
            fImport = CPUMCTX_EXTRN_RAX | CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_CS | CPUMCTX_EXTRN_RFLAGS | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT;
        else if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_PIC | VMCPU_FF_INTERRUPT_APIC
                                          | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI))
            // Pending interrupt: additionally import what exception delivery via IEM needs.
            fImport |= IEM_CPUMCTX_EXTRN_XCPT_MASK;

        if (pVCpu->cpum.GstCtx.fExtrn & fImport)
        {
            int rc2 = nemR0WinImportState(pVM, pVCpu, &pVCpu->cpum.GstCtx, fImport | CPUMCTX_EXTRN_NEM_WIN_EVENT_INJECT,
                                          true /*fCanUpdateCr3*/);
            if (RT_SUCCESS(rc2))
                pVCpu->cpum.GstCtx.fExtrn &= ~fImport;
            else if (rc2 == VERR_NEM_FLUSH_TLB)
            {
                // The import itself succeeded but a TLB flush is required:
                // report it directly when possible, otherwise park it in rcPending.
                pVCpu->cpum.GstCtx.fExtrn &= ~fImport;
                if (rcStrict == VINF_SUCCESS || rcStrict == -rc2)
                    rcStrict = -rc2;
                else
                {
                    pVCpu->nem.s.rcPending = -rc2;
                }
            }
            else if (RT_SUCCESS(rcStrict))
                rcStrict = rc2;

            // Nothing of interest left external: clear fExtrn completely.
            if (!(pVCpu->cpum.GstCtx.fExtrn & (CPUMCTX_EXTRN_ALL | (CPUMCTX_EXTRN_NEM_WIN_MASK & ~CPUMCTX_EXTRN_NEM_WIN_EVENT_INJECT))))
                pVCpu->cpum.GstCtx.fExtrn = 0;
        }
    }

    return rcStrict;
}
/**
 * Services the forced actions that must be dealt with before running guest
 * code in NEM mode.
 *
 * @returns VINF_SUCCESS when execution may proceed, VINF_EM_NO_MEMORY when
 *          VM_FF_PGM_NO_MEMORY is (still) set, or the failure status of
 *          PGMR3PhysAllocateHandyPages().
 * @param   pVM    The VM handle.
 * @param   pVCpu  The virtual CPU.
 */
static int emR3NemForcedActions(PVM pVM, PVMCPU pVCpu)
{
    int rcRet = VINF_SUCCESS;

    /* CR3 sync requests are simply acknowledged here: the flags are cleared
       and nothing else is done. */
    if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL))
        VMCPU_FF_CLEAR_MASK(pVCpu, VMCPU_FF_PGM_SYNC_CR3 | VMCPU_FF_PGM_SYNC_CR3_NON_GLOBAL);

    /* Replenish handy pages when requested and not already out of memory. */
    if (VM_FF_IS_PENDING_EXCEPT(pVM, VM_FF_PGM_NEED_HANDY_PAGES, VM_FF_PGM_NO_MEMORY))
        rcRet = PGMR3PhysAllocateHandyPages(pVM);

    /* An allocation failure is returned as-is; otherwise the outcome depends
       solely on whether the no-memory flag is set at this point. */
    if (!RT_FAILURE(rcRet))
        rcRet = VM_FF_IS_SET(pVM, VM_FF_PGM_NO_MEMORY) ? VINF_EM_NO_MEMORY : VINF_SUCCESS;

    return rcRet;
}
从Hypervisor里获取寄存器的值
/**
 * Reads the guest registers selected by fWhat from the hypervisor and stores
 * them in the CPUM context (abridged: "..." lines mark elided code).
 *
 * The register names are queued in the per-VCPU hypercall page, a
 * HvCallGetVpRegisters hypercall is issued, and the returned values are then
 * consumed in the same order in which the names were queued.
 *
 * @param   pGVM           The VM handle (supplies the partition ID).
 * @param   pGVCpu         The VCPU whose registers are read.
 * @param   pCtx           The CPUM context receiving the values.
 * @param   fWhat          CPUMCTX_EXTRN_XXX mask selecting what to import.
 * @param   fCanUpdateCr3  Not used in the visible part of the function.
 */
NEM_TMPL_STATIC int nemR0WinImportState(PGVM pGVM, PGVMCPU pGVCpu, PCPUMCTX pCtx, uint64_t fWhat, bool fCanUpdateCr3)
{
    HV_INPUT_GET_VP_REGISTERS *pInput = (HV_INPUT_GET_VP_REGISTERS *)pGVCpu->nemr0.s.HypercallData.pbPage;
    pInput->PartitionId = pGVM->nemr0.s.idHvPartition;
    pInput->VpIndex = pGVCpu->idCpu;
    pInput->fFlags = 0;

    // Queue the name of every requested register.
    uintptr_t iReg = 0;
    if (fWhat & CPUMCTX_EXTRN_GPRS_MASK)
    {
        if (fWhat & CPUMCTX_EXTRN_RAX)
            pInput->Names[iReg++] = HvX64RegisterRax;
        if (fWhat & CPUMCTX_EXTRN_RCX)
            pInput->Names[iReg++] = HvX64RegisterRcx;
        ...
    }
    // Segment registers
    if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
    {
        if (fWhat & CPUMCTX_EXTRN_CS)
            pInput->Names[iReg++] = HvX64RegisterCs;
    }
    ...
    // Control registers
    if (fWhat & CPUMCTX_EXTRN_CR_MASK)
    {
        if (fWhat & CPUMCTX_EXTRN_CR0)
            pInput->Names[iReg++] = HvX64RegisterCr0;
        ...
    }
    // The APIC TPR is requested as CR8.
    if (fWhat & CPUMCTX_EXTRN_APIC_TPR)
        pInput->Names[iReg++] = HvX64RegisterCr8;
    // XMM registers
    if (fWhat & CPUMCTX_EXTRN_SSE_AVX)
    {
        pInput->Names[iReg++] = HvX64RegisterXmm0;
        pInput->Names[iReg++] = HvX64RegisterXmm1;
    }
    ...
    // Issue the hypercall; the output area follows the input in the same page
    // (HCPhysPage + cbInput).
    // NOTE(review): cRegs, cbInput and paValues are set up in elided code.
    uint64_t uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallGetVpRegisters, cRegs),
                                               pGVCpu->nemr0.s.HypercallData.HCPhysPage,
                                               pGVCpu->nemr0.s.HypercallData.HCPhysPage + cbInput);

    // The values are consumed in the order the names were queued, so the index
    // has to start over; continuing from the name count would read past the
    // returned values.
    iReg = 0;

    // Copy the returned values into the CPUM context.
    if (fWhat & CPUMCTX_EXTRN_GPRS_MASK)
    {
        if (fWhat & CPUMCTX_EXTRN_RAX)
        {
            pCtx->rax = paValues[iReg++].Reg64;
        }
    }
    ...
    if (fWhat & CPUMCTX_EXTRN_CR0)
    {
        // A changed CR0 goes through CPUMSetGuestCR0 and may change the CPU mode.
        if (pCtx->cr0 != paValues[iReg].Reg64)
        {
            CPUMSetGuestCR0(pGVCpu, paValues[iReg].Reg64);
            fMaybeChangedMode = true;
        }
        iReg++;
    }
    ...
    if (fWhat & CPUMCTX_EXTRN_EFER)
    {
        if (paValues[iReg].Reg64 != pCtx->msrEFER)
        {
            // Tell PGM when the NXE bit flipped.
            if ((paValues[iReg].Reg64 ^ pCtx->msrEFER) & MSR_K6_EFER_NXE)
                PGMNotifyNxeChanged(pGVCpu, RT_BOOL(paValues[iReg].Reg64 & MSR_K6_EFER_NXE));
            pCtx->msrEFER = paValues[iReg].Reg64;
            fMaybeChangedMode = true;
        }
        iReg++;
    }
    ...
    if (fWhat & CPUMCTX_EXTRN_OTHER_MSRS)
    {
        // Propagate a changed APIC base MSR to the virtual APIC.
        // NOTE(review): rc2 is not checked and iReg is not advanced in this
        // excerpt; confirm against the full source.
        const uint64_t uOldBase = APICGetBaseMsrNoCheck(pGVCpu);
        if (paValues[iReg].Reg64 != uOldBase)
        {
            int rc2 = APICSetBaseMsr(pGVCpu, paValues[iReg].Reg64);
        }
    }
    ...
    // Interrupt state: paValues[iReg] carries InterruptShadow/NmiMasked and
    // paValues[iReg + 1].Reg64 is used as the PC for the interrupt inhibit.
    if (fWhat & (CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI))
    {
        // Applied only when the INHIBIT_INT bit is clear in pCtx->fExtrn.
        if (!(pCtx->fExtrn & CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT))
        {
            pGVCpu->nem.s.fLastInterruptShadow = paValues[iReg].InterruptState.InterruptShadow;
            if (paValues[iReg].InterruptState.InterruptShadow)
                EMSetInhibitInterruptsPC(pGVCpu, paValues[iReg + 1].Reg64);
            else
                VMCPU_FF_CLEAR(pGVCpu, VMCPU_FF_INHIBIT_INTERRUPTS);
        }
        // Applied only when the INHIBIT_NMI bit is clear in pCtx->fExtrn.
        if (!(pCtx->fExtrn & CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI))
        {
            if (paValues[iReg].InterruptState.NmiMasked)
                VMCPU_FF_SET(pGVCpu, VMCPU_FF_BLOCK_NMIS);
            else
                VMCPU_FF_CLEAR(pGVCpu, VMCPU_FF_BLOCK_NMIS);
        }
        // Both bits are treated as imported from here on.
        fWhat |= CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI;
        iReg += 2;
    }
    // Let PGM re-evaluate the paging mode when CR0/EFER (or elided registers) changed.
    int rc = VINF_SUCCESS;
    if (fMaybeChangedMode)
    {
        rc = PGMChangeMode(pGVCpu, pCtx->cr0, pCtx->cr4, pCtx->msrEFER);
    }
    ...
}
把CPUMCTX里的寄存器写入GuestOS里,原理和上面ImportState类似:先把所有寄存器的名字/value写入HypercallData.pbPage里,然后调用HvCallSetVpRegisters hypercall写入GuestOS寄存器信息,相关寄存器的定义可以参考下面的文档:
https://docs.microsoft.com/en-us/virtualization/api/hypervisor-platform/funcs/whvvirtualprocessordatatypes
Virtualbox对应的头文件定义在 include\iprt\nt\hyperv.h里
// Writes guest registers from the CPUM context to the hypervisor (abridged:
// "..." lines mark elided code). Name/value pairs are queued in the per-VCPU
// hypercall page and submitted with one HvCallSetVpRegisters hypercall.
// Always returns VINF_SUCCESS in this excerpt.
// NOTE(review): the declaration of iReg and any setup of the input header
// are not shown here; confirm against the full source.
NEM_TMPL_STATIC int nemR0WinExportState(PGVM pGVM, PGVMCPU pGVCpu, PCPUMCTX pCtx)
{
HV_INPUT_SET_VP_REGISTERS *pInput = (HV_INPUT_SET_VP_REGISTERS *)pGVCpu->nemr0.s.HypercallData.pbPage;
//Export exactly the state whose bit is clear in pCtx->fExtrn
uint64_t const fWhat = ~pCtx->fExtrn & (CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_NEM_WIN_MASK);
//Queue name and value of each register selected by fWhat
if (fWhat & CPUMCTX_EXTRN_GPRS_MASK)
{
//rax
if (fWhat & CPUMCTX_EXTRN_RAX)
{
HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
pInput->Elements[iReg].Name = HvX64RegisterRax;
pInput->Elements[iReg].Value.Reg64 = pCtx->rax;
iReg++;
}
...
}
//Segment registers
if (fWhat & CPUMCTX_EXTRN_SREG_MASK)
{
if (fWhat & CPUMCTX_EXTRN_CS)
{
COPY_OUT_SEG(iReg, HvX64RegisterCs, pCtx->cs);
iReg++;
}
...
}
//Event injection state is exported: write 0 to HvRegisterPendingInterruption
if (fWhat & CPUMCTX_EXTRN_NEM_WIN_EVENT_INJECT)
{
HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
pInput->Elements[iReg].Name = HvRegisterPendingInterruption;
pInput->Elements[iReg].Value.Reg64 = 0;
iReg++;
}
//Both the interrupt-inhibit and the NMI-inhibit state are exported: write
//HvRegisterInterruptState with InterruptShadow/NmiMasked derived from the force-flags
if ( (fWhat & (CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI))
== (CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI) )
{
HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
pInput->Elements[iReg].Name = HvRegisterInterruptState;
pInput->Elements[iReg].Value.Reg64 = 0;
//The shadow only applies while RIP still equals the recorded inhibit PC
if ( VMCPU_FF_IS_SET(pGVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
&& EMGetInhibitInterruptsPC(pGVCpu) == pCtx->rip)
pInput->Elements[iReg].Value.InterruptState.InterruptShadow = 1;
if (VMCPU_FF_IS_SET(pGVCpu, VMCPU_FF_BLOCK_NMIS))
pInput->Elements[iReg].Value.InterruptState.NmiMasked = 1;
iReg++;
}
//Only the interrupt-inhibit state is exported: write the register only when a
//shadow was seen last time (fLastInterruptShadow) or one is active now
else if (fWhat & CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT)
{
if ( pGVCpu->nem.s.fLastInterruptShadow
|| ( VMCPU_FF_IS_SET(pGVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
&& EMGetInhibitInterruptsPC(pGVCpu) == pCtx->rip))
{
HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
pInput->Elements[iReg].Name = HvRegisterInterruptState;
pInput->Elements[iReg].Value.Reg64 = 0;
if ( VMCPU_FF_IS_SET(pGVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
&& EMGetInhibitInterruptsPC(pGVCpu) == pCtx->rip)
pInput->Elements[iReg].Value.InterruptState.InterruptShadow = 1;
iReg++;
}
}
//Program HvX64RegisterDeliverabilityNotifications with the desired interrupt
//windows when any is requested or the value differs from the current one
//(fDesiredInterruptWindows is passed to nemHCWinHandleInterruptFF by the run loop)
uint8_t const fDesiredIntWin = pGVCpu->nem.s.fDesiredInterruptWindows;
if ( fDesiredIntWin
|| pGVCpu->nem.s.fCurrentInterruptWindows != fDesiredIntWin)
{
pGVCpu->nem.s.fCurrentInterruptWindows = pGVCpu->nem.s.fDesiredInterruptWindows;
HV_REGISTER_ASSOC_ZERO_PADDING_AND_HI64(&pInput->Elements[iReg]);
pInput->Elements[iReg].Name = HvX64RegisterDeliverabilityNotifications;
pInput->Elements[iReg].Value.DeliverabilityNotifications.AsUINT64 = fDesiredIntWin;
iReg++;
}
//All elements queued: submit iReg of them with the HvCallSetVpRegisters hypercall
//NOTE(review): uResult is not checked in this excerpt
uint64_t uResult = g_pfnHvlInvokeHypercall(HV_MAKE_CALL_INFO(HvCallSetVpRegisters, iReg),
pGVCpu->nemr0.s.HypercallData.HCPhysPage, 0 /*GCPhysOutput*/);
//Mark the whole state as external again, with NEM as the keeper
pCtx->fExtrn |= CPUMCTX_EXTRN_ALL | CPUMCTX_EXTRN_NEM_WIN_MASK | CPUMCTX_EXTRN_KEEPER_NEM;
return VINF_SUCCESS;
}
处理pending中断
/**
 * Deals with the pending interrupt force-flags: injects an NMI or an
 * APIC/PIC interrupt through IEM when it can be delivered now, otherwise
 * records in *pfInterruptWindows which kind of interrupt window is wanted.
 *
 * @returns VINF_SUCCESS when nothing had to be injected or delivery was only
 *          deferred; otherwise the status of the state import or of
 *          IEMInjectTrap(). VERR_NEM_IPE_0 if an SMI is pending.
 * @param   pVM                 The VM handle (not used in the visible code).
 * @param   pVCpu               The virtual CPU.
 * @param   pfInterruptWindows  NEM_WIN_INTW_F_XXX flags are OR'ed into this
 *                              when delivery has to wait.
 */
NEM_TMPL_STATIC VBOXSTRICTRC nemHCWinHandleInterruptFF(PVMCC pVM, PVMCPUCC pVCpu, uint8_t *pfInterruptWindows)
{
    // Let the APIC update its pending interrupts first; stop here if no
    // interrupt force-flag remains set afterwards.
    if (VMCPU_FF_TEST_AND_CLEAR(pVCpu, VMCPU_FF_UPDATE_APIC))
    {
        APICUpdatePendingInterrupts(pVCpu);
        if (!VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC
                                      | VMCPU_FF_INTERRUPT_NMI | VMCPU_FF_INTERRUPT_SMI))
            return VINF_SUCCESS;
    }

    // SMIs are not handled here.
    AssertReturn(!VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_SMI), VERR_NEM_IPE_0);

    bool const fPendingNmi = VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INTERRUPT_NMI);

    // Import state if any of the bits the decisions below depend on is still external.
    uint64_t fNeedExtrn = CPUMCTX_EXTRN_NEM_WIN_INHIBIT_INT | CPUMCTX_EXTRN_RIP | CPUMCTX_EXTRN_RFLAGS | (fPendingNmi ? CPUMCTX_EXTRN_NEM_WIN_INHIBIT_NMI : 0);
    if (pVCpu->cpum.GstCtx.fExtrn & fNeedExtrn)
    {
        VBOXSTRICTRC rcStrict = nemHCWinImportStateIfNeededStrict(pVCpu, NEM_WIN_CPUMCTX_EXTRN_MASK_FOR_IEM_XCPT, "IntFF");
        if (rcStrict != VINF_SUCCESS)
            return rcStrict;
    }

    // Interrupts are inhibited only while RIP still equals the recorded inhibit PC.
    bool const fInhibitInterrupts = VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_INHIBIT_INTERRUPTS)
                                 && EMGetInhibitInterruptsPC(pVCpu) == pVCpu->cpum.GstCtx.rip;

    // NMI: inject now unless inhibited or NMIs are blocked.
    if (fPendingNmi)
    {
        if (   !fInhibitInterrupts
            && !VMCPU_FF_IS_SET(pVCpu, VMCPU_FF_BLOCK_NMIS))
        {
            VBOXSTRICTRC rcStrict = nemHCWinImportStateIfNeededStrict(pVCpu, NEM_WIN_CPUMCTX_EXTRN_MASK_FOR_IEM_XCPT, "NMI");
            if (rcStrict == VINF_SUCCESS)
            {
                VMCPU_FF_CLEAR(pVCpu, VMCPU_FF_INTERRUPT_NMI);
                rcStrict = IEMInjectTrap(pVCpu, X86_XCPT_NMI, TRPM_HARDWARE_INT, 0, 0, 0);
            }
            return rcStrict;
        }
        // Cannot be delivered yet: request an NMI window.
        *pfInterruptWindows |= NEM_WIN_INTW_F_NMI;
    }

    // APIC/PIC interrupt: inject now unless inhibited or RFLAGS.IF is clear.
    if (VMCPU_FF_IS_ANY_SET(pVCpu, VMCPU_FF_INTERRUPT_APIC | VMCPU_FF_INTERRUPT_PIC))
    {
        if (   !fInhibitInterrupts
            && pVCpu->cpum.GstCtx.rflags.Bits.u1IF)
        {
            // NOTE(review): the "NMI" tag below is passed on a non-NMI path; it
            // looks copied from the branch above - confirm before changing it.
            VBOXSTRICTRC rcStrict = nemHCWinImportStateIfNeededStrict(pVCpu, NEM_WIN_CPUMCTX_EXTRN_MASK_FOR_IEM_XCPT, "NMI");
            if (rcStrict == VINF_SUCCESS)
            {
                uint8_t bInterrupt;
                int rc = PDMGetInterrupt(pVCpu, &bInterrupt);
                if (RT_SUCCESS(rc))
                {
                    // Inject the interrupt through IEM.
                    rcStrict = IEMInjectTrap(pVCpu, bInterrupt, TRPM_HARDWARE_INT, 0, 0, 0);
                }
                // Masked by the TPR: record a priority derived from the upper
                // four bits of the vector in the window flags.
                else if (rc == VERR_APIC_INTR_MASKED_BY_TPR)
                {
                    *pfInterruptWindows |= (bInterrupt >> 4 /*??*/) << NEM_WIN_INTW_F_PRIO_SHIFT;
                }
            }
            return rcStrict;
        }
        // Cannot be delivered yet: request a regular interrupt window.
        *pfInterruptWindows |= NEM_WIN_INTW_F_REGULAR;
    }

    // Delivery was deferred (or nothing was pending): the caller carries on
    // with the windows recorded above.
    return VINF_SUCCESS;
}