APC 篇—— APC 执行
阅读原文时间:2022年02月06日阅读:4

  此系列是本人一个字一个字码出来的,包括示例和实验截图。由于系统内核的复杂性,故可能有错误或者不全面的地方,如有错误,欢迎批评指正,本教程将会长期更新。 如有好的建议,欢迎反馈。码字不易,如果本篇文章有帮助你的,如有闲钱,可以打赏支持我的创作。如想转载,请把我的转载信息附在文章后面,并声明我的个人信息和本人博客地址即可,但必须事先通知我

你如果是从中间插过来看的,请仔细阅读 羽夏看Win系统内核——简述 ,方便学习本教程。

  看此教程之前,问几个问题,基础知识储备好了吗?保护模式篇学会了吗?练习做完了吗?没有的话就不要继续了。


华丽的分割线


  我们逆向该函数的目的是弄清楚它是如何调用进入内核,如何初始化APC,如何插入APC的。为了节约篇幅,增加可读性,故使用经过重命名的伪C代码进行讲解,你的命名可能和我的不太一样,如果不知道类型和变量含义,可以参考WRK。

  QueueUserAPC这个函数在kernel32这个Dll中,我们来看看它的伪代码:

/*
 * QueueUserAPC (kernel32) - decompiler pseudo-code.
 *
 * Queries the active activation context, then queues BaseDispatchAPC on
 * hThread through NtQueueApcThread, passing pfnAPC, dwData and the context
 * value along.
 *
 * Returns 0 when the activation-context query fails; otherwise returns
 * whether NtQueueApcThread reported a non-negative status.
 */
DWORD __stdcall QueueUserAPC(PAPCFUNC pfnAPC, HANDLE hThread, ULONG_PTR dwData)
{
  // The query below is told the buffer is 8 bytes long and is given the
  // address of basicInfo. NOTE(review): basicFlags is assumed to sit directly
  // after basicInfo, as it does in the decompiled stack frame.
  int basicInfo = 0;
  int basicFlags = 0;
  NTSTATUS queryStatus;
  int apcContext;

  queryStatus = RtlQueryInformationActivationContext(
                  RTL_QUERY_ACTIVATION_CONTEXT_FLAG_USE_ACTIVE_ACTIVATION_CONTEXT,
                  0,
                  0,
                  ActivationContextBasicInformation,
                  &basicInfo,
                  8u,
                  0);
  if ( queryStatus < 0 )
  {
    DbgPrint(
      "SXS: %s failing because RtlQueryInformationActivationContext() returned status %08lx\n",
      "QueueUserAPC",
      queryStatus);
    return 0;
  }

  // Bit 0 of the flags dword replaces the context value with -1, which
  // BaseDispatchAPC treats as "call the routine directly".
  apcContext = (basicFlags & 1) != 0 ? -1 : basicInfo;
  return NtQueueApcThread(hThread, BaseDispatchAPC, pfnAPC, dwData, apcContext) >= 0;
}

  RtlQueryInformationActivationContext这个名字贼长的函数不知道是干啥的,经过查阅是查询RTL以获取有关当前激活上下文的信息,激活上下文简单就是manifest里面描述信息封装成的结构体,对于该函数也不知道有啥用,就先略过,把重心放到NtQueueApcThread上面。

  BaseDispatchAPC是个函数,从函数名可以看出它是派发APC执行的函数,点进去看看其伪代码:

/*
 * BaseDispatchAPC - decompiler pseudo-code of the routine that QueueUserAPC
 * hands to NtQueueApcThread as the APC routine.
 *
 * NormalContext   - invoked as a function below, i.e. it carries the callback
 *                   that was queued.
 * SystemArgument1 - forwarded to that callback.
 * SystemArgument2 - activation-context value; -1 selects the direct-call
 *                   path, any other value is kept in `handle` and released
 *                   after the callback returns.
 *
 * NOTE(review): v5 and v6 are read without ever being assigned in this
 * listing, and a1/a2 are the __fastcall register parameters. These look like
 * decompiler artifacts, so the exact argument lists of the calls below should
 * be confirmed against the disassembly.
 */
void __fastcall BaseDispatchAPC(struct _RTL_CALLER_ALLOCATED_ACTIVATION_CONTEXT_STACK_FRAME_EXTENDED *a1, void *a2, PVOID NormalContext, PVOID SystemArgument1, PVOID SystemArgument2)
{
  PRTL_CALLER_ALLOCATED_ACTIVATION_CONTEXT_STACK_FRAME_EXTENDED v5; // ecx
  int v6; // [esp+0h] [ebp-3Ch]
  int v7[5]; // [esp+Ch] [ebp-30h] BYREF
  HANDLE handle; // [esp+20h] [ebp-1Ch]
  CPPEH_RECORD ms_exc; // [esp+24h] [ebp-18h]

  // v7 is a five-dword record; v7[0] = 20 equals its own size in bytes.
  // NOTE(review): presumably the activation-context stack frame header - confirm.
  v7[0] = 20;
  v7[1] = 1;
  v7[2] = 0;
  v7[3] = 0;
  v7[4] = 0;
  handle = SystemArgument2;
  if ( SystemArgument2 == -1 )
  {
    // No activation context to switch to: call the queued routine directly.
    (NormalContext)(a1, a2, SystemArgument1);
  }
  else
  {
    // Activate the context, run the queued routine, then deactivate and
    // release the context.
    RtlActivateActivationContextUnsafeFast(a1, a2);
    // NOTE(review): the TryLevel stores presumably mark the bounds of an SEH
    // __try region around the callback - confirm in the disassembly.
    ms_exc.registration.TryLevel = 0;
    (NormalContext)(SystemArgument1, v7, SystemArgument2, v6);
    ms_exc.registration.TryLevel = -1;
    RtlDeactivateActivationContextUnsafeFast(v5);
    RtlReleaseActivationContext(handle);
  }
}

  经过分析,对该函数来说,我们想要执行的函数地址就是NormalContext参数,传入的参数变成SystemArgument1,激活上下文变成SystemArgument2。下面我们把注意力放到NtQueueApcThread这个函数上:

/*
 * NtQueueApcThread - decompiler pseudo-code.
 *
 * Resolves ThreadHandle to a thread object, allocates a 0x30-byte KAPC,
 * initializes it with ApcRoutine as the normal routine and ApcMode 1, and
 * queues it with SystemArgument1/SystemArgument2.
 *
 * Returns the ObReferenceObjectByHandle status when that call fails;
 * otherwise 0, STATUS_INVALID_HANDLE, STATUS_NO_MEMORY or STATUS_UNSUCCESSFUL
 * as set below.
 *
 * NOTE(review): the decompiler reuses the variable `Thread` (ebx) for the
 * NTSTATUS value, and writes the referenced object through &ThreadHandle.
 * v9 is annotated with the same [ebp+8h] slot, so ThreadHandle and v9 appear
 * to name the same thread object after the lookup - confirm in the
 * disassembly.
 */
NTSTATUS __stdcall NtQueueApcThread(HANDLE ThreadHandle, PKNORMAL_ROUTINE ApcRoutine, PVOID NormalContext, PVOID SystemArgument1, PVOID SystemArgument2)
{
  PETHREAD Thread; // ebx
  struct _KAPC *apc; // eax
  struct _KAPC *apc_1; // edi
  HANDLE v9; // [esp+14h] [ebp+8h] FORCED

  // 0x10 is the access mask requested on the handle.
  // NOTE(review): presumably THREAD_SET_CONTEXT - confirm.
  Thread = ObReferenceObjectByHandle(
             ThreadHandle,
             0x10u,
             PsThreadType,
             KeGetCurrentThread()->PreviousMode,
             &ThreadHandle,
             0);
  if ( Thread >= 0 )
  {
    Thread = 0;
    // Reject the request when bit 0x10 of the flags at offset 584 is set.
    if ( (*(ThreadHandle + 584) & 0x10) != 0 )  // PETHREAD->CrossThreadFlags
    {
      Thread = STATUS_INVALID_HANDLE;
    }
    else
    {
      // 0x30 matches the Size that KeInitializeApc stores into the KAPC.
      apc = ExAllocatePoolWithQuotaTag(8, 0x30u, 'pasP');
      apc_1 = apc;
      if ( apc )
      {
        // IopDeallocateApc is installed as the kernel routine; ApcRoutine is
        // the normal routine and the APC mode argument is 1.
        KeInitializeApc(apc, v9, OriginalApcEnvironment, IopDeallocateApc, 0, ApcRoutine, 1, NormalContext);
        if ( !KeInsertQueueApc(apc_1, SystemArgument1, SystemArgument2, 0) )
        {
          // Queuing failed: free the allocation made above.
          ExFreePoolWithTag(apc_1, 0);
          Thread = STATUS_UNSUCCESSFUL;
        }
      }
      else
      {
        Thread = STATUS_NO_MEMORY;
      }
    }
    // Drop the reference taken by ObReferenceObjectByHandle.
    ObfDereferenceObject(v9);
  }
  return Thread;
}

  可以看出,如果线程句柄被正常的转化为线程结构体,就会初始化APC,然后插入APC,全部的流程就这些。但这个思考题并没有完成,我们还得弄懂是如何初始化的,如何插入的。先看初始化:

/*
 * KeInitializeApc - decompiler pseudo-code.
 *
 * Fills in every field of the caller-supplied KAPC: type/size header, target
 * thread, the three routines, the APC state index, and the mode/context
 * pair. The APC is left marked as not inserted.
 *
 * Returns the Apc pointer that was passed in.
 */
PKAPC __stdcall KeInitializeApc(PKAPC Apc, PKTHREAD Thread, _KAPC_ENVIRONMENT Environment, int KernelRoutine, int RundownRoutine, int NormalRoutine, char ApcMode, PVOID NormalContext)
{
  char stateIndex = Environment;

  Apc->Type = 0x12;
  Apc->Size = 0x30;

  // CurrentApcEnvironment means "whatever state the thread is using now".
  if ( Environment == CurrentApcEnvironment )
    stateIndex = Thread->ApcStateIndex;

  Apc->Thread = Thread;
  Apc->KernelRoutine = KernelRoutine;
  Apc->ApcStateIndex = stateIndex;
  Apc->RundownRoutine = RundownRoutine;
  Apc->NormalRoutine = NormalRoutine;

  // Mode and context are only kept when a normal routine is supplied;
  // without one both are forced to 0.
  Apc->ApcMode = NormalRoutine ? ApcMode : 0;
  Apc->NormalContext = NormalRoutine ? NormalContext : 0;

  Apc->Inserted = 0;
  return Apc;
}

  初始化代码函数十分简单,我就不赘述了。再来看看如何插入:

/*
 * KeInsertQueueApc - decompiler pseudo-code.
 *
 * Under the target thread's APC queue lock, stores the two system arguments
 * into the KAPC and delegates the actual list insertion to KiInsertQueueApc.
 * Nothing is stored or inserted when the thread's ApcQueueable flag is clear.
 *
 * Returns the result of KiInsertQueueApc, or 0 when the thread is not
 * queueable.
 */
char __stdcall KeInsertQueueApc(PKAPC apc, PVOID SystemArgument1, PVOID SystemArgument2, KPRIORITY Increment)
{
  struct _KLOCK_QUEUE_HANDLE queueLock;
  _KTHREAD *target = apc->Thread;
  char inserted = 0;

  KeAcquireInStackQueuedSpinLockRaiseToSynch(&target->ApcQueueLock, &queueLock);
  if ( target->ApcQueueable )
  {
    apc->SystemArgument1 = SystemArgument1;
    apc->SystemArgument2 = SystemArgument2;
    inserted = KiInsertQueueApc(apc, Increment);
  }
  KeReleaseInStackQueuedSpinLock(&queueLock);
  return inserted;
}

  这个插入函数也是挺简单的,加了一个APC队列自旋锁,然后调用KiInsertQueueApc实现,然后再释放。我们看看这个函数是咋实现的:

/*
 * KiInsertQueueApc - decompiler pseudo-code.
 *
 * Links the KAPC into the ApcListHead[ApcMode] list of the APC state chosen
 * by apc->ApcStateIndex, marks it inserted and, when the APC targets the
 * thread's current APC state, sets the pending flag and wakes or interrupts
 * the thread as needed.
 *
 * Returns 0 if the APC was already inserted, 1 otherwise.
 */
char __fastcall KiInsertQueueApc(PKAPC apc, KPRIORITY Increment)
{
  // [COLLAPSED LOCAL DECLARATIONS. PRESS KEYPAD CTRL-"+" TO EXPAND]

  // An APC that is already on a list is not queued again.
  Inserted = apc->Inserted == 0;
  thread = apc->Thread;
  if ( !Inserted )
    return 0;
  // State index 3 is replaced with the thread's current state index.
  // NOTE(review): 3 is presumably InsertApcEnvironment - confirm against WRK.
  if ( apc->ApcStateIndex == 3 )
    apc->ApcStateIndex = thread->ApcStateIndex;
  ApcState = thread->ApcStatePointer[apc->ApcStateIndex];
  ApcMode = apc->ApcMode;
  if ( apc->NormalRoutine )
  {
    if ( ApcMode && apc->KernelRoutine == PsExitSpecialApc )
    {
      // Non-zero mode with the PsExitSpecialApc kernel routine: flag a user
      // APC as pending and link the entry right after the list head.
      thread->ApcState.UserApcPending = 1;
      v8 = &ApcState->ApcListHead[ApcMode];
      v9 = v8->Flink;
      apc->ApcListEntry.Flink = v8->Flink;
      apc->ApcListEntry.Blink = v8;
      v9->Blink = &apc->ApcListEntry;
      v8->Flink = &apc->ApcListEntry;
    }
    else
    {
      // Every other APC with a normal routine is appended at the tail.
      v10 = &ApcState->ApcListHead[ApcMode];
      v11 = v10->Blink;
      apc->ApcListEntry.Flink = v10;
      apc->ApcListEntry.Blink = v11;
      v11->Flink = &apc->ApcListEntry;
      v10->Blink = &apc->ApcListEntry;
    }
  }
  else
  {
    // No normal routine: walk backwards from the tail until the head or an
    // entry whose i[2].Flink is zero is reached, then link the new entry
    // right after it.
    // NOTE(review): i[2].Flink presumably aliases the NormalRoutine field of
    // the KAPC containing list entry i - confirm the offsets.
    v12 = &ApcState->ApcListHead[ApcMode];
    for ( i = v12->Blink; i != v12 && i[2].Flink; i = i->Blink )
      ;
    v14 = i->Flink;
    apc->ApcListEntry.Flink = i->Flink;
    apc->ApcListEntry.Blink = i;
    v14->Blink = &apc->ApcListEntry;
    i->Flink = &apc->ApcListEntry;
  }
  ApcStateIndex = apc->ApcStateIndex;
  apc->Inserted = 1;
  // Notification only happens when the APC was queued to the APC state the
  // thread is currently using.
  if ( ApcStateIndex == thread->ApcStateIndex )
  {
    if ( ApcMode )
    {
      // Non-zero mode: a thread waiting in UserMode that is alertable (or
      // already has a user APC pending) is pulled out of its wait with
      // STATUS_USER_APC.
      if ( thread->State == Waiting
        && thread->WaitMode == UserMode
        && (thread->Alertable || thread->ApcState.UserApcPending) )
      {
        thread->ApcState.UserApcPending = 1;
        KiUnwaitThread(thread, STATUS_USER_APC, Increment, 0);
      }
    }
    else
    {
      // Mode 0: always flag a kernel APC as pending, then act on the thread
      // state that was read before the flag was set.
      State = thread->State;
      thread->ApcState.KernelApcPending = 1;
      // NOTE(review): state 2 is presumably Running and state 5 Waiting -
      // confirm against the KTHREAD_STATE enumeration.
      if ( State == 2 )
      {
        // NOTE(review): the decompiler shows the interrupt level (1) being
        // written into the low byte of `thread`; this looks like register
        // reuse (cl) rather than a real modification of the pointer.
        LOBYTE(thread) = 1;
        HalRequestSoftwareInterrupt(thread);
      }
      else if ( State == 5
             && !thread->WaitIrql
             && (!apc->NormalRoutine || !thread->KernelApcDisable && !thread->ApcState.KernelApcInProgress) )
      {
        // Waiting with WaitIrql 0: wake the thread unless this is an APC with
        // a normal routine and kernel APCs are disabled or one is in progress.
        KiUnwaitThread(thread, STATUS_KERNEL_APC, Increment, 0);
      }
    }
  }
  return 1;
}

  如下部分就是插入用户APC的部分:

// Excerpt from KiInsertQueueApc: list insertion for an APC that has a
// NormalRoutine.
if ( ApcMode && apc->KernelRoutine == PsExitSpecialApc )
    {
      // Non-zero mode with the PsExitSpecialApc kernel routine: mark a user
      // APC as pending and insert right after the list head (front of queue).
      thread->ApcState.UserApcPending = 1;
      v8 = &ApcState->ApcListHead[ApcMode];
      v9 = v8->Flink;
      apc->ApcListEntry.Flink = v8->Flink;
      apc->ApcListEntry.Blink = v8;
      v9->Blink = &apc->ApcListEntry;
      v8->Flink = &apc->ApcListEntry;
    }
    else
    {
      // All other cases: insert before the list head (end of queue).
      v10 = &ApcState->ApcListHead[ApcMode];
      v11 = v10->Blink;
      apc->ApcListEntry.Flink = v10;
      apc->ApcListEntry.Blink = v11;
      v11->Flink = &apc->ApcListEntry;
      v10->Blink = &apc->ApcListEntry;
    }

  本思考题最终结束,当然里面也有很多细节并没有介绍,将在总结与提升介绍。

  这个实验应该也挺好做的,结论是执行两个,代码验证如下:

#include "stdafx.h"
#include <stdlib.h>

#define  _WIN32_WINNT 0x400
#include <windows.h>

// APC callback queued by main(): prints the value passed to QueueUserAPC.
// The parameter is ULONG_PTR so the function matches the PAPCFUNC prototype
// (pointer-sized argument); with ULONG the call to QueueUserAPC does not
// compile on 64-bit targets.
VOID WINAPI APCProc(ULONG_PTR Param)
{
  // Only small demo values are passed, so narrowing to unsigned int for %X
  // is safe and keeps the format string valid on every target.
  printf("APC…… 0x%X \n",(unsigned int)Param);
}

// Worker thread for the experiment: prints "Running" eight times with
// non-alertable one-second sleeps, then performs a single alertable SleepEx
// before exiting with code 0. Param is unused.
DWORD WINAPI ThreadProc(VOID* Param)
{
  int remaining = 8;
  while (remaining-- > 0)
  {
    Sleep(1000);
    printf("Running\n");
  }
  // bAlertable = TRUE: the only alertable wait this thread performs.
  SleepEx(1,TRUE);
  return 0;
}

int main(int argc, char* argv[])
{
  HANDLE hTread=CreateThread(NULL,NULL,ThreadProc,NULL,NULL,NULL);
  Sleep(3000);
  QueueUserAPC(APCProc,hTread,1);
  QueueUserAPC(APCProc,hTread,2);
  system("pause");
  CloseHandle(hTread);
  return 0;
}

  效果如下:

  APC的执行流程还是挺复杂的,需要大量的逆向。如果发现自己不懂的地方,请参考WRK,里面的参数类型和变量类型都有详细的参考。里面有比较多未知的东西,不会的话,强烈推荐参考WRK。下面我们分如下三部分讲解:何时执行APC、内核APC执行流程、用户APC的执行流程。注意,如果真想学会,请一定在看完“何时执行APC”这部分之后就先不要往下看了,自己把KiDeliverApc这个处理APC的函数完整地逆一遍,做不下去或者完成了之后,再回来看。逆向可能需要花费3个小时左右,自己要有心理准备。

何时执行 APC

  我们都知道,APC函数的执行与插入并不是同一个线程,具体点说在A线程中向B线程插入一个APC,插入的动作是在A线程中完成的,但什么时候执行则由B线程决定,所以叫“异步过程调用”。内核APC函数与用户APC函数的执行时间和执行方式也有区别,下面我们来看看线程是何时处理APC的:

SwapContext

  这个函数我们都很熟悉,在学习线程切换的时候重点研究的,咱把APC相关的东西拿出来看看:

                ; Excerpt from the end of SwapContext: the return value reports
                ; whether the thread in esi has KernelApcPending set.
                cmp     [esi+_KTHREAD.ApcState.KernelApcPending], 0
                jnz     short loc_46A9BD
                ; No kernel APC pending: restore flags and return 0.
                popf
                xor     eax, eax
                retn
; ---------------------------------------------------------------------------

loc_46A9BD:                             ; CODE XREF: SwapContext+D7↑j
                ; Kernel APC pending: restore flags and return 1 in al unless
                ; the branch to loc_46A9C3 (outside this excerpt) is taken.
                popf
                jnz     short loc_46A9C3
                mov     al, 1
                retn

  之前说过KernelApcPending是表示有没有内核APC处理的标志,如果是1,也就是有的话,就会使返回值置1;反之置0。也就是说,这个函数的返回值是有意义的。我们再往上一级函数看看:

; __fastcall KiSwapContext(x)
; Wrapper around SwapContext: saves ebx/esi/edi/ebp, makes the thread passed
; in ecx the processor's CurrentThread, calls SwapContext and restores the
; saved registers. Nothing after the call writes eax, so the value SwapContext
; leaves in eax/al is returned unchanged to the caller.
@KiSwapContext@4 proc near              ; CODE XREF: KiSwapThread()+41↑p

var_200FE4      = dword ptr -200FE4h
var_10          = dword ptr -10h
var_C           = dword ptr -0Ch
var_8           = dword ptr -8
var_4           = dword ptr -4

                ; Reserve 16 bytes and save the four registers there.
                sub     esp, 10h
                mov     [esp+10h+var_4], ebx
                mov     [esp+10h+var_8], esi
                mov     [esp+10h+var_C], edi
                mov     [esp+10h+var_10], ebp
                mov     ebx, ds:0FFDFF01Ch ; ebx = &_KPCR
                mov     esi, ecx        ; esi = ecx = NextReadyThread
                mov     edi, [ebx+_KPCR.PrcbData.CurrentThread] ; oldThread
                mov     [ebx+_KPCR.PrcbData.CurrentThread], esi
                ; cl = WaitIrql of the old thread, passed on to SwapContext.
                mov     cl, [edi+_KTHREAD.WaitIrql]
                call    SwapContext
                ; Restore the saved registers; eax is deliberately untouched.
                mov     ebp, [esp+10h+var_10]
                mov     edi, [esp+10h+var_C]
                mov     esi, [esp+10h+var_8]
                mov     ebx, [esp+10h+var_4]
                add     esp, 10h
                retn
@KiSwapContext@4 endp

  这一级函数并没有用到返回值,也没有操作eax,说明就没有用到吗?我们再往上一级看看:

; Excerpt from KiSwapThread: switch to the thread in eax and, if KiSwapContext
; returns non-zero in al, deliver APCs before lowering the IRQL.
loc_429CCF:                             ; CODE XREF: KiSwapThread()+1A↑j
                mov     ecx, eax        ; from this we can tell the argument is a thread structure, passed in ecx
                call    @KiSwapContext@4 ; KiSwapContext(x)
                ; The flags set by this test are consumed by the jz below; the
                ; intervening mov instructions do not modify flags.
                test    al, al
                ; NOTE(review): [edi+58h] and [edi+54h] are raw offsets;
                ; presumably WaitIrql and WaitStatus of the thread in edi - confirm.
                mov     cl, [edi+58h]
                mov     edi, [edi+54h]
                mov     esi, ds:__imp_@KfLowerIrql@4 ; KfLowerIrql(x)
                jz      short loc_429CF6
                ; al != 0: lower to IRQL 1, call KiDeliverApc(0, 0, 0), then
                ; select IRQL 0 for the final KfLowerIrql.
                mov     cl, 1           ; NewIrql
                call    esi ; KfLowerIrql(x) ; KfLowerIrql(x)
                xor     eax, eax
                push    eax             ; trapframe
                push    eax             ; unknown
                push    eax             ; CanUserMode
                call    _KiDeliverApc@12 ; KiDeliverApc(x,x,x)
                xor     cl, cl          ; NewIrql

loc_429CF6:                             ; CODE XREF: KiSwapThread()+54↑j
                ; Lower the IRQL to the value in cl and return the value that
                ; was loaded from [edi+54h].
                call    esi ; KfLowerIrql(x) ; KfLowerIrql(x)
                mov     eax, edi
                pop     edi
                pop     esi
                retn

  看到test al, al这条汇编了吗?如果为1的话,也就是有内核APC执行,就会继续往下走,调用KiDeliverApc这个函数,也就是我们前面提到的处理APC的函数,传递的三个参数都是0。

KiServiceExit

  如果你比较仔细的话,这个函数你也是眼熟的,就是我们在学习系统调用时看到的函数。不过当时由于知识的限制没有要求。这个函数是系统调用、中断异常必经之处,我们来看看与APC相关的汇编:

                ; Excerpt from KiServiceExit: APC delivery loop. ebp is used as
                ; the trap frame pointer (it is copied to ebx and pushed as the
                ; trapframe argument below).
                ; NOTE(review): the raw offsets are presumably
                ; _KTRAP_FRAME.EFlags (+70h, bit 20000h = virtual-8086 mode) and
                ; _KTRAP_FRAME.SegCs (+6Ch, bit 0 set for user mode) - confirm.
                cli
                test    dword ptr [ebp+70h], 20000h
                jnz     short loc_466659
                test    byte ptr [ebp+6Ch], 1
                jz      short loc_4666B0

loc_466659:                             ; CODE XREF: _KiServiceExit+8↑j
                                        ; _KiServiceExit+63↓j
                ; NOTE(review): ds:0FFDFF124h is presumably
                ; _KPCR.PrcbData.CurrentThread, +2Eh presumably KTHREAD.Alerted
                ; and +4Ah presumably ApcState.UserApcPending - confirm.
                mov     ebx, ds:0FFDFF124h
                mov     byte ptr [ebx+2Eh], 0
                cmp     byte ptr [ebx+4Ah], 0
                ; Byte at +4Ah is zero: leave via loc_4666B0 without delivering.
                jz      short loc_4666B0
                ; Save eax in the trap frame and store fixed values into four
                ; trap-frame slots.
                ; NOTE(review): presumably Eax (+44h), SegFs (+50h), SegDs
                ; (+38h), SegEs (+34h) and SegGs (+30h) - confirm.
                mov     ebx, ebp
                mov     [ebx+44h], eax
                mov     dword ptr [ebx+50h], 3Bh ; ';'
                mov     dword ptr [ebx+38h], 23h ; '#'
                mov     dword ptr [ebx+34h], 23h ; '#'
                mov     dword ptr [ebx+30h], 0
                ; Raise to IRQL 1 and keep the previous IRQL on the stack.
                mov     ecx, 1          ; NewIrql
                call    ds:__imp_@KfRaiseIrql@4 ; KfRaiseIrql(x)
                push    eax
                sti
                ; KiDeliverApc(1, 0, trapframe)
                push    ebx             ; trapframe
                push    0               ; unknown
                push    1               ; CanUserAPC
                call    _KiDeliverApc@12 ; KiDeliverApc(x,x,x)
                ; Restore the previous IRQL and the saved eax, then re-run the
                ; check at loc_466659.
                pop     ecx             ; NewIrql
                call    ds:__imp_@KfLowerIrql@4 ; KfLowerIrql(x)
                mov     eax, [ebx+44h]
                cli
                jmp     short loc_466659

  可以看出传递参数的不同,第一个参数是1,第二个参数是0,第三个就是所谓的TrapFrame。

内核 APC 执行流程

  内核APC执行算是简单的。不过为了讲解方便,我们需要把KiDeliverApc的伪代码放上:

/*
 * KiDeliverApc - decompiler pseudo-code.
 *
 * Drains the current thread's first (kernel-mode) APC list and then, when
 * CanUserAPC equals UserMode and a user APC is pending, removes the first
 * entry of ApcListHead[UserMode] and prepares it for execution through
 * KiInitializeUserApc.
 *
 * CanUserAPC - compared against UserMode to decide whether a user APC may be
 *              delivered.
 * unknown    - passed through to KiInitializeUserApc as its first argument.
 * trapframe  - stored in the thread's TrapFrame field for the duration of the
 *              call; may be NULL.
 *
 * Returns the TrapFrame value the thread had on entry, which is also written
 * back to the thread before returning.
 */
_KTRAP_FRAME *__stdcall KiDeliverApc(BOOLEAN CanUserAPC, int unknown, _KTRAP_FRAME *trapframe)
{
  // [COLLAPSED LOCAL DECLARATIONS. PRESS KEYPAD CTRL-"+" TO EXPAND]

  // If the interrupted Eip lies inside the ExpInterlockedPopEntrySList
  // resume..end range, rewind it to the resume label.
  if ( trapframe )
  {
    _eip = trapframe->Eip;
    if ( _eip >= ExpInterlockedPopEntrySListResume && _eip <= ExpInterlockedPopEntrySListEnd )
      trapframe->Eip = ExpInterlockedPopEntrySListResume;
  }
  currentThread = KeGetCurrentThread();
  // Remember the old trap frame and the process recorded in ApcState; both
  // are used again at LABEL_21.
  tmpTrapFrame = currentThread->Tcb.TrapFrame;
  v5 = currentThread->Tcb.ApcState.Process;
  currentThread->Tcb.TrapFrame = trapframe;
  APCDeliverProcess = v5;
  KeAcquireInStackQueuedSpinLock(&currentThread->Tcb.ApcQueueLock, &LockHandle);
  currentThread->Tcb.ApcState.KernelApcPending = 0;
  // Address of ApcState, used as the head of the list drained by this loop.
  ApcListEntry = (currentThread + offsetof(_KTHREAD, ApcState));
  while ( !IsListEmpty(ApcListEntry) )
  {
    // Snapshot the routines and arguments of the first KAPC before it is
    // unlinked; the kernel routine receives pointers to these copies.
    plist = ApcListEntry->Flink;
    kpac = CONTAINING_RECORD(ApcListEntry->Flink, _KAPC, ApcListEntry);
    KernelRoutine = kpac->KernelRoutine;
    NormalRoutine = kpac->NormalRoutine;
    WaitStatus = kpac->NormalContext;
    SystemArgument1 = kpac->SystemArgument1;
    SystemArgument2 = kpac->SystemArgument2;
    if ( NormalRoutine )
    {
      // An APC with a normal routine is not delivered while another one is
      // in progress or while kernel APCs are disabled; the function then
      // releases the lock and finishes.
      if ( currentThread->Tcb.ApcState.KernelApcInProgress || currentThread->Tcb.KernelApcDisable )
        goto NoUserAPC;
      RemoveEntryList(plist);
      kpac->Inserted = 0;
      KeReleaseInStackQueuedSpinLock(&LockHandle);
      KernelRoutine(kpac, &NormalRoutine, &WaitStatus, &SystemArgument1, &SystemArgument2);
      // The kernel routine may have cleared NormalRoutine; only call it if
      // it is still set. It runs at IRQL 0 with KernelApcInProgress set.
      if ( NormalRoutine )
      {
        currentThread->Tcb.ApcState.KernelApcInProgress = 1;
        KfLowerIrql(0);
        (NormalRoutine)(WaitStatus, SystemArgument1, SystemArgument2);
        LockHandle.OldIrql = KfRaiseIrql(1u);
      }
      KeAcquireInStackQueuedSpinLock(&currentThread->Tcb.ApcQueueLock, &LockHandle);
      currentThread->Tcb.ApcState.KernelApcInProgress = 0;
    }
    else
    {
      // No normal routine: unlink the APC and run only its kernel routine,
      // with the queue lock dropped around the call.
      RemoveEntryList(plist);
      kpac->Inserted = 0;
      KeReleaseInStackQueuedSpinLock(&LockHandle);
      KernelRoutine(kpac, &NormalRoutine, &WaitStatus, &SystemArgument1, &SystemArgument2);
      KeAcquireInStackQueuedSpinLock(&currentThread->Tcb.ApcQueueLock, &LockHandle);
    }
  }
  // User APC: requires a non-empty user list, CanUserAPC == UserMode and
  // UserApcPending set; otherwise just release the lock.
  P = currentThread->Tcb.ApcState.ApcListHead[UserMode].Flink;
  if ( P == &currentThread->Tcb.ApcState.ApcListHead[UserMode]
    || CanUserAPC != UserMode
    || !currentThread->Tcb.ApcState.UserApcPending )
  {
NoUserAPC:
    KeReleaseInStackQueuedSpinLock(&LockHandle);
    goto LABEL_21;
  }
  currentThread->Tcb.ApcState.UserApcPending = 0;
  kpacSTATE = &P[-2u].Blink;                    // ==== the decompiled code below is wrong; refer to the assembly ====
  KernelRoutine_1 = P[1].Flink;
  NormalRoutine = P[2].Flink;
  WaitStatus = P[2].Blink;
  SystemArgument1 = P[3].Flink;
  SystemArgument2 = P[3].Blink;
  v12 = P->Flink;
  v13 = P->Blink;
  v13->Flink = v12;
  v12->Blink = v13;
  kpacSTATE[1].UserApcPending = 0;              // ======= end of incorrect code =======
  KeReleaseInStackQueuedSpinLock(&LockHandle);
  (KernelRoutine_1)(kpacSTATE, &NormalRoutine, &WaitStatus, &SystemArgument1, &SystemArgument2);
  // If the kernel routine left NormalRoutine set, arrange for it to run via
  // KiInitializeUserApc; otherwise call KeTestAlertThread(1).
  if ( NormalRoutine )
    KiInitializeUserApc(unknown, trapframe, NormalRoutine, WaitStatus, SystemArgument1, SystemArgument2);
  else
    KeTestAlertThread(1);
LABEL_21:
  // The process recorded in ApcState must be the same as on entry; bug check
  // code 5 is raised otherwise.
  if ( currentThread->Tcb.ApcState.Process != APCDeliverProcess )
    KeBugCheckEx(
      5u,
      APCDeliverProcess,
      currentThread->Tcb.ApcState.Process,
      currentThread->Tcb.ApcStateIndex,
      KeGetPcr()->PrcbData.DpcRoutineActive);
  result = tmpTrapFrame;
  currentThread->Tcb.TrapFrame = tmpTrapFrame;
  return result;
}

  当然,上面的伪代码有错误,我已经用注释标注错误的区域,下面我们再把汇编放上:

; _KTRAP_FRAME *__stdcall KiDeliverApc(BOOLEAN CanUserAPC, int unknown, _KTRAP_FRAME *trapframe)
; Disassembly of KiDeliverApc. Register usage in the body: esi = current
; thread, ebx = address of the thread's ApcState (head of the first APC list),
; edi = KAPC currently being processed (list entry address minus 0Ch).
                public _KiDeliverApc@12
_KiDeliverApc@12 proc near              ; CODE XREF: KiSwapThread()+5F↓p
                                        ; _KiServiceExit+53↓p ...

LockHandle      = _KLOCK_QUEUE_HANDLE ptr -28h
tmpTrapFrame    = dword ptr -1Ch
APCDeliverProcess= dword ptr -18h
KernelRoutine   = dword ptr -14h
NormalContext   = dword ptr -10h
SystemArgument1 = dword ptr -0Ch
SystemArgument2 = dword ptr -8
NormalRoutine   = dword ptr -4
CanUserAPC      = byte ptr  8
a2              = dword ptr  0Ch
trapframe       = dword ptr  10h

                mov     edi, edi
                push    ebp
                mov     ebp, esp
                sub     esp, 28h
                ; If trapframe is non-NULL and its Eip lies within
                ; [ExpInterlockedPopEntrySListResume, ExpInterlockedPopEntrySListEnd],
                ; reset Eip to ExpInterlockedPopEntrySListResume.
                mov     ecx, [ebp+trapframe]
                test    ecx, ecx
                jz      short loc_426A54
                mov     edx, [ecx+_KTRAP_FRAME._Eip]
                mov     eax, offset _ExpInterlockedPopEntrySListResume@0 ; ExpInterlockedPopEntrySListResume()
                cmp     edx, eax
                jb      short loc_426A54
                cmp     edx, offset _ExpInterlockedPopEntrySListEnd@0 ; ExpInterlockedPopEntrySListEnd()
                ja      short loc_426A54
                mov     [ecx+_KTRAP_FRAME._Eip], eax

loc_426A54:                             ; CODE XREF: KiDeliverApc(x,x,x)+D↑j
                                        ; KiDeliverApc(x,x,x)+19↑j ...
                push    ebx
                push    esi
                push    edi
                ; esi = current thread. Save its old TrapFrame and the process
                ; from ApcState, install the new trap frame (ecx), take the APC
                ; queue lock and clear KernelApcPending.
                mov     eax, large fs:_KPCR.PrcbData.CurrentThread
                mov     esi, eax
                mov     eax, [esi+_KTHREAD.TrapFrame]
                mov     [ebp+tmpTrapFrame], eax
                mov     eax, [esi+_KTHREAD.ApcState.Process]
                mov     [esi+_KTHREAD.TrapFrame], ecx
                lea     ecx, [esi+_ETHREAD.Tcb.ApcQueueLock] ; SpinLock
                lea     edx, [ebp+LockHandle] ; LockHandle
                mov     [ebp+APCDeliverProcess], eax
                call    ds:__imp_@KeAcquireInStackQueuedSpinLock@8 ; KeAcquireInStackQueuedSpinLock(x,x)
                mov     [esi+_KTHREAD.ApcState.KernelApcPending], 0
                lea     ebx, [esi+_KTHREAD.ApcState]
                jmp     loc_426B70
; ---------------------------------------------------------------------------

ListEmptyLoop:                          ; CODE XREF: KiDeliverApc(x,x,x)+144↓j
                ; Loop body: eax = first list entry, edi = its KAPC. Copy the
                ; routines and arguments to locals. The flags produced by
                ; "test ecx, ecx" are still intact at the jnz below because
                ; only mov instructions sit in between.
                mov     eax, [ebx]
                lea     edi, [eax-0Ch]
                mov     ecx, [edi+_KAPC.KernelRoutine]
                mov     [ebp+KernelRoutine], ecx
                mov     ecx, [edi+_KAPC.NormalRoutine]
                test    ecx, ecx
                mov     [ebp+NormalRoutine], ecx
                mov     edx, [edi+_KAPC.NormalContext]
                mov     [ebp+NormalContext], edx
                mov     edx, [edi+_KAPC.SystemArgument1]
                mov     [ebp+SystemArgument1], edx
                mov     edx, [edi+_KAPC.SystemArgument2]
                mov     [ebp+SystemArgument2], edx
                jnz     short NormalRoutineNotNull
                ; NormalRoutine == NULL: unlink, release the lock, call
                ; KernelRoutine(Apc, &NormalRoutine, &NormalContext,
                ; &SystemArgument1, &SystemArgument2), re-acquire the lock.
                mov     ecx, [eax]      ; start unlinking the APC
                mov     eax, [eax+4]
                mov     [eax], ecx
                mov     [ecx+4], eax
                lea     ecx, [ebp+LockHandle] ; LockHandle
                mov     [edi+_KAPC.Inserted], 0 ; WaitListEntry
                call    ds:__imp_@KeReleaseInStackQueuedSpinLock@4 ; KeReleaseInStackQueuedSpinLock(x)
                lea     eax, [ebp+SystemArgument2]
                push    eax
                lea     eax, [ebp+SystemArgument1]
                push    eax
                lea     eax, [ebp+NormalContext]
                push    eax
                lea     eax, [ebp+NormalRoutine]
                push    eax
                push    edi
                call    [ebp+KernelRoutine]
                lea     edx, [ebp+LockHandle] ; LockHandle
                lea     ecx, [esi+_KTHREAD.ApcQueueLock] ; SpinLock
                call    ds:__imp_@KeAcquireInStackQueuedSpinLock@8 ; KeAcquireInStackQueuedSpinLock(x,x)
                jmp     short loc_426B70
; ---------------------------------------------------------------------------

NormalRoutineNotNull:                   ; CODE XREF: KiDeliverApc(x,x,x)+86↑j
                ; NormalRoutine != NULL: give up (NoUserAPC) if a kernel APC is
                ; already in progress or kernel APCs are disabled.
                cmp     [esi+_KTHREAD.ApcState.KernelApcInProgress], 0
                jnz     NoUserAPC
                cmp     [esi+_KTHREAD.KernelApcDisable], 0
                jnz     NoUserAPC
                ; Unlink the APC, release the lock and call KernelRoutine.
                mov     ecx, [eax]
                mov     eax, [eax+_LIST_ENTRY.Blink]
                mov     [eax], ecx
                mov     [ecx+_LIST_ENTRY.Blink], eax
                lea     ecx, [ebp+LockHandle] ; LockHandle
                mov     [edi+_KAPC.Inserted], 0
                call    ds:__imp_@KeReleaseInStackQueuedSpinLock@4 ; KeReleaseInStackQueuedSpinLock(x)
                lea     eax, [ebp+SystemArgument2]
                push    eax
                lea     eax, [ebp+SystemArgument1]
                push    eax
                lea     eax, [ebp+NormalContext]
                push    eax
                lea     eax, [ebp+NormalRoutine]
                push    eax
                push    edi
                call    [ebp+KernelRoutine]
                ; If KernelRoutine left NormalRoutine non-NULL: set
                ; KernelApcInProgress, lower to IRQL 0, call
                ; NormalRoutine(NormalContext, SystemArgument1, SystemArgument2),
                ; then raise back to IRQL 1.
                cmp     [ebp+NormalRoutine], 0
                jz      short NormalRoutineNULL
                xor     cl, cl          ; NewIrql
                mov     [esi+_KTHREAD.ApcState.KernelApcInProgress], 1
                call    ds:__imp_@KfLowerIrql@4 ; KfLowerIrql(x)
                push    [ebp+SystemArgument2]
                push    [ebp+SystemArgument1]
                push    [ebp+NormalContext]
                call    [ebp+NormalRoutine]
                mov     cl, 1           ; NewIrql
                call    ds:__imp_@KfRaiseIrql@4 ; KfRaiseIrql(x)
                mov     [ebp+LockHandle.OldIrql], al

NormalRoutineNULL:                      ; CODE XREF: KiDeliverApc(x,x,x)+10A↑j
                ; Re-acquire the queue lock and clear KernelApcInProgress.
                lea     edx, [ebp+LockHandle] ; LockHandle
                lea     ecx, [esi+_ETHREAD.Tcb.ApcQueueLock] ; SpinLock
                call    ds:__imp_@KeAcquireInStackQueuedSpinLock@8 ; KeAcquireInStackQueuedSpinLock(x,x)
                mov     [esi+_KTHREAD.ApcState.KernelApcInProgress], 0

loc_426B70:                             ; CODE XREF: KiDeliverApc(x,x,x)+5C↑j
                                        ; KiDeliverApc(x,x,x)+C2↑j
                ; Loop condition: continue while the first list is not empty
                ; (head.Flink != head).
                cmp     [ebx+_KAPC_STATE.ApcListHead.Flink], ebx
                jnz     ListEmptyLoop
                ; User APC: needs a non-empty ApcListHead[UserMode],
                ; CanUserAPC == 1 and UserApcPending != 0.
                lea     ecx, [esi+(_ETHREAD.Tcb.ApcState.ApcListHead.Flink+8)] ; ApcListHead[UserMode]
                mov     eax, [ecx+_LIST_ENTRY.Flink]
                cmp     eax, ecx
                jz      NoUserAPC
                cmp     [ebp+CanUserAPC], 1
                jnz     short NoUserAPC
                cmp     [esi+_KTHREAD.ApcState.UserApcPending], 0
                jz      short NoUserAPC
                ; Clear UserApcPending, copy the first user KAPC's fields
                ; (ebx = KernelRoutine), unlink it, clear Inserted, release the
                ; lock and call the kernel routine.
                mov     [esi+_KTHREAD.ApcState.UserApcPending], 0
                lea     edi, [eax-0Ch]
                mov     ecx, [edi+_KAPC.NormalRoutine]
                mov     ebx, [edi+_KAPC.KernelRoutine]
                mov     [ebp+NormalRoutine], ecx
                mov     ecx, [edi+_KAPC.NormalContext]
                mov     [ebp+NormalContext], ecx
                mov     ecx, [edi+_KAPC.SystemArgument1]
                mov     [ebp+SystemArgument1], ecx
                mov     ecx, [edi+_KAPC.SystemArgument2]
                mov     [ebp+SystemArgument2], ecx
                mov     ecx, [eax+_LIST_ENTRY.Flink]
                mov     eax, [eax+_LIST_ENTRY.Blink]
                mov     [eax+_LIST_ENTRY.Flink], ecx
                mov     [ecx+_LIST_ENTRY.Blink], eax
                lea     ecx, [ebp+LockHandle] ; LockHandle
                mov     [edi+_KAPC.Inserted], 0
                call    ds:__imp_@KeReleaseInStackQueuedSpinLock@4 ; KeReleaseInStackQueuedSpinLock(x)
                lea     eax, [ebp+SystemArgument2]
                push    eax
                lea     eax, [ebp+SystemArgument1]
                push    eax
                lea     eax, [ebp+NormalContext]
                push    eax
                lea     eax, [ebp+NormalRoutine]
                push    eax
                push    edi
                call    ebx
                ; NormalRoutine cleared by the kernel routine: call
                ; KeTestAlertThread(1) instead of dispatching to user mode.
                cmp     [ebp+NormalRoutine], 0
                jnz     short loc_426BEC
                push    1
                call    _KeTestAlertThread@4 ; KeTestAlertThread(x)
                jmp     short loc_426C0E
; ---------------------------------------------------------------------------

loc_426BEC:                             ; CODE XREF: KiDeliverApc(x,x,x)+1B3↑j
                ; KiInitializeUserApc(a2, trapframe, NormalRoutine,
                ; NormalContext, SystemArgument1, SystemArgument2)
                push    [ebp+SystemArgument2] ; SystemArgument2
                push    [ebp+SystemArgument1] ; SystemArgument1
                push    [ebp+NormalContext] ; NormalContext
                push    [ebp+NormalRoutine] ; NormalRoutine
                push    [ebp+trapframe] ; TrapFrame
                push    [ebp+a2]        ; ExceptionFrame
                call    _KiInitializeUserApc@24 ; KiInitializeUserApc(x,x,x,x,x,x)
                jmp     short loc_426C0E
; ---------------------------------------------------------------------------

NoUserAPC:                              ; CODE XREF: KiDeliverApc(x,x,x)+C8↑j
                                        ; KiDeliverApc(x,x,x)+D5↑j ...
                ; Nothing (more) to deliver: just release the queue lock.
                lea     ecx, [ebp+LockHandle] ; LockHandle
                call    ds:__imp_@KeReleaseInStackQueuedSpinLock@4 ; KeReleaseInStackQueuedSpinLock(x)

loc_426C0E:                             ; CODE XREF: KiDeliverApc(x,x,x)+1BC↑j
                                        ; KiDeliverApc(x,x,x)+1D5↑j
                ; The process in ApcState must still equal the one saved on
                ; entry; otherwise KeBugCheckEx(5, saved, current,
                ; ApcStateIndex, DpcRoutineActive).
                mov     ecx, [ebp+APCDeliverProcess]
                cmp     [esi+_KTHREAD.ApcState.Process], ecx
                jz      short loc_426C30
                mov     eax, large fs:_KPCR.PrcbData.DpcRoutineActive
                push    eax             ; BugCheckParameter4
                movzx   eax, [esi+_KTHREAD.ApcStateIndex]
                push    eax             ; BugCheckParameter3
                push    [esi+_KTHREAD.ApcState.Process] ; BugCheckParameter2
                push    ecx             ; BugCheckParameter1
                push    5               ; BugCheckCode
                call    _KeBugCheckEx@20 ; KeBugCheckEx(x,x,x,x,x)
; ---------------------------------------------------------------------------

loc_426C30:                             ; CODE XREF: KiDeliverApc(x,x,x)+1E6↑j
                ; Restore the thread's original TrapFrame (also returned in
                ; eax) and pop the three stdcall arguments (retn 0Ch).
                mov     eax, [ebp+tmpTrapFrame]
                pop     edi
                mov     [esi+_KTHREAD.TrapFrame], eax
                pop     esi
                pop     ebx
                leave
                retn    0Ch
_KiDeliverApc@12 endp

  通过代码分析,上面的两处传的参数都会处理内核APC,线程切换处理内核APC,另一个处理用户和内核APC。由于此部分讲解内核APC的处理,我们只看相关部分。

  由于此部分不难,我们只简单介绍一下流程,其他细节自行查看代码:

  1. 判断第一个链表是否为空(链表地址和链表的内容是否相等,相等则为空,微软就是这么设计的)

  2. 判断KTHREAD.ApcState.KernelApcInProgress是否为1

  3. 判断是否禁用内核APC(KTHREAD.KernelApcDisable是否为1)

  4. 将当前KAPC结构体从链表中摘除

  5. 执行KAPC.KernelRoutine指定的函数,释放KAPC结构体占用的空间

  6. KTHREAD.ApcState.KernelApcInProgress设置为1,标识正在执行内核APC

  7. 执行真正的内核APC函数(KAPC.NormalRoutine)

  8. 执行完毕,将KernelApcInProgress改为0

  9. 循环上述过程直至装内核APC的链表全部完成

      我们可以做出如下总结:

  1. 内核APC在线程切换的时候就会执行,这也就意味着,只要插入内核APC很快就会执行。

  2. 在执行用户APC之前会先执行内核APC

  3. 内核APC在内核空间执行,不需要换栈,一个循环全部执行完毕。

      有些细节我还没有强调,不过自己应该也能够发现,这些东西我都会在下一篇总结与提升进行讲解。下面我们来分析用户APC的执行流程。

用户 APC 执行流程

  上述伪代码错误的区域都是赋值操作,找到KAPC结构,并摘除准备执行的操作,请参考汇编,挺简单的我就不赘述了。我们把重点放到KiInitializeUserApc这个函数上,先看其伪代码:

/*
 * KiInitializeUserApc - decompiler pseudo-code.
 *
 * Captures the interrupted context from TrapFrame into a CONTEXT record,
 * copies that record onto the stack addressed by the captured Esp, and
 * rewrites TrapFrame so that execution resumes at KeUserApcDispatcher with
 * NormalRoutine, NormalContext, SystemArgument1 and SystemArgument2 stored in
 * the first four slots of the new stack area.
 *
 * NOTE(review): `result` and `v10` are never assigned in this listing, and
 * the trailing xHalReferenceHandler(v10) call has no visible meaning here;
 * all three look like decompiler artifacts (presumably epilogue code) -
 * confirm against the disassembly.
 */
int __stdcall KiInitializeUserApc(_KTRAP_FRAME *ExceptionFrame, _KTRAP_FRAME *TrapFrame, PKNORMAL_ROUTINE NormalRoutine, PVOID NormalContext, PVOID SystemArgument1, PVOID SystemArgument2)
{
  PKNORMAL_ROUTINE *_esp; // esi
  unsigned int newEflag; // eax
  int result; // eax
  _CONTEXT ContextRecord; // [esp+70h] [ebp-2E8h] BYREF
  void *v10; // [esp+33Ch] [ebp-1Ch]
  CPPEH_RECORD ms_exc; // [esp+340h] [ebp-18h]

  // NOTE(review): offsetof(_KTRAP_FRAME, DbgEbp) is presumably 0 and was
  // substituted symbolically by the decompiler, i.e. the body runs only when
  // EFlags bit 0x20000 is clear - confirm.
  if ( (TrapFrame->EFlags & 0x20000) == offsetof(_KTRAP_FRAME, DbgEbp) )
  {
    // Back up the trap frame contents selected by ContextFlags into
    // ContextRecord.
    ContextRecord.ContextFlags = 0x10017;
    KeContextFromKframes(TrapFrame, ExceptionFrame, &ContextRecord);
    // NOTE(review): the TryLevel stores presumably delimit an SEH __try
    // region around the stack accesses - confirm.
    ms_exc.registration.TryLevel = 0;
    // Reserve 0x2DC bytes below the 4-byte-aligned captured Esp:
    // 0x2CC bytes for the CONTEXT copy plus 0x10 bytes for the four slots
    // written at the end of this block (assuming 4-byte pointers, so that
    // _esp + 4 is 16 bytes above _esp).
    _esp = ((ContextRecord.Esp & 0xFFFFFFFC) - 0x2DC);
    ProbeForWrite(_esp, 0x2DCu, 4u);
    qmemcpy(_esp + 4, &ContextRecord, 0x2CCu);
    // Load fixed selector values into the trap frame.
    TrapFrame->SegCs = 0x1B;
    TrapFrame->HardwareSegSs = 0x23;
    TrapFrame->SegDs = 0x23;
    TrapFrame->SegEs = 0x23;
    TrapFrame->SegFs = 0x3B;
    TrapFrame->SegGs = 0;
    // Build the new EFlags from the captured ones: masked with the V86
    // and/or masks when bit 0x20000 is set and KeI386VdmIoplAllowed is
    // non-zero, otherwise masked with 0x3E0DD7 and with bit 0x200 forced on.
    if ( (ContextRecord.EFlags & 0x20000) != 0 && KeI386VdmIoplAllowed )
      newEflag = KeI386EFlagsOrMaskV86 | ContextRecord.EFlags & KeI386EFlagsAndMaskV86;
    else
      newEflag = ContextRecord.EFlags & 0x3E0DD7 | 0x200;
    TrapFrame->EFlags = newEflag;
    // Threads with Iopl set additionally get bits 0x30 of the second EFlags
    // byte (0x3000 of the full value).
    if ( KeGetCurrentThread()->Iopl )
      BYTE1(TrapFrame->EFlags) |= 0x30u;
    // Redirect the return: new stack pointer and new instruction pointer.
    TrapFrame->HardwareEsp = _esp;
    TrapFrame->Eip = KeUserApcDispatcher;
    TrapFrame->ErrCode = 0;
    // The four slots at the new stack top carry the routine and its arguments.
    *_esp = NormalRoutine;
    _esp[1] = NormalContext;
    _esp[2] = SystemArgument1;
    _esp[3] = SystemArgument2;
    ms_exc.registration.TryLevel = -1;
  }
  xHalReferenceHandler(v10);
  return result;
}

  处理用户APC要比内核APC复杂得多,因为用户APC函数要在用户空间执行,这里涉及到大量换栈的操作:当线程从用户层进入内核层时,要保留原来的运行环境,比如各种寄存器,栈的位置等等(_Trap_Frame),然后切换成内核的堆栈,如果正常返回,恢复堆栈环境即可。但如果有用户APC要执行的话,就意味着线程要提前返回到用户空间去执行,而且返回的位置不是线程进入内核时的位置,而是返回到其他的位置,每处理一个用户APC都会涉及到:内核到用户空间再回到内核空间。

  线程进0环时,原来的运行环境(寄存器栈顶等)保存到_Trap_Frame结构体中,如果要提前返回3环去处理用户APC,就必须要修改_Trap_Frame结构体。比如:进0环时的位置存储在EIP中,现在要提前返回,而且返回的并不是原来的位置,那就意味着必须要修改EIP为新的返回位置。还有堆栈ESP,也要修改为处理APC需要的堆栈。那原来的值怎么办呢?处理完APC后该如何返回原来的位置呢?

  KiInitializeUserApc要做的第一件事就是备份,将原来_Trap_Frame的值备份到一个新的结构体中(CONTEXT),这个功能由其子函数KeContextFromKframes来完成。我们可以点进去看看:

// Copies the machine state saved in a trap frame into a CONTEXT record
// (IDA pseudo-code; the local declarations are collapsed in this listing).
//
// Only the register groups requested in ContextRecord->ContextFlags are
// copied. ExceptionFrame is not referenced anywhere in this listing.
//
// Parameters:
//   TrapFrame      - source frame holding the saved registers.
//   ExceptionFrame - unused in the visible code.
//   ContextRecord  - destination; its ContextFlags field is read to decide
//                    what to copy and may have bit 0x8 cleared (see below).
void __stdcall KeContextFromKframes(_KTRAP_FRAME *TrapFrame, _KTRAP_FRAME *ExceptionFrame, _CONTEXT *ContextRecord)
{
  // [COLLAPSED LOCAL DECLARATIONS. PRESS KEYPAD CTRL-"+" TO EXPAND]

  v3 = TrapFrame;
  // If the current IRQL is 0, raise it to 1 for the duration of the copy.
  // NewIrql receives KfRaiseIrql's return value (presumably the previous
  // IRQL, i.e. 0 - confirm), which is what the !NewIrql test at the end of
  // the function relies on to lower the IRQL again.
  NewIrql = KeGetCurrentIrql();
  if ( !NewIrql )
    NewIrql = KfRaiseIrql(1u);
  v4 = ContextRecord->ContextFlags;
  // Mask used to keep only the low 16 bits of each segment value.
  v5 = 0xFFFF;
  // Control group: Ebp, Eip, SegCs, EFlags, SegSs, Esp.
  if ( (ContextRecord->ContextFlags & CONTEXT_CONTROL) == CONTEXT_CONTROL )
  {
    ContextRecord->Ebp = TrapFrame->Ebp;
    ContextRecord->Eip = TrapFrame->Eip;
    v6 = TrapFrame->SegCs;
    // When the saved CS has no bits set in 0xFFF8 and the 0x20000 EFlags bit
    // is clear, TempSegCs is reported instead of SegCs.
    if ( (v6 & 0xFFF8) == 0 && (TrapFrame->EFlags & 0x20000) == 0 )
      v6 = TrapFrame->TempSegCs;
    ContextRecord->SegCs = v6;
    ContextRecord->EFlags = TrapFrame->EFlags;
    ContextRecord->SegSs = KiSegSsFromTrapFrame(TrapFrame);
    ContextRecord->Esp = KiEspFromTrapFrame(TrapFrame);
  }
  // Segment group: Gs, Fs, Es, Ds.
  if ( (v4 & CONTEXT_SEGMENTS) == CONTEXT_SEGMENTS )
  {
    if ( (TrapFrame->EFlags & 0x20000) != 0 )
    {
      // 0x20000 bit set in EFlags: take the values from the V86* fields.
      ContextRecord->SegGs = v5 & TrapFrame->V86Gs;
      ContextRecord->SegFs = v5 & TrapFrame->V86Fs;
      ContextRecord->SegEs = v5 & TrapFrame->V86Es;
      v7 = TrapFrame->V86Ds;
    }
    else
    {
      // When the saved CS equals 8, the trap frame's own segment fields are
      // first overwritten with fixed values (this modifies TrapFrame).
      // NOTE(review): presumably selector 8 identifies a kernel-mode frame - confirm.
      if ( TrapFrame->SegCs == 8 )
      {
        TrapFrame->SegGs = 0;
        TrapFrame->SegFs = 0x30;
        TrapFrame->SegEs = 0x23;
        TrapFrame->SegDs = 0x23;
      }
      ContextRecord->SegGs = v5 & TrapFrame->SegGs;
      ContextRecord->SegFs = v5 & TrapFrame->SegFs;
      ContextRecord->SegEs = v5 & TrapFrame->SegEs;
      v7 = TrapFrame->SegDs;
    }
    ContextRecord->SegDs = v5 & v7;
  }
  v8 = ContextRecord->ContextFlags;
  // Flag group 0x10002 (65538): general-purpose registers Edi..Eax.
  if ( (ContextRecord->ContextFlags & 0x10002) == 65538 )
  {
    ContextRecord->Edi = TrapFrame->Edi;
    ContextRecord->Esi = TrapFrame->Esi;
    ContextRecord->Ebx = TrapFrame->Ebx;
    ContextRecord->Ecx = TrapFrame->Ecx;
    ContextRecord->Edx = TrapFrame->Edx;
    ContextRecord->Eax = TrapFrame->Eax;
  }
  // Extended registers: copied only when requested, the low bit of the saved
  // CS is set, and KeI386NpxPresent is nonzero.
  if ( (v8 & CONTEXT_EXTENDED_REGISTERS) == CONTEXT_EXTENDED_REGISTERS
    && (TrapFrame->SegCs & 1) != 0
    && KeI386NpxPresent )
  {
    KiFlushNPXState(0);
    // &TrapFrame[1] is the memory immediately following the trap frame.
    // NOTE(review): presumably the NPX save area lives there - confirm.
    qmemcpy(ContextRecord->ExtendedRegisters, &TrapFrame[1], sizeof(ContextRecord->ExtendedRegisters));
    v3 = TrapFrame;
  }
  // Flag group 0x10008 (65544): fills ContextRecord->FloatSave; again only
  // when the low bit of the saved CS is set.
  if ( (ContextRecord->ContextFlags & 0x10008) == 65544 && (v3->SegCs & 1) != 0 )
  {
    // Default source: a location past the end of the trap frame.
    v9 = &v3[1].DbgEbp;
    if ( KeI386NpxPresent )
    {
      if ( KeI386FxsrPresent == 1 )
      {
        // With KeI386FxsrPresent == 1 the state is flushed into the local
        // buffer v16 and read from there instead.
        v9 = v16;
        KiFlushNPXState(v16);
      }
      else
      {
        KiFlushNPXState(0);
      }
      ContextRecord->FloatSave.ControlWord = *v9;
      ContextRecord->FloatSave.StatusWord = v9[1];
      ContextRecord->FloatSave.TagWord = v9[2];
      ContextRecord->FloatSave.ErrorOffset = v9[3];
      ContextRecord->FloatSave.ErrorSelector = v9[4];
      ContextRecord->FloatSave.DataOffset = v9[5];
      ContextRecord->FloatSave.DataSelector = v9[6];
      // NOTE(review): v3[4].Eip is the decompiler indexing past the trap frame;
      // presumably a field of the save area that follows it - confirm.
      ContextRecord->FloatSave.Cr0NpxState = v3[4].Eip;
      // Copy the 0x50-element register area, reading from offset 28 of the source.
      for ( i = 0; i < 0x50; ++i )
        ContextRecord->FloatSave.RegisterArea[i] = *(v9 + i + 28);
    }
    else if ( KiEm87StateToNpxFrame(&ContextRecord->FloatSave) )
    {
      // KeI386NpxPresent is zero: KiEm87StateToNpxFrame filled FloatSave.
      ContextRecord->FloatSave.Cr0NpxState = v3[4].Eip;
    }
    else
    {
      // Nothing could be provided: clear bit 0x8 in ContextFlags.
      ContextRecord->ContextFlags &= 0xFFFFFFF7;
    }
  }
  // Debug-register group: Dr0-Dr3, Dr6, Dr7.
  if ( (ContextRecord->ContextFlags & CONTEXT_DEBUG_REGISTERS) == CONTEXT_DEBUG_REGISTERS )
  {
    v11 = (v3->SegCs & 1) == 0;
    v12 = KeGetCurrentThread()->DebugActive;
    v15[0] = v12;
    // Real values are reported only when (low CS bit set or 0x20000 EFlags bit
    // set) and the current thread's DebugActive is nonzero; otherwise zeros.
    if ( (!v11 || (v3->EFlags & 0x20000) != 0) && v12 )
    {
      ContextRecord->Dr0 = v3->Dr0;
      ContextRecord->Dr1 = v3->Dr1;
      ContextRecord->Dr2 = v3->Dr2;
      ContextRecord->Dr3 = v3->Dr3;
      ContextRecord->Dr6 = v3->Dr6;
      v13 = KiUpdateDr7(v3->Dr7, v15);
    }
    else
    {
      v13 = 0;
      ContextRecord->Dr0 = 0;
      ContextRecord->Dr1 = 0;
      ContextRecord->Dr2 = 0;
      ContextRecord->Dr3 = 0;
      ContextRecord->Dr6 = 0;
    }
    ContextRecord->Dr7 = v13;
  }
  // Undo the IRQL raise performed at the top of the function.
  if ( !NewIrql )
    KfLowerIrql(0);
  // NOTE(review): presumably a mis-resolved symbol (e.g. a stack-cookie check
  // on the slot after the v16 buffer) - confirm against the disassembly.
  xHalReferenceHandler(v16[30]);
}

  备份完毕后,我们就可以随心所欲地修改了。_esp = ((ContextRecord.Esp & 0xFFFFFFFC) - 0x2DC)这个就是所谓的提栈操作,为构造的新Context提供空间:0x2DC = 0x2CC(拷贝的CONTEXT大小)+ 0x10(NormalRoutine等四个参数),ContextRecord.Esp & 0xFFFFFFFC就是让栈指针实现4字节对齐。随后把TrapFrame里的段寄存器改为3环的环境,我们重点看下面的代码:

// Excerpt from KiInitializeUserApc: redirect the return to user mode.
// New user stack pointer (the area reserved below the original ESP).
TrapFrame->HardwareEsp = _esp;
// New user instruction pointer.
TrapFrame->Eip = KeUserApcDispatcher;
TrapFrame->ErrCode = 0;
// The four APC values occupy the first four slots of the new stack.
*_esp = NormalRoutine;
_esp[1] = NormalContext;
_esp[2] = SystemArgument1;
_esp[3] = SystemArgument2;

  可以看出,这几个操作就是修改EIP并压栈的操作,KeUserApcDispatcher是一个全局变量,这个是3环的函数,在系统启动完毕后就会赋值,我们在WinDbg看看是何方神圣:

kd> !process 0 0
**** NT ACTIVE PROCESS DUMP ****
……

Failed to get VadRoot
PROCESS 89b51b28  SessionId: 0  Cid: 05f0    Peb: 7ffde000  ParentCid: 05d4
    DirBase: 12fc01c0  ObjectTable: e17efb00  HandleCount: 353.
    Image: explorer.exe

……

kd> .process 89b51b28
ReadVirtual: 89b51b40 not properly sign extended
Implicit process is now 89b51b28
WARNING: .cache forcedecodeuser is not enabled
kd> u 7c92e430
ntdll!KiUserApcDispatcher:
7c92e430 8d7c2410        lea     edi,[esp+10h]
7c92e434 58              pop     eax
7c92e435 ffd0            call    eax
7c92e437 6a01            push    1
7c92e439 57              push    edi
7c92e43a e801ecffff      call    ntdll!NtContinue (7c92d040)
7c92e43f 90              nop
ntdll!KiUserCallbackDispatcher:
7c92e440 83c404          add     esp,4

  可以看出这个函数就是KiUserApcDispatcher,在ntdll.dll当中,我们用IDA打开看看:

; __stdcall KiUserApcDispatcher(x, x, x, x, x)
; User-mode entry point reached after KiInitializeUserApc rewrites the trap frame.
; On entry the stack holds, from the top: NormalRoutine, NormalContext,
; SystemArgument1, SystemArgument2, then the copied CONTEXT at esp+10h.
                public _KiUserApcDispatcher@20
_KiUserApcDispatcher@20 proc near       ; DATA XREF: .text:off_7C923428↑o

arg_C           = byte ptr  10h

                ; edi = esp + 10h: address of the CONTEXT that follows the four slots
                lea     edi, [esp+arg_C]
                ; eax = first slot (NormalRoutine); the three remaining slots stay
                ; on the stack for the call below
                pop     eax
                call    eax
                ; ZwContinue(edi, 1): first argument is the CONTEXT address,
                ; second argument is the constant 1
                push    1
                push    edi
                call    _ZwContinue@8   ; ZwContinue(x,x)
                nop
_KiUserApcDispatcher@20 endp ; sp-analysis failed

  esp + 10h就是拷贝到用户栈上的CONTEXT结构体(即_Trap_Frame的备份)的地址,栈顶的0x10字节依次是NormalRoutine、NormalContext、SystemArgument1、SystemArgument2。我们画个堆栈图就能明白了:

  当用户在3环调用QueueUserAPC函数来插入APC时,不需要提供NormalRoutine,这个参数是在QueueUserAPC内部指定的,是BaseDispatchAPC这个函数,可以自行翻阅我分析的过程。

  我们下面再看看ZwContinue函数,它的作用如下:

  1. 返回内核,如果还有用户APC,重复上面的执行过程。

  2. 如果没有需要执行的用户APC,会将CONTEXT赋值给Trap_Frame结构体。就像从来没有修改过一样。ZwContinue后面的代码不会执行,线程从哪里进0环仍然会从哪里回去。

  我们继续做出如下总结:

  1. 内核APC在线程切换时执行,不需要换栈,比较简单,一个循环执行完毕。

  2. 用户APC在系统调用、中断或异常返回3环前会进行判断,如果有要执行的用户APC,再执行。

  3. 用户APC执行前会先执行内核APC。

  与APC执行相关的知识,到此就结束了。

      APC 篇——总结与提升