blob: e780efc8dabe08b26e0bd94c0f9625c7a2da1fef [file] [log] [blame]
/*
* Copyright 2017, Data61
* Commonwealth Scientific and Industrial Research Organisation (CSIRO)
* ABN 41 687 119 230.
*
* This software may be distributed and modified according to the terms of
* the GNU General Public License version 2. Note that NO WARRANTY is provided.
* See "LICENSE_GPLv2.txt" for details.
*
* @TAG(DATA61_GPL)
*/
/* x86 fetch/decode/emulate code
Author: W.A.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "vmm/debug.h"
#include "vmm/platform/guest_vspace.h"
#include "vmm/platform/guest_memory.h"
#include "vmm/guest_state.h"
#include "vmm/processor/platfeature.h"
/* TODO are these defined elsewhere? */
/* Bit-field helpers for 32-bit (non-PAE) paging entries and for one-byte
   opcodes / ModRM bytes. Every macro argument is parenthesised so that a
   compound argument such as `a | b` is masked as a whole. */
#define IA32_PDE_SIZE(pde) ((pde) & BIT(7))     /* non-zero: 4M page (see use in vmm_fetch_instruction) */
#define IA32_PDE_PRESENT(pde) ((pde) & BIT(0))  /* non-zero: entry is present */
#define IA32_PTE_ADDR(pte) ((pte) & 0xFFFFF000) /* upper 20 bits: 4K frame / page table address */
#define IA32_PSE_ADDR(pde) ((pde) & 0xFFC00000) /* upper 10 bits: 4M frame address */
#define IA32_OPCODE_S(op) ((op) & BIT(0))
#define IA32_OPCODE_D(op) ((op) & BIT(1))
/* Name (sic) is kept unchanged so existing users keep compiling. */
#define IA32_OPCODY_BODY(op) ((op) & 0xFC)
#define IA32_MODRM_REG(m) (((m) & 0x38) >> 3)   /* bits 5:3 of the ModRM byte */
/* Multiplier applied to a segment value to obtain its linear base. */
#define SEG_MULT (0x10)
/* Get a word from a guest physical address.
 *
 * vmm:  VMM instance; its guest_mem.vspace is the address space touched.
 * addr: guest physical address of the 32-bit word.
 *
 * Returns whatever the vmm_guest_get_phys_data_help callback stored through
 * the cookie pointer (presumably the word read from guest memory -- confirm
 * against the helper). The result of vmm_guest_vspace_touch is not checked,
 * so the local is zero-initialised: if the callback never runs the caller
 * gets 0 rather than an indeterminate value. */
static inline uint32_t guest_get_phys_word(vmm_t *vmm, uintptr_t addr) {
    uint32_t val = 0;

    vmm_guest_vspace_touch(&vmm->guest_mem.vspace, addr, sizeof(uint32_t),
            vmm_guest_get_phys_data_help, &val);

    return val;
}
/* Fetch a guest's instruction.
 *
 * Walks the guest's two-level (non-PAE) page tables rooted at cr3 to
 * translate eip to a guest physical address, then copies len bytes from
 * there into buf.
 *
 * vcpu: vcpu whose CR4 is consulted and whose vmm supplies guest memory.
 * eip:  guest virtual address of the first instruction byte.
 * cr3:  guest physical address of the page directory.
 * len:  number of bytes to fetch; all of them must lie in one 4K page.
 * buf:  destination, at least len bytes.
 *
 * Returns 0 on success, -1 if PAE paging is enabled or len is negative.
 * A non-present PDE/PTE or a fetch that crosses a page boundary trips an
 * assert; with asserts compiled out these conditions are not detected.
 */
int vmm_fetch_instruction(vmm_vcpu_t *vcpu, uint32_t eip, uintptr_t cr3,
        int len, uint8_t *buf) {
    /* Walk page tables to get physical address of instruction */
    uintptr_t instr_phys = 0;

    /* ensure that PAE is not enabled */
    if (vmm_guest_state_get_cr4(&vcpu->guest_state, vcpu->guest_vcpu) & X86_CR4_PAE) {
        ZF_LOGE("Do not support walking PAE paging structures");
        return -1;
    }

    /* A negative length would be converted to a huge size below. */
    if (len < 0) {
        ZF_LOGE("Negative instruction length");
        return -1;
    }

    // TODO implement page-boundary crossing properly
    /* The last byte fetched is eip + len - 1, not eip + len: an instruction
       that ends exactly on the last byte of a page does not cross into the
       next page and must be accepted. */
    uint32_t last_byte = (len > 0) ? eip + (uint32_t)len - 1 : eip;
    assert((eip >> 12) == (last_byte >> 12));

    uint32_t pdi = eip >> 22;            /* page directory index: bits 31:22 */
    uint32_t pti = (eip >> 12) & 0x3FF;  /* page table index: bits 21:12 */

    /* Entries are 4 bytes wide */
    uint32_t pde = guest_get_phys_word(vcpu->vmm, cr3 + pdi * 4);

    assert(IA32_PDE_PRESENT(pde)); /* WTF? */

    if (IA32_PDE_SIZE(pde)) {
        /* PSE is used, 4M pages */
        instr_phys = (uintptr_t)IA32_PSE_ADDR(pde) + (eip & 0x3FFFFF);
    } else {
        /* 4k pages */
        uint32_t pte = guest_get_phys_word(vcpu->vmm,
                (uintptr_t)IA32_PTE_ADDR(pde) + pti * 4);

        /* Bit 0 is tested on the PTE using the PDE helper */
        assert(IA32_PDE_PRESENT(pte));

        instr_phys = (uintptr_t)IA32_PTE_ADDR(pte) + (eip & 0xFFF);
    }

    /* Fetch instruction */
    vmm_guest_vspace_touch(&vcpu->vmm->guest_mem.vspace, instr_phys, len,
            vmm_guest_get_phys_data_help, buf);

    return 0;
}
/* Returns 1 if this byte is one of the x86 instruction prefixes this decoder
   recognises, 0 otherwise. The recognised set is 0x26, 0x2e, 0x36, 0x3e,
   0x64, 0x65, 0x66 and 0x67; bytes such as 0xf0, 0xf2 and 0xf3 are not in
   the table and yield 0. */
static int is_prefix(uint8_t byte) {
    static const uint8_t known_prefixes[] = {
        0x26, 0x2e, 0x36, 0x3e,
        0x64, 0x65,
        0x66,
        0x67,
    };
    const unsigned int count = sizeof(known_prefixes) / sizeof(known_prefixes[0]);

    for (unsigned int k = 0; k < count; k++) {
        if (known_prefixes[k] == byte) {
            return 1;
        }
    }
    return 0;
}
/* Print the first instr_len bytes of instr to stdout as space-separated hex
   values on a single line, preceded by "instruction dump: ". */
static void debug_print_instruction(uint8_t *instr, int instr_len) {
    int pos = 0;

    printf("instruction dump: ");
    while (pos < instr_len) {
        printf("%2x ", instr[pos]);
        pos++;
    }
    printf("\n");
}
/* Partial support to decode an instruction for a memory access.
   This is very crude. It can break in many ways.

   instr:     instruction bytes, starting with any prefixes.
   instr_len: number of valid bytes in instr; for the immediate forms the
              immediate is taken from the LAST bytes of this range.
   reg:       out; ModRM reg field for opcodes 0x88-0x8c, -1 for the
              immediate forms 0xc6/0xc7.
   imm:       out; immediate value, written only for 0xc6/0xc7.
   op_len:    out; operand length in bytes: 2 if a 0x66 prefix was seen,
              otherwise 4 if the low opcode bit is set, otherwise 1.

   Returns 0 on success. Unsupported or truncated instructions trip an
   assert; if asserts are compiled out, -1 is returned instead and the
   output parameters are left untouched. */
int vmm_decode_instruction(uint8_t *instr, int instr_len, int *reg, uint32_t *imm, int *op_len) {
    /* First loop through and check prefixes */
    int oplen = 1; /* Operand length */
    int i;
    for (i = 0; i < instr_len && is_prefix(instr[i]); i++) {
        if (instr[i] == 0x66) {
            /* 16 bit modifier */
            oplen = 2;
        }
    }
    /* i now indexes the first non-prefix byte, i.e. the opcode */

    assert(i < instr_len); /* We still need an opcode */
    if (i >= instr_len) {
        return -1;
    }

    uint8_t opcode = instr[i];
    //uint8_t opcode_ex = 0;
    if (opcode == 0x0f) {
        printf("can't emulate instruction with multi-byte opcode!\n");
        debug_print_instruction(instr, instr_len);
        assert(0); /* We don't handle >1 byte opcodes */
        return -1;
    }

    if (oplen != 2 && IA32_OPCODE_S(opcode)) {
        oplen = 4;
    }

    /* The byte after the opcode is read as ModRM; make sure it exists
       rather than reading past the end of the buffer. */
    if (i + 1 >= instr_len) {
        printf("can't emulate this instruction!\n");
        debug_print_instruction(instr, instr_len);
        assert(0);
        return -1;
    }
    uint8_t modrm = instr[++i];

    switch (opcode) {
    case 0x88:
    case 0x89:
    case 0x8a:
    case 0x8b:
    case 0x8c:
        // Mov with register
        *reg = IA32_MODRM_REG(modrm);
        *op_len = oplen;
        break;
    case 0xc6:
    case 0xc7: {
        // Mov with immediate
        /* The immediate occupies the final oplen bytes; they must all lie
           after the ModRM byte at index i. */
        if (instr_len - oplen <= i) {
            printf("can't emulate this instruction!\n");
            debug_print_instruction(instr, instr_len);
            assert(0);
            return -1;
        }
        /* Assemble the trailing bytes as a little-endian value: the last
           byte of the instruction ends up most significant. */
        uint32_t immediate = 0;
        for (int j = 0; j < oplen; j++) {
            immediate = (immediate << 8) | instr[instr_len - j - 1];
        }
        *reg = -1;
        *op_len = oplen;
        *imm = immediate;
        break;
    }
    default:
        printf("can't emulate this instruction!\n");
        debug_print_instruction(instr, instr_len);
        assert(0);
        return -1;
    }

    return 0;
}
/*
Useful information: The GDT loaded by the Linux SMP trampoline looks like:
0x00: 00 00 00 00 00 00 00 00
0x08: 00 00 00 00 00 00 00 00
0x10: ff ff 00 00 00 9b cf 00 <- Executable 0x00000000-0xffffffff
0x18: ff ff 00 00 00 93 cf 00 <- RW data 0x00000000-0xffffffff
*/
/* Interpret just enough virtual 8086 instructions to run trampoline code.
Returns the final jump address.

gm:        guest memory; its vspace is used for all guest memory accesses.
instr_buf: buffer holding the code to interpret.
segment:   in/out; segment value used to form memory addresses
           (*segment * SEG_MULT + offset). Overwritten by every far jump.
eip:       address that corresponds to instr_buf[0]; used to map jump
           targets back to positions in instr_buf.
len:       number of bytes in instr_buf.
gs:        guest state that receives IDT/GDT, EAX and CS selector updates.

Returns the target of the first far jump (0xea) whose address is at or
beyond eip + len, after storing the new selector as CS. Returns 0 if the
end of the buffer is reached without such a jump.

NOTE(review): only the loop head checks the position against len; operand
bytes are read without further bounds checks, so a truncated instruction at
the end of the buffer reads past it -- confirm callers over-allocate.
NOTE(review): 2-byte memcpy into zero-initialised wider integers assumes a
little-endian host. */
uintptr_t vmm_emulate_realmode(guest_memory_t *gm, uint8_t *instr_buf,
uint16_t *segment, uintptr_t eip, uint32_t len, guest_state_t *gs)
{
/* We only track one segment, and assume that code and data are in the same
segment, which is valid for most trampoline and bootloader code */
uint8_t *instr = instr_buf;
assert(segment);
/* One instruction is interpreted per iteration; instr always points at the
next byte to decode. */
while (instr - instr_buf < len) {
uintptr_t mem = 0;
uint32_t lit = 0;
int m66 = 0;
uint32_t base = 0;
uint32_t limit = 0;
/* A single leading 0x66 prefix is remembered; it selects the 4-byte forms
of the far jump and of the immediate store below. */
if (*instr == 0x66) {
m66 = 1;
instr++;
}
if (*instr == 0x0f) {
/* Two-byte opcode: only 0x0f 0x01 with ModRM 0x1e (lidt) or 0x16 (lgdt),
each followed by a 2-byte address, is interpreted. */
instr++;
if (*instr == 0x01) {
instr++;
if (*instr == 0x1e) {
// lidtl
instr++;
/* 2-byte offset, relative to the tracked segment */
memcpy(&mem, instr, 2);
mem += *segment * SEG_MULT;
instr += 2;
/* Limit is first 2 bytes, base is next 4 bytes */
vmm_guest_vspace_touch(&gm->vspace, mem,
2, vmm_guest_get_phys_data_help, &limit);
vmm_guest_vspace_touch(&gm->vspace, mem + 2,
4, vmm_guest_get_phys_data_help, &base);
DPRINTF(4, "lidtl %p\n", (void*)mem);
vmm_guest_state_set_idt_base(gs, base);
vmm_guest_state_set_idt_limit(gs, limit);
} else if (*instr == 0x16) {
// lgdtl
instr++;
/* 2-byte offset, relative to the tracked segment */
memcpy(&mem, instr, 2);
mem += *segment * SEG_MULT;
instr += 2;
/* Limit is first 2 bytes, base is next 4 bytes */
vmm_guest_vspace_touch(&gm->vspace, mem,
2, vmm_guest_get_phys_data_help, &limit);
vmm_guest_vspace_touch(&gm->vspace, mem + 2,
4, vmm_guest_get_phys_data_help, &base);
DPRINTF(4, "lgdtl %p; base = %x, limit = %x\n", (void*)mem,
base, limit);
vmm_guest_state_set_gdt_base(gs, base);
vmm_guest_state_set_gdt_limit(gs, limit);
} else {
/* Any other 0x0f 0x01 form: skip the ModRM byte only and ignore it */
instr++;
}
} else {
/* Any other 0x0f opcode: skip its second byte only and ignore it */
instr++;
}
} else if (*instr == 0xea) {
/* Absolute jmp */
instr++;
/* NOTE(review): this declaration shadows the outer `base`. */
uint32_t base = 0;
uintptr_t jmp_addr = 0;
if (m66) {
// base is 4 bytes
/* Make the wild assumptions that we are now in protected mode
and the relevant GDT entry just covers all memory. Therefore
the base address is our absolute address. This just happens
to work with Linux and probably other modern systems that
don't use the GDT much. */
memcpy(&base, instr, 4);
instr += 4;
jmp_addr = base;
memcpy(segment, instr, 2);
} else {
/* 2-byte offset followed by a 2-byte segment; the target is
segment * SEG_MULT + offset */
memcpy(&base, instr, 2);
instr += 2;
memcpy(segment, instr, 2);
jmp_addr = *segment * SEG_MULT + base;
}
/* Step over the 2-byte segment/selector read in either branch */
instr += 2;
DPRINTF(4, "absolute jmpf $%p, cs now %04x\n", (void*)jmp_addr, *segment);
/* A target at or past the end of the buffer ends the emulation; any
other target is treated as lying inside the buffer and decoding resumes
there. NOTE(review): a target below eip would move instr before
instr_buf -- confirm this cannot occur. */
if (((int64_t)jmp_addr - (int64_t)(len + eip)) >= 0) {
vmm_guest_state_set_cs_selector(gs, *segment);
return jmp_addr;
} else {
instr = jmp_addr - eip + instr_buf;
}
} else {
switch (*instr) {
case 0xa1:
/* mov offset memory to eax */
instr++;
memcpy(&mem, instr, 2);
instr += 2;
mem += *segment * SEG_MULT;
DPRINTF(4, "mov %p, eax\n", (void*)mem);
/* Four bytes are always read here, whether or not a 0x66 prefix was seen */
uint32_t eax;
vmm_guest_vspace_touch(&gm->vspace, mem,
4, vmm_guest_get_phys_data_help, &eax);
vmm_set_user_context(gs, USER_CONTEXT_EAX, eax);
break;
case 0xc7:
instr++;
/* Only ModRM 0x06 (2-byte direct address) is handled. For any other
ModRM value nothing further is consumed, so the ModRM byte itself is
decoded as the next opcode on the following iteration. */
if (*instr == 0x06) { // modrm
int size;
instr++;
/* mov literal to memory */
memcpy(&mem, instr, 2);
mem += *segment * SEG_MULT;
instr += 2;
/* Immediate is 4 bytes with a 0x66 prefix, otherwise 2 bytes */
if (m66) {
memcpy(&lit, instr, 4);
size = 4;
} else {
memcpy(&lit, instr, 2);
size = 2;
}
instr += size;
DPRINTF(4, "mov $0x%x, %p\n", lit, (void*)mem);
vmm_guest_vspace_touch(&gm->vspace, mem,
size, vmm_guest_set_phys_data_help, &lit);
}
break;
case 0xba:
//?????mov literal to dx
/* ignore */
/* NOTE(review): only 2 bytes (opcode + 1) are skipped here; confirm this
matches the encoded immediate width. */
instr += 2;
break;
case 0x8c:
case 0x8e:
/* mov to/from sreg. ignore */
/* Skips the opcode and one following byte */
instr += 2;
break;
default:
/* Assume this is a single byte instruction we can ignore */
instr++;
}
}
DPRINTF(6, "read %zu bytes\n", (size_t)(instr - instr_buf));
}
/* Ran off the end of the buffer without a far jump out of it */
return 0;
}