Hypercall mechanism of KVM

Posted by pmaonline on Fri, 21 Jan 2022 08:08:58 +0100

Only x86 architecture is concerned here.

According to the kernel documentation, on the x86 architecture a KVM hypercall is a 3-byte instruction: either the vmcall instruction or the vmmcall instruction.

Up to four arguments are passed in the registers rbx, rcx, rdx and rsi. The hypercall number is placed in rax, and the return value is also returned in rax; no other registers are involved.

Principle analysis

VM call interface

arch/x86/include/asm/kvm_para.h defines the hypercall interface:

/*
 * Instruction used to trap from the guest into the hypervisor.
 * "vmcall" is listed first and "vmmcall" second, tied to the
 * X86_FEATURE_VMMCALL CPU feature flag.
 * NOTE(review): ALTERNATIVE() is defined elsewhere; presumably it patches in
 * "vmmcall" at boot on CPUs that advertise that feature -- confirm.
 */
#define KVM_HYPERCALL \
        ALTERNATIVE("vmcall", "vmmcall", X86_FEATURE_VMMCALL)

/*
 * kvm_hypercall0 - issue a hypercall that takes no arguments.
 * @nr: hypercall number, loaded into the "a" register (rax).
 *
 * Returns the value found in the "a" register after the instruction.
 * The "memory" clobber stops the compiler from keeping memory values
 * cached in registers across the call.
 */
static inline long kvm_hypercall0(unsigned int nr)
{
    long ret;

    asm volatile(KVM_HYPERCALL : "=a"(ret) : "a"(nr) : "memory");

    return ret;
}

/*
 * kvm_hypercall1 - issue a hypercall with one argument.
 * @nr: hypercall number, loaded into the "a" register (rax).
 * @p1: first argument, loaded into the "b" register (rbx).
 *
 * Returns the value found in the "a" register after the instruction.
 */
static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
{
    long ret;

    asm volatile(KVM_HYPERCALL : "=a"(ret) : "a"(nr), "b"(p1) : "memory");

    return ret;
}

/*
 * kvm_hypercall2 - issue a hypercall with two arguments.
 * @nr: hypercall number, loaded into the "a" register (rax).
 * @p1: first argument, loaded into the "b" register (rbx).
 * @p2: second argument, loaded into the "c" register (rcx).
 *
 * Returns the value found in the "a" register after the instruction.
 */
static inline long
kvm_hypercall2(unsigned int nr, unsigned long p1, unsigned long p2)
{
    long ret;

    asm volatile(KVM_HYPERCALL : "=a"(ret)
                 : "a"(nr), "b"(p1), "c"(p2)
                 : "memory");

    return ret;
}

/*
 * kvm_hypercall3 - issue a hypercall with three arguments.
 * @nr: hypercall number, loaded into the "a" register (rax).
 * @p1: first argument, loaded into the "b" register (rbx).
 * @p2: second argument, loaded into the "c" register (rcx).
 * @p3: third argument, loaded into the "d" register (rdx).
 *
 * Returns the value found in the "a" register after the instruction.
 */
static inline long
kvm_hypercall3(unsigned int nr, unsigned long p1, unsigned long p2,
               unsigned long p3)
{
    long ret;

    asm volatile(KVM_HYPERCALL : "=a"(ret)
                 : "a"(nr), "b"(p1), "c"(p2), "d"(p3)
                 : "memory");

    return ret;
}

/*
 * kvm_hypercall4 - issue a hypercall with four arguments.
 * @nr: hypercall number, loaded into the "a" register (rax).
 * @p1: first argument, loaded into the "b" register (rbx).
 * @p2: second argument, loaded into the "c" register (rcx).
 * @p3: third argument, loaded into the "d" register (rdx).
 * @p4: fourth argument, loaded into the "S" register (rsi).
 *
 * Returns the value found in the "a" register after the instruction.
 */
static inline long
kvm_hypercall4(unsigned int nr, unsigned long p1, unsigned long p2,
               unsigned long p3, unsigned long p4)
{
    long ret;

    asm volatile(KVM_HYPERCALL : "=a"(ret)
                 : "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4)
                 : "memory");

    return ret;
}

/*
 * kvm_sev_hypercall3 - three-argument hypercall that always uses "vmmcall".
 * @nr: hypercall number, loaded into the "a" register (rax).
 * @p1: first argument, loaded into the "b" register (rbx).
 * @p2: second argument, loaded into the "c" register (rcx).
 * @p3: third argument, loaded into the "d" register (rdx).
 *
 * Unlike the kvm_hypercallN() helpers, the instruction is hard-coded
 * instead of going through KVM_HYPERCALL.
 * NOTE(review): presumably intended for AMD SEV guests, going by the
 * function name -- confirm against the callers.
 *
 * Returns the value found in the "a" register after the instruction.
 */
static inline long
kvm_sev_hypercall3(unsigned int nr, unsigned long p1, unsigned long p2,
                   unsigned long p3)
{
    long ret;

    asm volatile("vmmcall" : "=a"(ret)
                 : "a"(nr), "b"(p1), "c"(p2), "d"(p3)
                 : "memory");

    return ret;
}

These functions differ mainly in the number of arguments they pass.

KVM_HYPERCALL is a 3-byte instruction sequence: on x86 it is the vmcall instruction (or vmmcall on CPUs with the X86_FEATURE_VMMCALL feature). Executing it causes a VM exit to the VMM.

Taking the kvm_hypercall4() function as an example:

"=a"(ret): indicates that the return value is read from the rax register;

"a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4): indicates that the call number nr is stored in the rax register, the p1 parameter is stored in rbx, p2 is stored in rcx, p3 is stored in rdx, and p4 is stored in rsi.

VMM processing interface

The VM-exit handlers are defined in arch/x86/kvm/vmx/vmx.c:

/*
 * The exit handlers return 1 if the exit was handled fully and guest execution
 * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
 * to be done to userspace and return 0.
 *
 * The table is indexed by exit reason; the "..." lines stand for entries
 * that are omitted from this excerpt.
 */
static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
	...
    /* Exits with reason EXIT_REASON_VMCALL are mapped to handle_vmcall(). */
    [EXIT_REASON_VMCALL]                  = handle_vmcall,
	...
}

/*
 * Exit handler registered for EXIT_REASON_VMCALL: forwards the vCPU to
 * kvm_emulate_hypercall() and returns that function's result unchanged.
 */
static int handle_vmcall(struct kvm_vcpu *vcpu)
{
    return kvm_emulate_hypercall(vcpu);
}

The function that handles hypercalls, kvm_emulate_hypercall(), is defined in arch/x86/kvm/x86.c:

/*
 * Emulate a hypercall issued by the guest.
 *
 * Reads the hypercall number from RAX and up to four arguments from RBX,
 * RCX, RDX and RSI, dispatches on the number, and writes the result back to
 * RAX. If Hyper-V hypercalls are enabled for the VM, the request is handed
 * to kvm_hv_hypercall() instead and nothing else in this function runs.
 *
 * Returns the result of kvm_skip_emulated_instruction(), or the result of
 * kvm_hv_hypercall() on the Hyper-V path.
 */
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
    unsigned long nr, a0, a1, a2, a3, ret;
    int op_64_bit;

    if (kvm_hv_hypercall_enabled(vcpu->kvm))
        return kvm_hv_hypercall(vcpu);

    nr = kvm_rax_read(vcpu);		/* hypercall number comes from RAX */
    a0 = kvm_rbx_read(vcpu);		/* arguments come from RBX, RCX, RDX, RSI */
    a1 = kvm_rcx_read(vcpu);
    a2 = kvm_rdx_read(vcpu);
    a3 = kvm_rsi_read(vcpu);

    /* Traced before the truncation below, i.e. with the full register values. */
    trace_kvm_hypercall(nr, a0, a1, a2, a3);

    /* Outside 64-bit mode only the low 32 bits of each register are used. */
    op_64_bit = is_64_bit_mode(vcpu);
    if (!op_64_bit) {
        nr &= 0xFFFFFFFF;
        a0 &= 0xFFFFFFFF;
        a1 &= 0xFFFFFFFF;
        a2 &= 0xFFFFFFFF;
        a3 &= 0xFFFFFFFF;
    }

    /* A non-zero CPL is rejected with -KVM_EPERM without dispatching. */
    if (kvm_x86_ops.get_cpl(vcpu) != 0) {
        ret = -KVM_EPERM;
        goto out;
    }
    
    /*
     * Default result. It is also what the guest sees when a case below
     * breaks out early because guest_pv_has() returned false for the
     * feature that case requires.
     */
    ret = -KVM_ENOSYS;

    switch (nr) {			/* dispatch on the hypercall number */
    case KVM_HC_VAPIC_POLL_IRQ:
        /* Nothing to do here beyond reporting success. */
        ret = 0;
        break;
    case KVM_HC_KICK_CPU:
        if (!guest_pv_has(vcpu, KVM_FEATURE_PV_UNHALT))
            break;

        /* a0 and a1 go to the kick helper; a1 is also passed to kvm_sched_yield(). */
        kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
        kvm_sched_yield(vcpu->kvm, a1);
        ret = 0;
        break;
    /* This case is only compiled in when CONFIG_X86_64 is defined. */
#ifdef CONFIG_X86_64
    case KVM_HC_CLOCK_PAIRING:
        ret = kvm_pv_clock_pairing(vcpu, a0, a1);
        break;
#endif
    case KVM_HC_SEND_IPI:
        if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SEND_IPI))
            break;

        /* The helper's return value becomes the hypercall result. */
        ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
        break;
    case KVM_HC_SCHED_YIELD:
        if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
            break;

        kvm_sched_yield(vcpu->kvm, a0);
        ret = 0;
        break;
    default:
        /* Unknown hypercall number (same value as the default set above). */
        ret = -KVM_ENOSYS;
        break;
    }
out:
    /* Outside 64-bit mode, keep only the low 32 bits of the result. */
    if (!op_64_bit)
        ret = (u32)ret;
    kvm_rax_write(vcpu, ret);		/* hand the result back to the guest in RAX */

    /* Counted for every hypercall that reaches this point, including rejected ones. */
    ++vcpu->stat.hypercalls;
    return kvm_skip_emulated_instruction(vcpu);
}
EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);

The hypercall number nr can take the following values:

  • KVM_HC_VAPIC_POLL_IRQ

    Triggers a guest exit so that the host can check for pending interrupts when it re-enters the guest.

  • KVM_HC_KICK_CPU

    Wake vcpu from HLT state.

    Use examples:

    A vCPU in the guest is busy-waiting for a resource (such as a spinlock). Once it has busy-waited for longer than a time threshold, it can execute the HLT instruction. After HLT is executed, the VMM puts the vCPU to sleep until an appropriate event occurs. Another vCPU of the same guest can then wake the sleeping vCPU by issuing the KVM_HC_KICK_CPU hypercall, specifying the APIC ID of the vCPU to wake (the a1 parameter); the additional parameter a0 is reserved for future use.

  • KVM_HC_CLOCK_PAIRING

    Synchronize the clocks of VMM and VM.

    a0: guest physical address to which the host copies the struct kvm_clock_offset structure

    a1: clock_type; currently only KVM_CLOCK_PAIRING_WALLCLOCK (0) is supported (it corresponds to the host's CLOCK_REALTIME clock)

  • KVM_HC_SEND_IPI

    Sends inter-processor interrupts (IPIs) to multiple vCPUs. Returns the number of vCPUs to which the IPI was successfully delivered.

    This hypercall lets the guest send a multicast IPI, with up to 128 destinations per hypercall in 64-bit mode and up to 64 destinations per hypercall in 32-bit mode.

  • KVM_HC_SCHED_YIELD

    Used to yield if an IPI target vCPU has been preempted.

    When a multicast IPI is being sent and one of the target vCPUs has been preempted, the sender yields the CPU.

Example of a VM issuing a hypercall

In the Linux 5.10.59 kernel source, search for the places where the kvm_hypercall* functions are called:

[root@localhost linux-5.10.59]# grep --include="*.c" -nr kvm_hypercall* .
./arch/arm64/kvm/pvtime.c:35:long kvm_hypercall_pv_features(struct kvm_vcpu *vcpu)
./arch/arm64/kvm/hypercalls.c:67:               val = kvm_hypercall_pv_features(vcpu);
./arch/x86/kvm/x86.c:8156:      trace_kvm_hypercall(nr, a0, a1, a2, a3);
./arch/x86/kernel/kvm.c:538:                    ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
./arch/x86/kernel/kvm.c:549:            ret = kvm_hypercall4(KVM_HC_SEND_IPI, (unsigned long)ipi_bitmap,
./arch/x86/kernel/kvm.c:594:                    kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
./arch/x86/kernel/kvm.c:872:    kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
./tools/testing/selftests/kvm/lib/x86_64/processor.c:1249:uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
./tools/testing/selftests/kvm/x86_64/kvm_pv_test.c:126: r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
./drivers/ptp/ptp_kvm.c:58:             ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
./drivers/ptp/ptp_kvm.c:119:    ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING,
./drivers/ptp/ptp_kvm.c:180:    ret = kvm_hypercall2(KVM_HC_CLOCK_PAIRING, clock_pair_gpa,

Take one of them as an example: waking up a vCPU in arch/x86/kernel/kvm.c:

/*
 * Kick a cpu by its apicid. Used to wake up a halted vcpu.
 *
 * Looks up the APIC ID recorded for @cpu and issues KVM_HC_KICK_CPU with
 * flags (always 0 here) as the first argument and the APIC ID as the second.
 * The hypercall's return value is ignored.
 */
static void kvm_kick_cpu(int cpu)
{
    unsigned long flags = 0;
    int apicid = per_cpu(x86_cpu_to_apicid, cpu);

    kvm_hypercall2(KVM_HC_KICK_CPU, flags, apicid);
}

Topics: Linux kvm