system/xen: Updated for version 4.11.0

Signed-off-by: Mario Preksavec <mario@slackware.hr>
author: Mario Preksavec <mario@slackware.hr> 2018-08-25 14:16:23 +0200
committer: Willy Sudiarto Raharjo <willysr@slackbuilds.org> 2018-09-01 07:32:30 +0700
commit: 9be84725e758c71832b27d3b3918cd67cc65f182 (patch)
tree: 7617b9cb8c97051797f9464a2b0e396a1b303d20
parent: 78ff47b691fb8043946cb8bcc3b820b7369d9d7f (diff)
download: slackbuilds-9be84725e758c71832b27d3b3918cd67cc65f182.tar.gz
19 files changed, 4126 insertions, 3936 deletions
diff --git a/system/xen/dom0/README.dom0 b/system/xen/dom0/README.dom0
index 09f8373b8c..025189788b 100644
--- a/system/xen/dom0/README.dom0
+++ b/system/xen/dom0/README.dom0
@@ -46,7 +46,7 @@ Xen EFI binary.
 
 To make things a bit easier, a copy of Xen EFI binary can be found here:
 
-  http://slackware.hr/~mario/xen/xen-4.10.1.efi.gz
+  http://slackware.hr/~mario/xen/xen-4.11.0.efi.gz
 
 If an automatic boot to Xen kernel is desired, the binary should be renamed and
 copied to the following location: /boot/efi/EFI/BOOT/bootx64.efi
diff --git a/system/xen/dom0/kernel-xen.sh b/system/xen/dom0/kernel-xen.sh
index 74075da12d..0829676da6 100644
--- a/system/xen/dom0/kernel-xen.sh
+++ b/system/xen/dom0/kernel-xen.sh
@@ -6,7 +6,7 @@
 # Modified by Mario Preksavec <mario@slackware.hr>
 
 KERNEL=${KERNEL:-4.4.118}
-XEN=${XEN:-4.10.1}
+XEN=${XEN:-4.11.0}
 BOOTLOADER=${BOOTLOADER:-lilo}
 
 ROOTMOD=${ROOTMOD:-ext4}
diff --git a/system/xen/patches/xen-4.10.2-pre.patch b/system/xen/patches/xen-4.10.2-pre.patch
deleted file mode 100644
index 42477696e1..0000000000
--- a/system/xen/patches/xen-4.10.2-pre.patch
+++ /dev/null
@@ -1,1631 +0,0 @@
-diff --git a/tools/libacpi/Makefile b/tools/libacpi/Makefile
-index a47a658a25..c17f3924cc 100644
---- a/tools/libacpi/Makefile
-+++ b/tools/libacpi/Makefile
-@@ -43,7 +43,7 @@ all: $(C_SRC) $(H_SRC)
- 
- $(H_SRC): $(ACPI_BUILD_DIR)/%.h: %.asl iasl
- 	iasl -vs -p $(ACPI_BUILD_DIR)/$*.$(TMP_SUFFIX) -tc $<
--	sed -e 's/AmlCode/$*/g' $(ACPI_BUILD_DIR)/$*.hex >$@
-+	sed -e 's/AmlCode/$*/g' -e 's/_aml_code//g' $(ACPI_BUILD_DIR)/$*.hex >$@
- 	rm -f $(addprefix $(ACPI_BUILD_DIR)/, $*.aml $*.hex)
-  
- $(MK_DSDT): mk_dsdt.c
-@@ -76,7 +76,7 @@ $(ACPI_BUILD_DIR)/dsdt_anycpu_arm.asl: $(MK_DSDT)
- 
- $(C_SRC): $(ACPI_BUILD_DIR)/%.c: iasl $(ACPI_BUILD_DIR)/%.asl
- 	iasl -vs -p $(ACPI_BUILD_DIR)/$*.$(TMP_SUFFIX) -tc $(ACPI_BUILD_DIR)/$*.asl
--	sed -e 's/AmlCode/$*/g' $(ACPI_BUILD_DIR)/$*.hex > $@.$(TMP_SUFFIX)
-+	sed -e 's/AmlCode/$*/g' -e 's/_aml_code//g' $(ACPI_BUILD_DIR)/$*.hex > $@.$(TMP_SUFFIX)
- 	echo "int $*_len=sizeof($*);" >> $@.$(TMP_SUFFIX)
- 	mv -f $@.$(TMP_SUFFIX) $@
- 	rm -f $(addprefix $(ACPI_BUILD_DIR)/, $*.aml $*.hex)
-#diff --git a/xen/Makefile b/xen/Makefile
-#index ecec297b9b..580af86931 100644
-#--- a/xen/Makefile
-#+++ b/xen/Makefile
-#@@ -2,7 +2,7 @@
-# # All other places this is stored (eg. compile.h) should be autogenerated.
-# export XEN_VERSION       = 4
-# export XEN_SUBVERSION    = 10
-#-export XEN_EXTRAVERSION ?= .1$(XEN_VENDORVERSION)
-#+export XEN_EXTRAVERSION ?= .2-pre$(XEN_VENDORVERSION)
-# export XEN_FULLVERSION   = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
-# -include xen-version
-# 
-diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
-index 1e4e5680a7..f7085d3c7b 100644
---- a/xen/arch/x86/acpi/power.c
-+++ b/xen/arch/x86/acpi/power.c
-@@ -28,6 +28,7 @@
- #include <asm/tboot.h>
- #include <asm/apic.h>
- #include <asm/io_apic.h>
-+#include <asm/spec_ctrl.h>
- #include <acpi/cpufreq/cpufreq.h>
- 
- uint32_t system_reset_counter = 1;
-@@ -163,6 +164,7 @@ static int enter_state(u32 state)
- {
-     unsigned long flags;
-     int error;
-+    struct cpu_info *ci;
-     unsigned long cr4;
- 
-     if ( (state <= ACPI_STATE_S0) || (state > ACPI_S_STATES_MAX) )
-@@ -203,12 +205,18 @@ static int enter_state(u32 state)
-         printk(XENLOG_ERR "Some devices failed to power down.");
-         system_state = SYS_STATE_resume;
-         device_power_up(error);
-+        console_end_sync();
-         error = -EIO;
-         goto done;
-     }
-     else
-         error = 0;
- 
-+    ci = get_cpu_info();
-+    spec_ctrl_enter_idle(ci);
-+    /* Avoid NMI/#MC using MSR_SPEC_CTRL until we've reloaded microcode. */
-+    ci->bti_ist_info = 0;
-+
-     ACPI_FLUSH_CPU_CACHE();
- 
-     switch ( state )
-@@ -243,17 +251,23 @@ static int enter_state(u32 state)
-     if ( (state == ACPI_STATE_S3) && error )
-         tboot_s3_error(error);
- 
-+    console_end_sync();
-+
-+    microcode_resume_cpu(0);
-+
-+    /* Re-enabled default NMI/#MC use of MSR_SPEC_CTRL. */
-+    ci->bti_ist_info = default_bti_ist_info;
-+    spec_ctrl_exit_idle(ci);
-+
-  done:
-     spin_debug_enable();
-     local_irq_restore(flags);
--    console_end_sync();
-     acpi_sleep_post(state);
-     if ( hvm_cpu_up() )
-         BUG();
-+    cpufreq_add_cpu(0);
- 
-  enable_cpu:
--    cpufreq_add_cpu(0);
--    microcode_resume_cpu(0);
-     rcu_barrier();
-     mtrr_aps_sync_begin();
-     enable_nonboot_cpus();
-diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
-index fdb2bf1779..136adadb63 100644
---- a/xen/arch/x86/cpu/common.c
-+++ b/xen/arch/x86/cpu/common.c
-@@ -747,6 +747,7 @@ void load_system_tables(void)
- 			[IST_MCE - 1] = stack_top + IST_MCE * PAGE_SIZE,
- 			[IST_DF  - 1] = stack_top + IST_DF  * PAGE_SIZE,
- 			[IST_NMI - 1] = stack_top + IST_NMI * PAGE_SIZE,
-+			[IST_DB  - 1] = stack_top + IST_DB  * PAGE_SIZE,
- 
- 			[IST_MAX ... ARRAY_SIZE(tss->ist) - 1] =
- 				0x8600111111111111ul,
-@@ -774,6 +775,7 @@ void load_system_tables(void)
- 	set_ist(&idt_tables[cpu][TRAP_double_fault],  IST_DF);
- 	set_ist(&idt_tables[cpu][TRAP_nmi],	      IST_NMI);
- 	set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE);
-+	set_ist(&idt_tables[cpu][TRAP_debug],         IST_DB);
- 
- 	/*
- 	 * Bottom-of-stack must be 16-byte aligned!
-diff --git a/xen/arch/x86/hpet.c b/xen/arch/x86/hpet.c
-index 8229c635e4..f18cbbd55a 100644
---- a/xen/arch/x86/hpet.c
-+++ b/xen/arch/x86/hpet.c
-@@ -509,6 +509,8 @@ static void hpet_attach_channel(unsigned int cpu,
- static void hpet_detach_channel(unsigned int cpu,
-                                 struct hpet_event_channel *ch)
- {
-+    unsigned int next;
-+
-     spin_lock_irq(&ch->lock);
- 
-     ASSERT(ch == per_cpu(cpu_bc_channel, cpu));
-@@ -517,7 +519,7 @@ static void hpet_detach_channel(unsigned int cpu,
- 
-     if ( cpu != ch->cpu )
-         spin_unlock_irq(&ch->lock);
--    else if ( cpumask_empty(ch->cpumask) )
-+    else if ( (next = cpumask_first(ch->cpumask)) >= nr_cpu_ids )
-     {
-         ch->cpu = -1;
-         clear_bit(HPET_EVT_USED_BIT, &ch->flags);
-@@ -525,7 +527,7 @@ static void hpet_detach_channel(unsigned int cpu,
-     }
-     else
-     {
--        ch->cpu = cpumask_first(ch->cpumask);
-+        ch->cpu = next;
-         set_channel_irq_affinity(ch);
-         local_irq_enable();
-     }
-diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c
-index b282089e03..131480fdd9 100644
---- a/xen/arch/x86/hvm/emulate.c
-+++ b/xen/arch/x86/hvm/emulate.c
-@@ -2113,22 +2113,20 @@ static int _hvm_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt,
- 
-     vio->mmio_retry = 0;
- 
--    switch ( rc = x86_emulate(&hvmemul_ctxt->ctxt, ops) )
-+    rc = x86_emulate(&hvmemul_ctxt->ctxt, ops);
-+    if ( rc == X86EMUL_OKAY && vio->mmio_retry )
-+        rc = X86EMUL_RETRY;
-+
-+    if ( !hvm_vcpu_io_need_completion(vio) )
-     {
--    case X86EMUL_OKAY:
--        if ( vio->mmio_retry )
--            rc = X86EMUL_RETRY;
--        /* fall through */
--    default:
-         vio->mmio_cache_count = 0;
-         vio->mmio_insn_bytes = 0;
--        break;
--
--    case X86EMUL_RETRY:
-+    }
-+    else
-+    {
-         BUILD_BUG_ON(sizeof(vio->mmio_insn) < sizeof(hvmemul_ctxt->insn_buf));
-         vio->mmio_insn_bytes = hvmemul_ctxt->insn_buf_bytes;
-         memcpy(vio->mmio_insn, hvmemul_ctxt->insn_buf, vio->mmio_insn_bytes);
--        break;
-     }
- 
-     if ( hvmemul_ctxt->ctxt.retire.singlestep )
-diff --git a/xen/arch/x86/hvm/hpet.c b/xen/arch/x86/hvm/hpet.c
-index f7aed7f69e..28377091ca 100644
---- a/xen/arch/x86/hvm/hpet.c
-+++ b/xen/arch/x86/hvm/hpet.c
-@@ -264,13 +264,20 @@ static void hpet_set_timer(HPETState *h, unsigned int tn,
-         diff = (timer_is_32bit(h, tn) && (-diff > HPET_TINY_TIME_SPAN))
-             ? (uint32_t)diff : 0;
- 
-+    destroy_periodic_time(&h->pt[tn]);
-     if ( (tn <= 1) && (h->hpet.config & HPET_CFG_LEGACY) )
-+    {
-         /* if LegacyReplacementRoute bit is set, HPET specification requires
-            timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC,
-            timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC. */
-         irq = (tn == 0) ? 0 : 8;
-+        h->pt[tn].source = PTSRC_isa;
-+    }
-     else
-+    {
-         irq = timer_int_route(h, tn);
-+        h->pt[tn].source = PTSRC_ioapic;
-+    }
- 
-     /*
-      * diff is the time from now when the timer should fire, for a periodic
-diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c
-index d5afe20cc8..25b2445429 100644
---- a/xen/arch/x86/hvm/ioreq.c
-+++ b/xen/arch/x86/hvm/ioreq.c
-@@ -87,14 +87,17 @@ static void hvm_io_assist(struct hvm_ioreq_vcpu *sv, uint64_t data)
- 
- static bool hvm_wait_for_io(struct hvm_ioreq_vcpu *sv, ioreq_t *p)
- {
-+    unsigned int prev_state = STATE_IOREQ_NONE;
-+
-     while ( sv->pending )
-     {
-         unsigned int state = p->state;
- 
--        rmb();
--        switch ( state )
-+        smp_rmb();
-+
-+    recheck:
-+        if ( unlikely(state == STATE_IOREQ_NONE) )
-         {
--        case STATE_IOREQ_NONE:
-             /*
-              * The only reason we should see this case is when an
-              * emulator is dying and it races with an I/O being
-@@ -102,14 +105,30 @@ static bool hvm_wait_for_io(struct hvm_ioreq_vcpu *sv, ioreq_t *p)
-              */
-             hvm_io_assist(sv, ~0ul);
-             break;
-+        }
-+
-+        if ( unlikely(state < prev_state) )
-+        {
-+            gdprintk(XENLOG_ERR, "Weird HVM ioreq state transition %u -> %u\n",
-+                     prev_state, state);
-+            sv->pending = false;
-+            domain_crash(sv->vcpu->domain);
-+            return false; /* bail */
-+        }
-+
-+        switch ( prev_state = state )
-+        {
-         case STATE_IORESP_READY: /* IORESP_READY -> NONE */
-             p->state = STATE_IOREQ_NONE;
-             hvm_io_assist(sv, p->data);
-             break;
-         case STATE_IOREQ_READY:  /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
-         case STATE_IOREQ_INPROCESS:
--            wait_on_xen_event_channel(sv->ioreq_evtchn, p->state != state);
--            break;
-+            wait_on_xen_event_channel(sv->ioreq_evtchn,
-+                                      ({ state = p->state;
-+                                         smp_rmb();
-+                                         state != prev_state; }));
-+            goto recheck;
-         default:
-             gdprintk(XENLOG_ERR, "Weird HVM iorequest state %u\n", state);
-             sv->pending = false;
-diff --git a/xen/arch/x86/hvm/irq.c b/xen/arch/x86/hvm/irq.c
-index f528e2d081..c85d004402 100644
---- a/xen/arch/x86/hvm/irq.c
-+++ b/xen/arch/x86/hvm/irq.c
-@@ -41,6 +41,26 @@ static void assert_gsi(struct domain *d, unsigned ioapic_gsi)
-     vioapic_irq_positive_edge(d, ioapic_gsi);
- }
- 
-+int hvm_ioapic_assert(struct domain *d, unsigned int gsi, bool level)
-+{
-+    struct hvm_irq *hvm_irq = hvm_domain_irq(d);
-+    int vector;
-+
-+    if ( gsi >= hvm_irq->nr_gsis )
-+    {
-+        ASSERT_UNREACHABLE();
-+        return -1;
-+    }
-+
-+    spin_lock(&d->arch.hvm_domain.irq_lock);
-+    if ( !level || hvm_irq->gsi_assert_count[gsi]++ == 0 )
-+        assert_gsi(d, gsi);
-+    vector = vioapic_get_vector(d, gsi);
-+    spin_unlock(&d->arch.hvm_domain.irq_lock);
-+
-+    return vector;
-+}
-+
- static void assert_irq(struct domain *d, unsigned ioapic_gsi, unsigned pic_irq)
- {
-     assert_gsi(d, ioapic_gsi);
-diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
-index dedec5752d..3b72b4dc2a 100644
---- a/xen/arch/x86/hvm/svm/svm.c
-+++ b/xen/arch/x86/hvm/svm/svm.c
-@@ -1046,6 +1046,7 @@ static void svm_ctxt_switch_from(struct vcpu *v)
-     set_ist(&idt_tables[cpu][TRAP_double_fault],  IST_DF);
-     set_ist(&idt_tables[cpu][TRAP_nmi],           IST_NMI);
-     set_ist(&idt_tables[cpu][TRAP_machine_check], IST_MCE);
-+    set_ist(&idt_tables[cpu][TRAP_debug],         IST_DB);
- }
- 
- static void svm_ctxt_switch_to(struct vcpu *v)
-@@ -1067,6 +1068,7 @@ static void svm_ctxt_switch_to(struct vcpu *v)
-     set_ist(&idt_tables[cpu][TRAP_double_fault],  IST_NONE);
-     set_ist(&idt_tables[cpu][TRAP_nmi],           IST_NONE);
-     set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
-+    set_ist(&idt_tables[cpu][TRAP_debug],         IST_NONE);
- 
-     svm_restore_dr(v);
- 
-@@ -1836,6 +1838,25 @@ static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
-     struct vcpu *v = current;
-     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
- 
-+    switch ( msr )
-+    {
-+        /*
-+         * Sync not needed while the cross-vendor logic is in unilateral effect.
-+    case MSR_IA32_SYSENTER_CS:
-+    case MSR_IA32_SYSENTER_ESP:
-+    case MSR_IA32_SYSENTER_EIP:
-+         */
-+    case MSR_STAR:
-+    case MSR_LSTAR:
-+    case MSR_CSTAR:
-+    case MSR_SYSCALL_MASK:
-+    case MSR_FS_BASE:
-+    case MSR_GS_BASE:
-+    case MSR_SHADOW_GS_BASE:
-+        svm_sync_vmcb(v);
-+        break;
-+    }
-+
-     switch ( msr )
-     {
-     case MSR_IA32_SYSENTER_CS:
-@@ -1848,6 +1869,34 @@ static int svm_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
-         *msr_content = v->arch.hvm_svm.guest_sysenter_eip;
-         break;
- 
-+    case MSR_STAR:
-+        *msr_content = vmcb->star;
-+        break;
-+
-+    case MSR_LSTAR:
-+        *msr_content = vmcb->lstar;
-+        break;
-+
-+    case MSR_CSTAR:
-+        *msr_content = vmcb->cstar;
-+        break;
-+
-+    case MSR_SYSCALL_MASK:
-+        *msr_content = vmcb->sfmask;
-+        break;
-+
-+    case MSR_FS_BASE:
-+        *msr_content = vmcb->fs.base;
-+        break;
-+
-+    case MSR_GS_BASE:
-+        *msr_content = vmcb->gs.base;
-+        break;
-+
-+    case MSR_SHADOW_GS_BASE:
-+        *msr_content = vmcb->kerngsbase;
-+        break;
-+
-     case MSR_IA32_MCx_MISC(4): /* Threshold register */
-     case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
-         /*
-@@ -1976,32 +2025,81 @@ static int svm_msr_write_intercept(unsigned int msr, uint64_t msr_content)
-     int ret, result = X86EMUL_OKAY;
-     struct vcpu *v = current;
-     struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
--    int sync = 0;
-+    bool sync = false;
- 
-     switch ( msr )
-     {
-     case MSR_IA32_SYSENTER_CS:
-     case MSR_IA32_SYSENTER_ESP:
-     case MSR_IA32_SYSENTER_EIP:
--        sync = 1;
--        break;
--    default:
-+    case MSR_STAR:
-+    case MSR_LSTAR:
-+    case MSR_CSTAR:
-+    case MSR_SYSCALL_MASK:
-+    case MSR_FS_BASE:
-+    case MSR_GS_BASE:
-+    case MSR_SHADOW_GS_BASE:
-+        sync = true;
-         break;
-     }
- 
-     if ( sync )
--        svm_sync_vmcb(v);    
-+        svm_sync_vmcb(v);
- 
-     switch ( msr )
-     {
-+    case MSR_IA32_SYSENTER_ESP:
-+    case MSR_IA32_SYSENTER_EIP:
-+    case MSR_LSTAR:
-+    case MSR_CSTAR:
-+    case MSR_FS_BASE:
-+    case MSR_GS_BASE:
-+    case MSR_SHADOW_GS_BASE:
-+        if ( !is_canonical_address(msr_content) )
-+            goto gpf;
-+
-+        switch ( msr )
-+        {
-+        case MSR_IA32_SYSENTER_ESP:
-+            vmcb->sysenter_esp = v->arch.hvm_svm.guest_sysenter_esp = msr_content;
-+            break;
-+
-+        case MSR_IA32_SYSENTER_EIP:
-+            vmcb->sysenter_eip = v->arch.hvm_svm.guest_sysenter_eip = msr_content;
-+            break;
-+
-+        case MSR_LSTAR:
-+            vmcb->lstar = msr_content;
-+            break;
-+
-+        case MSR_CSTAR:
-+            vmcb->cstar = msr_content;
-+            break;
-+
-+        case MSR_FS_BASE:
-+            vmcb->fs.base = msr_content;
-+            break;
-+
-+        case MSR_GS_BASE:
-+            vmcb->gs.base = msr_content;
-+            break;
-+
-+        case MSR_SHADOW_GS_BASE:
-+            vmcb->kerngsbase = msr_content;
-+            break;
-+        }
-+        break;
-+
-     case MSR_IA32_SYSENTER_CS:
-         vmcb->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs = msr_content;
-         break;
--    case MSR_IA32_SYSENTER_ESP:
--        vmcb->sysenter_esp = v->arch.hvm_svm.guest_sysenter_esp = msr_content;
-+
-+    case MSR_STAR:
-+        vmcb->star = msr_content;
-         break;
--    case MSR_IA32_SYSENTER_EIP:
--        vmcb->sysenter_eip = v->arch.hvm_svm.guest_sysenter_eip = msr_content;
-+
-+    case MSR_SYSCALL_MASK:
-+        vmcb->sfmask = msr_content;
-         break;
- 
-     case MSR_IA32_DEBUGCTLMSR:
-diff --git a/xen/arch/x86/hvm/svm/svmdebug.c b/xen/arch/x86/hvm/svm/svmdebug.c
-index 89ef2db932..b5b946aa94 100644
---- a/xen/arch/x86/hvm/svm/svmdebug.c
-+++ b/xen/arch/x86/hvm/svm/svmdebug.c
-@@ -131,9 +131,8 @@ bool svm_vmcb_isvalid(const char *from, const struct vmcb_struct *vmcb,
-         PRINTF("DR7: bits [63:32] are not zero (%#"PRIx64")\n",
-                vmcb_get_dr7(vmcb));
- 
--    if ( efer & ~(EFER_SCE | EFER_LME | EFER_LMA | EFER_NX | EFER_SVME |
--                  EFER_LMSLE | EFER_FFXSE) )
--        PRINTF("EFER: undefined bits are not zero (%#"PRIx64")\n", efer);
-+    if ( efer & ~EFER_KNOWN_MASK )
-+        PRINTF("EFER: unknown bits are not zero (%#"PRIx64")\n", efer);
- 
-     if ( hvm_efer_valid(v, efer, -1) )
-         PRINTF("EFER: %s (%"PRIx64")\n", hvm_efer_valid(v, efer, -1), efer);
-diff --git a/xen/arch/x86/hvm/viridian.c b/xen/arch/x86/hvm/viridian.c
-index f0fa59d7d5..b02a70d086 100644
---- a/xen/arch/x86/hvm/viridian.c
-+++ b/xen/arch/x86/hvm/viridian.c
-@@ -245,7 +245,7 @@ void cpuid_viridian_leaves(const struct vcpu *v, uint32_t leaf,
-         };
-         union {
-             HV_PARTITION_PRIVILEGE_MASK mask;
--            uint32_t lo, hi;
-+            struct { uint32_t lo, hi; };
-         } u;
- 
-         if ( !(viridian_feature_mask(d) & HVMPV_no_freq) )
-@@ -966,12 +966,10 @@ int viridian_hypercall(struct cpu_user_regs *regs)
-         gprintk(XENLOG_WARNING, "unimplemented hypercall %04x\n",
-                 input.call_code);
-         /* Fallthrough. */
--    case HvGetPartitionId:
-     case HvExtCallQueryCapabilities:
-         /*
--         * These hypercalls seem to be erroneously issued by Windows
--         * despite neither AccessPartitionId nor EnableExtendedHypercalls
--         * being set in CPUID leaf 2.
-+         * This hypercall seems to be erroneously issued by Windows
-+         * despite EnableExtendedHypercalls not being set in CPUID leaf 2.
-          * Given that return a status of 'invalid code' has not so far
-          * caused any problems it's not worth logging.
-          */
-diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c
-index 181f4cb631..04e3c2e15b 100644
---- a/xen/arch/x86/hvm/vpt.c
-+++ b/xen/arch/x86/hvm/vpt.c
-@@ -107,31 +107,49 @@ static int pt_irq_vector(struct periodic_time *pt, enum hvm_intsrc src)
- static int pt_irq_masked(struct periodic_time *pt)
- {
-     struct vcpu *v = pt->vcpu;
--    unsigned int gsi, isa_irq;
--    int mask;
--    uint8_t pic_imr;
-+    unsigned int gsi = pt->irq;
- 
--    if ( pt->source == PTSRC_lapic )
-+    switch ( pt->source )
-+    {
-+    case PTSRC_lapic:
-     {
-         struct vlapic *vlapic = vcpu_vlapic(v);
-+
-         return (!vlapic_enabled(vlapic) ||
-                 (vlapic_get_reg(vlapic, APIC_LVTT) & APIC_LVT_MASKED));
-     }
- 
--    isa_irq = pt->irq;
--    gsi = hvm_isa_irq_to_gsi(isa_irq);
--    pic_imr = v->domain->arch.hvm_domain.vpic[isa_irq >> 3].imr;
--    mask = vioapic_get_mask(v->domain, gsi);
--    if ( mask < 0 )
-+    case PTSRC_isa:
-     {
--        dprintk(XENLOG_WARNING, "d%u: invalid GSI (%u) for platform timer\n",
--                v->domain->domain_id, gsi);
--        domain_crash(v->domain);
--        return -1;
-+        uint8_t pic_imr = v->domain->arch.hvm_domain.vpic[pt->irq >> 3].imr;
-+
-+        /* Check if the interrupt is unmasked in the PIC. */
-+        if ( !(pic_imr & (1 << (pt->irq & 7))) && vlapic_accept_pic_intr(v) )
-+            return 0;
-+
-+        gsi = hvm_isa_irq_to_gsi(pt->irq);
-+    }
-+
-+    /* Fallthrough to check if the interrupt is masked on the IO APIC. */
-+    case PTSRC_ioapic:
-+    {
-+        int mask = vioapic_get_mask(v->domain, gsi);
-+
-+        if ( mask < 0 )
-+        {
-+            dprintk(XENLOG_WARNING,
-+                    "d%d: invalid GSI (%u) for platform timer\n",
-+                    v->domain->domain_id, gsi);
-+            domain_crash(v->domain);
-+            return -1;
-+        }
-+
-+        return mask;
-+    }
-     }
- 
--    return (((pic_imr & (1 << (isa_irq & 7))) || !vlapic_accept_pic_intr(v)) &&
--            mask);
-+    ASSERT_UNREACHABLE();
-+    return 1;
- }
- 
- static void pt_lock(struct periodic_time *pt)
-@@ -252,7 +270,7 @@ int pt_update_irq(struct vcpu *v)
-     struct list_head *head = &v->arch.hvm_vcpu.tm_list;
-     struct periodic_time *pt, *temp, *earliest_pt;
-     uint64_t max_lag;
--    int irq, is_lapic, pt_vector;
-+    int irq, pt_vector = -1;
- 
-     spin_lock(&v->arch.hvm_vcpu.tm_lock);
- 
-@@ -288,29 +306,26 @@ int pt_update_irq(struct vcpu *v)
- 
-     earliest_pt->irq_issued = 1;
-     irq = earliest_pt->irq;
--    is_lapic = (earliest_pt->source == PTSRC_lapic);
- 
-     spin_unlock(&v->arch.hvm_vcpu.tm_lock);
- 
--    /*
--     * If periodic timer interrut is handled by lapic, its vector in
--     * IRR is returned and used to set eoi_exit_bitmap for virtual
--     * interrupt delivery case. Otherwise return -1 to do nothing.
--     */
--    if ( is_lapic )
-+    switch ( earliest_pt->source )
-     {
-+    case PTSRC_lapic:
-+        /*
-+         * If periodic timer interrupt is handled by lapic, its vector in
-+         * IRR is returned and used to set eoi_exit_bitmap for virtual
-+         * interrupt delivery case. Otherwise return -1 to do nothing.
-+         */
-         vlapic_set_irq(vcpu_vlapic(v), irq, 0);
-         pt_vector = irq;
--    }
--    else
--    {
-+        break;
-+
-+    case PTSRC_isa:
-         hvm_isa_irq_deassert(v->domain, irq);
-         if ( platform_legacy_irq(irq) && vlapic_accept_pic_intr(v) &&
-              v->domain->arch.hvm_domain.vpic[irq >> 3].int_output )
--        {
-             hvm_isa_irq_assert(v->domain, irq, NULL);
--            pt_vector = -1;
--        }
-         else
-         {
-             pt_vector = hvm_isa_irq_assert(v->domain, irq, vioapic_get_vector);
-@@ -321,6 +336,17 @@ int pt_update_irq(struct vcpu *v)
-             if ( pt_vector < 0 || !vlapic_test_irq(vcpu_vlapic(v), pt_vector) )
-                 pt_vector = -1;
-         }
-+        break;
-+
-+    case PTSRC_ioapic:
-+        /*
-+         * NB: At the moment IO-APIC routed interrupts generated by vpt devices
-+         * (HPET) are edge-triggered.
-+         */
-+        pt_vector = hvm_ioapic_assert(v->domain, irq, false);
-+        if ( pt_vector < 0 || !vlapic_test_irq(vcpu_vlapic(v), pt_vector) )
-+            pt_vector = -1;
-+        break;
-     }
- 
-     return pt_vector;
-@@ -418,7 +444,14 @@ void create_periodic_time(
-     struct vcpu *v, struct periodic_time *pt, uint64_t delta,
-     uint64_t period, uint8_t irq, time_cb *cb, void *data)
- {
--    ASSERT(pt->source != 0);
-+    if ( !pt->source ||
-+         (pt->irq >= NR_ISAIRQS && pt->source == PTSRC_isa) ||
-+         (pt->irq >= hvm_domain_irq(v->domain)->nr_gsis &&
-+          pt->source == PTSRC_ioapic) )
-+    {
-+        ASSERT_UNREACHABLE();
-+        return;
-+    }
- 
-     destroy_periodic_time(pt);
- 
-@@ -498,7 +531,7 @@ static void pt_adjust_vcpu(struct periodic_time *pt, struct vcpu *v)
- {
-     int on_list;
- 
--    ASSERT(pt->source == PTSRC_isa);
-+    ASSERT(pt->source == PTSRC_isa || pt->source == PTSRC_ioapic);
- 
-     if ( pt->vcpu == NULL )
-         return;
-diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
-index 642ca312bf..c281936af0 100644
---- a/xen/arch/x86/pv/emul-priv-op.c
-+++ b/xen/arch/x86/pv/emul-priv-op.c
-@@ -813,26 +813,6 @@ static int write_cr(unsigned int reg, unsigned long val,
-     return X86EMUL_UNHANDLEABLE;
- }
- 
--static int read_dr(unsigned int reg, unsigned long *val,
--                   struct x86_emulate_ctxt *ctxt)
--{
--    unsigned long res = do_get_debugreg(reg);
--
--    if ( IS_ERR_VALUE(res) )
--        return X86EMUL_UNHANDLEABLE;
--
--    *val = res;
--
--    return X86EMUL_OKAY;
--}
--
--static int write_dr(unsigned int reg, unsigned long val,
--                    struct x86_emulate_ctxt *ctxt)
--{
--    return do_set_debugreg(reg, val) == 0
--           ? X86EMUL_OKAY : X86EMUL_UNHANDLEABLE;
--}
--
- static inline uint64_t guest_misc_enable(uint64_t val)
- {
-     val &= ~(MSR_IA32_MISC_ENABLE_PERF_AVAIL |
-@@ -906,9 +886,16 @@ static int read_msr(unsigned int reg, uint64_t *val,
-         return X86EMUL_OKAY;
- 
-     case MSR_EFER:
--        *val = read_efer();
-+        /* Hide unknown bits, and unconditionally hide SVME from guests. */
-+        *val = read_efer() & EFER_KNOWN_MASK & ~EFER_SVME;
-+        /*
-+         * Hide the 64-bit features from 32-bit guests.  SCE has
-+         * vendor-dependent behaviour.
-+         */
-         if ( is_pv_32bit_domain(currd) )
--            *val &= ~(EFER_LME | EFER_LMA | EFER_LMSLE);
-+            *val &= ~(EFER_LME | EFER_LMA | EFER_LMSLE |
-+                      (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL
-+                       ? EFER_SCE : 0));
-         return X86EMUL_OKAY;
- 
-     case MSR_K7_FID_VID_CTL:
-@@ -1326,8 +1313,8 @@ static const struct x86_emulate_ops priv_op_ops = {
-     .read_segment        = read_segment,
-     .read_cr             = read_cr,
-     .write_cr            = write_cr,
--    .read_dr             = read_dr,
--    .write_dr            = write_dr,
-+    .read_dr             = x86emul_read_dr,
-+    .write_dr            = x86emul_write_dr,
-     .read_msr            = read_msr,
-     .write_msr           = write_msr,
-     .cpuid               = pv_emul_cpuid,
-diff --git a/xen/arch/x86/pv/misc-hypercalls.c b/xen/arch/x86/pv/misc-hypercalls.c
-index 5862130697..1619be7874 100644
---- a/xen/arch/x86/pv/misc-hypercalls.c
-+++ b/xen/arch/x86/pv/misc-hypercalls.c
-@@ -30,22 +30,10 @@ long do_set_debugreg(int reg, unsigned long value)
- 
- unsigned long do_get_debugreg(int reg)
- {
--    struct vcpu *curr = current;
-+    unsigned long val;
-+    int res = x86emul_read_dr(reg, &val, NULL);
- 
--    switch ( reg )
--    {
--    case 0 ... 3:
--    case 6:
--        return curr->arch.debugreg[reg];
--    case 7:
--        return (curr->arch.debugreg[7] |
--                curr->arch.debugreg[5]);
--    case 4 ... 5:
--        return ((curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE) ?
--                curr->arch.debugreg[reg + 2] : 0);
--    }
--
--    return -EINVAL;
-+    return res == X86EMUL_OKAY ? val : -ENODEV;
- }
- 
- long do_fpu_taskswitch(int set)
-diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
-index e1d023428c..f81fc2ca65 100644
---- a/xen/arch/x86/smpboot.c
-+++ b/xen/arch/x86/smpboot.c
-@@ -968,6 +968,7 @@ static int cpu_smpboot_alloc(unsigned int cpu)
-     set_ist(&idt_tables[cpu][TRAP_double_fault],  IST_NONE);
-     set_ist(&idt_tables[cpu][TRAP_nmi],           IST_NONE);
-     set_ist(&idt_tables[cpu][TRAP_machine_check], IST_NONE);
-+    set_ist(&idt_tables[cpu][TRAP_debug],         IST_NONE);
- 
-     for ( stub_page = 0, i = cpu & ~(STUBS_PER_PAGE - 1);
-           i < nr_cpu_ids && i <= (cpu | (STUBS_PER_PAGE - 1)); ++i )
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 3c7447bfe6..fa67a0ffbd 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -97,12 +97,13 @@ static void __init print_details(enum ind_thunk thunk)
-     printk(XENLOG_DEBUG "Speculative mitigation facilities:\n");
- 
-     /* Hardware features which pertain to speculative mitigations. */
--    printk(XENLOG_DEBUG "  Hardware features:%s%s%s%s%s\n",
-+    printk(XENLOG_DEBUG "  Hardware features:%s%s%s%s%s%s\n",
-            (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "",
-            (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP"     : "",
-            (e8b  & cpufeat_mask(X86_FEATURE_IBPB))  ? " IBPB"      : "",
-            (caps & ARCH_CAPABILITIES_IBRS_ALL)      ? " IBRS_ALL"  : "",
--           (caps & ARCH_CAPABILITIES_RDCL_NO)       ? " RDCL_NO"   : "");
-+           (caps & ARCH_CAPABILITIES_RDCL_NO)       ? " RDCL_NO"   : "",
-+           (caps & ARCH_CAPS_RSBA)                  ? " RSBA"      : "");
- 
-     /* Compiled-in support which pertains to BTI mitigations. */
-     if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) )
-@@ -135,6 +136,20 @@ static bool __init retpoline_safe(void)
-          boot_cpu_data.x86 != 6 )
-         return false;
- 
-+    if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
-+    {
-+        uint64_t caps;
-+
-+        rdmsrl(MSR_ARCH_CAPABILITIES, caps);
-+
-+        /*
-+         * RBSA may be set by a hypervisor to indicate that we may move to a
-+         * processor which isn't retpoline-safe.
-+         */
-+        if ( caps & ARCH_CAPS_RSBA )
-+            return false;
-+    }
-+
-     switch ( boot_cpu_data.x86_model )
-     {
-     case 0x17: /* Penryn */
-@@ -161,18 +176,40 @@ static bool __init retpoline_safe(void)
-          * versions.
-          */
-     case 0x3d: /* Broadwell */
--        return ucode_rev >= 0x28;
-+        return ucode_rev >= 0x2a;
-     case 0x47: /* Broadwell H */
--        return ucode_rev >= 0x1b;
-+        return ucode_rev >= 0x1d;
-     case 0x4f: /* Broadwell EP/EX */
--        return ucode_rev >= 0xb000025;
-+        return ucode_rev >= 0xb000021;
-     case 0x56: /* Broadwell D */
--        return false; /* TBD. */
-+        switch ( boot_cpu_data.x86_mask )
-+        {
-+        case 2:  return ucode_rev >= 0x15;
-+        case 3:  return ucode_rev >= 0x7000012;
-+        case 4:  return ucode_rev >= 0xf000011;
-+        case 5:  return ucode_rev >= 0xe000009;
-+        default:
-+            printk("Unrecognised CPU stepping %#x - assuming not reptpoline safe\n",
-+                   boot_cpu_data.x86_mask);
-+            return false;
-+        }
-+        break;
- 
-         /*
--         * Skylake and later processors are not retpoline-safe.
-+         * Skylake, Kabylake and Cannonlake processors are not retpoline-safe.
-          */
-+    case 0x4e:
-+    case 0x55:
-+    case 0x5e:
-+    case 0x66:
-+    case 0x67:
-+    case 0x8e:
-+    case 0x9e:
-+        return false;
-+
-     default:
-+        printk("Unrecognised CPU model %#x - assuming not reptpoline safe\n",
-+               boot_cpu_data.x86_model);
-         return false;
-     }
- }
-diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
-index 906124331b..e217b0d6e2 100644
---- a/xen/arch/x86/traps.c
-+++ b/xen/arch/x86/traps.c
-@@ -325,13 +325,13 @@ static void show_guest_stack(struct vcpu *v, const struct cpu_user_regs *regs)
- /*
-  * Notes for get_stack_trace_bottom() and get_stack_dump_bottom()
-  *
-- * Stack pages 0, 1 and 2:
-+ * Stack pages 0 - 3:
-  *   These are all 1-page IST stacks.  Each of these stacks have an exception
-  *   frame and saved register state at the top.  The interesting bound for a
-  *   trace is the word adjacent to this, while the bound for a dump is the
-  *   very top, including the exception frame.
-  *
-- * Stack pages 3, 4 and 5:
-+ * Stack pages 4 and 5:
-  *   None of these are particularly interesting.  With MEMORY_GUARD, page 5 is
-  *   explicitly not present, so attempting to dump or trace it is
-  *   counterproductive.  Without MEMORY_GUARD, it is possible for a call chain
-@@ -352,12 +352,12 @@ unsigned long get_stack_trace_bottom(unsigned long sp)
- {
-     switch ( get_stack_page(sp) )
-     {
--    case 0 ... 2:
-+    case 0 ... 3:
-         return ROUNDUP(sp, PAGE_SIZE) -
-             offsetof(struct cpu_user_regs, es) - sizeof(unsigned long);
- 
- #ifndef MEMORY_GUARD
--    case 3 ... 5:
-+    case 4 ... 5:
- #endif
-     case 6 ... 7:
-         return ROUNDUP(sp, STACK_SIZE) -
-@@ -372,11 +372,11 @@ unsigned long get_stack_dump_bottom(unsigned long sp)
- {
-     switch ( get_stack_page(sp) )
-     {
--    case 0 ... 2:
-+    case 0 ... 3:
-         return ROUNDUP(sp, PAGE_SIZE) - sizeof(unsigned long);
- 
- #ifndef MEMORY_GUARD
--    case 3 ... 5:
-+    case 4 ... 5:
- #endif
-     case 6 ... 7:
-         return ROUNDUP(sp, STACK_SIZE) - sizeof(unsigned long);
-@@ -1761,11 +1761,36 @@ static void ler_enable(void)
- 
- void do_debug(struct cpu_user_regs *regs)
- {
-+    unsigned long dr6;
-     struct vcpu *v = current;
- 
-+    /* Stash dr6 as early as possible. */
-+    dr6 = read_debugreg(6);
-+
-     if ( debugger_trap_entry(TRAP_debug, regs) )
-         return;
- 
-+    /*
-+     * At the time of writing (March 2018), on the subject of %dr6:
-+     *
-+     * The Intel manual says:
-+     *   Certain debug exceptions may clear bits 0-3. The remaining contents
-+     *   of the DR6 register are never cleared by the processor. To avoid
-+     *   confusion in identifying debug exceptions, debug handlers should
-+     *   clear the register (except bit 16, which they should set) before
-+     *   returning to the interrupted task.
-+     *
-+     * The AMD manual says:
-+     *   Bits 15:13 of the DR6 register are not cleared by the processor and
-+     *   must be cleared by software after the contents have been read.
-+     *
-+     * Some bits are reserved set, some are reserved clear, and some bits
-+     * which were previously reserved set are reused and cleared by hardware.
-+     * For future compatibility, reset to the default value, which will allow
-+     * us to spot any bit being changed by hardware to its non-default value.
-+     */
-+    write_debugreg(6, X86_DR6_DEFAULT);
-+
-     if ( !guest_mode(regs) )
-     {
-         if ( regs->eflags & X86_EFLAGS_TF )
-@@ -1784,21 +1809,50 @@ void do_debug(struct cpu_user_regs *regs)
-                 regs->eflags &= ~X86_EFLAGS_TF;
-             }
-         }
--        else
-+
-+        /*
-+         * Check for fault conditions.  General Detect, and instruction
-+         * breakpoints are faults rather than traps, at which point attempting
-+         * to ignore and continue will result in a livelock.
-+         */
-+        if ( dr6 & DR_GENERAL_DETECT )
-         {
--            /*
--             * We ignore watchpoints when they trigger within Xen. This may
--             * happen when a buffer is passed to us which previously had a
--             * watchpoint set on it. No need to bump EIP; the only faulting
--             * trap is an instruction breakpoint, which can't happen to us.
--             */
--            WARN_ON(!search_exception_table(regs));
-+            printk(XENLOG_ERR "Hit General Detect in Xen context\n");
-+            fatal_trap(regs, 0);
-+        }
-+
-+        if ( dr6 & (DR_TRAP3 | DR_TRAP2 | DR_TRAP1 | DR_TRAP0) )
-+        {
-+            unsigned int bp, dr7 = read_debugreg(7) >> DR_CONTROL_SHIFT;
-+
-+            for ( bp = 0; bp < 4; ++bp )
-+            {
-+                if ( (dr6 & (1u << bp)) && /* Breakpoint triggered? */
-+                     ((dr7 & (3u << (bp * DR_CONTROL_SIZE))) == 0) /* Insn? */ )
-+                {
-+                    printk(XENLOG_ERR
-+                           "Hit instruction breakpoint in Xen context\n");
-+                    fatal_trap(regs, 0);
-+                }
-+            }
-         }
-+
-+        /*
-+         * Whatever caused this #DB should be a trap.  Note it and continue.
-+         * Guests can trigger this in certain corner cases, so ensure the
-+         * message is ratelimited.
-+         */
-+        gprintk(XENLOG_WARNING,
-+                "Hit #DB in Xen context: %04x:%p [%ps], stk %04x:%p, dr6 %lx\n",
-+                regs->cs, _p(regs->rip), _p(regs->rip),
-+                regs->ss, _p(regs->rsp), dr6);
-+
-         goto out;
-     }
- 
-     /* Save debug status register where guest OS can peek at it */
--    v->arch.debugreg[6] = read_debugreg(6);
-+    v->arch.debugreg[6] |= (dr6 & ~X86_DR6_DEFAULT);
-+    v->arch.debugreg[6] &= (dr6 | ~X86_DR6_DEFAULT);
- 
-     ler_enable();
-     pv_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC);
-@@ -1917,6 +1971,7 @@ void __init init_idt_traps(void)
-     set_ist(&idt_table[TRAP_double_fault],  IST_DF);
-     set_ist(&idt_table[TRAP_nmi],           IST_NMI);
-     set_ist(&idt_table[TRAP_machine_check], IST_MCE);
-+    set_ist(&idt_table[TRAP_debug],         IST_DB);
- 
-     /* CPU0 uses the master IDT. */
-     idt_tables[0] = idt_table;
-@@ -1984,6 +2039,12 @@ void activate_debugregs(const struct vcpu *curr)
-     }
- }
- 
-+/*
-+ * Used by hypercalls and the emulator.
-+ *  -ENODEV => #UD
-+ *  -EINVAL => #GP Invalid bit
-+ *  -EPERM  => #GP Valid bit, but not permitted to use
-+ */
- long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
- {
-     int i;
-@@ -2015,7 +2076,17 @@ long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
-         if ( v == curr )
-             write_debugreg(3, value);
-         break;
-+
-+    case 4:
-+        if ( v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE )
-+            return -ENODEV;
-+
-+        /* Fallthrough */
-     case 6:
-+        /* The upper 32 bits are strictly reserved. */
-+        if ( value != (uint32_t)value )
-+            return -EINVAL;
-+
-         /*
-          * DR6: Bits 4-11,16-31 reserved (set to 1).
-          *      Bit 12 reserved (set to 0).
-@@ -2025,7 +2096,17 @@ long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
-         if ( v == curr )
-             write_debugreg(6, value);
-         break;
-+
-+    case 5:
-+        if ( v->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE )
-+            return -ENODEV;
-+
-+        /* Fallthrough */
-     case 7:
-+        /* The upper 32 bits are strictly reserved. */
-+        if ( value != (uint32_t)value )
-+            return -EINVAL;
-+
-         /*
-          * DR7: Bit 10 reserved (set to 1).
-          *      Bits 11-12,14-15 reserved (set to 0).
-@@ -2038,6 +2119,10 @@ long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
-          */
-         if ( value & DR_GENERAL_DETECT )
-             return -EPERM;
-+
-+        /* Zero the IO shadow before recalculating the real %dr7 */
-+        v->arch.debugreg[5] = 0;
-+
-         /* DR7.{G,L}E = 0 => debugging disabled for this domain. */
-         if ( value & DR7_ACTIVE_MASK )
-         {
-@@ -2070,7 +2155,7 @@ long set_debugreg(struct vcpu *v, unsigned int reg, unsigned long value)
-             write_debugreg(7, value);
-         break;
-     default:
--        return -EINVAL;
-+        return -ENODEV;
-     }
- 
-     v->arch.debugreg[reg] = value;
-diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
-index 75497bc292..a47cb9dc19 100644
---- a/xen/arch/x86/x86_64/compat/entry.S
-+++ b/xen/arch/x86/x86_64/compat/entry.S
-@@ -39,6 +39,12 @@ ENTRY(compat_test_all_events)
-         leaq  irq_stat+IRQSTAT_softirq_pending(%rip),%rcx
-         cmpl  $0,(%rcx,%rax,1)
-         jne   compat_process_softirqs
-+
-+        /* Inject exception if pending. */
-+        lea   VCPU_trap_bounce(%rbx), %rdx
-+        testb $TBF_EXCEPTION, TRAPBOUNCE_flags(%rdx)
-+        jnz   .Lcompat_process_trapbounce
-+
-         testb $1,VCPU_mce_pending(%rbx)
-         jnz   compat_process_mce
- .Lcompat_test_guest_nmi:
-@@ -68,15 +74,24 @@ compat_process_softirqs:
-         call  do_softirq
-         jmp   compat_test_all_events
- 
-+        ALIGN
-+/* %rbx: struct vcpu, %rdx: struct trap_bounce */
-+.Lcompat_process_trapbounce:
-+        sti
-+.Lcompat_bounce_exception:
-+        call  compat_create_bounce_frame
-+        movb  $0, TRAPBOUNCE_flags(%rdx)
-+        jmp   compat_test_all_events
-+
- 	ALIGN
- /* %rbx: struct vcpu */
- compat_process_mce:
-         testb $1 << VCPU_TRAP_MCE,VCPU_async_exception_mask(%rbx)
-         jnz   .Lcompat_test_guest_nmi
-         sti
--        movb $0,VCPU_mce_pending(%rbx)
--        call set_guest_machinecheck_trapbounce
--        testl %eax,%eax
-+        movb  $0, VCPU_mce_pending(%rbx)
-+        call  set_guest_machinecheck_trapbounce
-+        test  %al, %al
-         jz    compat_test_all_events
-         movzbl VCPU_async_exception_mask(%rbx),%edx # save mask for the
-         movb %dl,VCPU_mce_old_mask(%rbx)            # iret hypercall
-@@ -88,11 +103,11 @@ compat_process_mce:
- /* %rbx: struct vcpu */
- compat_process_nmi:
-         testb $1 << VCPU_TRAP_NMI,VCPU_async_exception_mask(%rbx)
--        jnz  compat_test_guest_events
-+        jnz   compat_test_guest_events
-         sti
--        movb  $0,VCPU_nmi_pending(%rbx)
-+        movb  $0, VCPU_nmi_pending(%rbx)
-         call  set_guest_nmi_trapbounce
--        testl %eax,%eax
-+        test  %al, %al
-         jz    compat_test_all_events
-         movzbl VCPU_async_exception_mask(%rbx),%edx # save mask for the
-         movb %dl,VCPU_nmi_old_mask(%rbx)            # iret hypercall
-@@ -189,15 +204,6 @@ ENTRY(cr4_pv32_restore)
-         xor   %eax, %eax
-         ret
- 
--/* %rdx: trap_bounce, %rbx: struct vcpu */
--ENTRY(compat_post_handle_exception)
--        testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx)
--        jz    compat_test_all_events
--.Lcompat_bounce_exception:
--        call  compat_create_bounce_frame
--        movb  $0,TRAPBOUNCE_flags(%rdx)
--        jmp   compat_test_all_events
--
-         .section .text.entry, "ax", @progbits
- 
- /* See lstar_enter for entry register state. */
-diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
-index bdd33e727f..41d3ec21a1 100644
---- a/xen/arch/x86/x86_64/entry.S
-+++ b/xen/arch/x86/x86_64/entry.S
-@@ -42,6 +42,12 @@ test_all_events:
-         leaq  irq_stat+IRQSTAT_softirq_pending(%rip), %rcx
-         cmpl  $0, (%rcx, %rax, 1)
-         jne   process_softirqs
-+
-+        /* Inject exception if pending. */
-+        lea   VCPU_trap_bounce(%rbx), %rdx
-+        testb $TBF_EXCEPTION, TRAPBOUNCE_flags(%rdx)
-+        jnz   .Lprocess_trapbounce
-+
-         cmpb  $0, VCPU_mce_pending(%rbx)
-         jne   process_mce
- .Ltest_guest_nmi:
-@@ -69,6 +75,15 @@ process_softirqs:
-         call do_softirq
-         jmp  test_all_events
- 
-+        ALIGN
-+/* %rbx: struct vcpu, %rdx struct trap_bounce */
-+.Lprocess_trapbounce:
-+        sti
-+.Lbounce_exception:
-+        call  create_bounce_frame
-+        movb  $0, TRAPBOUNCE_flags(%rdx)
-+        jmp   test_all_events
-+
-         ALIGN
- /* %rbx: struct vcpu */
- process_mce:
-@@ -77,7 +92,7 @@ process_mce:
-         sti
-         movb $0, VCPU_mce_pending(%rbx)
-         call set_guest_machinecheck_trapbounce
--        test %eax, %eax
-+        test %al, %al
-         jz   test_all_events
-         movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the
-         movb %dl, VCPU_mce_old_mask(%rbx)            # iret hypercall
-@@ -93,7 +108,7 @@ process_nmi:
-         sti
-         movb $0, VCPU_nmi_pending(%rbx)
-         call set_guest_nmi_trapbounce
--        test %eax, %eax
-+        test %al, %al
-         jz   test_all_events
-         movzbl VCPU_async_exception_mask(%rbx), %edx # save mask for the
-         movb %dl, VCPU_nmi_old_mask(%rbx)            # iret hypercall
-@@ -667,15 +682,9 @@ handle_exception_saved:
-         mov   %r15, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
-         testb $3,UREGS_cs(%rsp)
-         jz    restore_all_xen
--        leaq  VCPU_trap_bounce(%rbx),%rdx
-         movq  VCPU_domain(%rbx),%rax
-         testb $1,DOMAIN_is_32bit_pv(%rax)
--        jnz   compat_post_handle_exception
--        testb $TBF_EXCEPTION,TRAPBOUNCE_flags(%rdx)
--        jz    test_all_events
--.Lbounce_exception:
--        call  create_bounce_frame
--        movb  $0,TRAPBOUNCE_flags(%rdx)
-+        jnz   compat_test_all_events
-         jmp   test_all_events
- 
- /* No special register assumptions. */
-@@ -730,7 +739,7 @@ ENTRY(device_not_available)
- ENTRY(debug)
-         pushq $0
-         movl  $TRAP_debug,4(%rsp)
--        jmp   handle_exception
-+        jmp   handle_ist_exception
- 
- ENTRY(int3)
-         pushq $0
-@@ -783,12 +792,14 @@ ENTRY(double_fault)
-         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
- 
-         mov   STACK_CPUINFO_FIELD(xen_cr3)(%r14), %rbx
--        test  %rbx, %rbx
-+        neg   %rbx
-         jz    .Ldblf_cr3_okay
-         jns   .Ldblf_cr3_load
-+        mov   %rbx, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
-         neg   %rbx
- .Ldblf_cr3_load:
-         mov   %rbx, %cr3
-+        movq $0, STACK_CPUINFO_FIELD(xen_cr3)(%r14)
- .Ldblf_cr3_okay:
- 
-         movq  %rsp,%rdi
-diff --git a/xen/arch/x86/x86_emulate.c b/xen/arch/x86/x86_emulate.c
-index c7ba221d11..9125c67c9e 100644
---- a/xen/arch/x86/x86_emulate.c
-+++ b/xen/arch/x86/x86_emulate.c
-@@ -14,6 +14,7 @@
- #include <asm/processor.h> /* current_cpu_info */
- #include <asm/xstate.h>
- #include <asm/amd.h> /* cpu_has_amd_erratum() */
-+#include <asm/debugreg.h>
- 
- /* Avoid namespace pollution. */
- #undef cmpxchg
-@@ -41,3 +42,75 @@
- })
- 
- #include "x86_emulate/x86_emulate.c"
-+
-+/* Called with NULL ctxt in hypercall context. */
-+int x86emul_read_dr(unsigned int reg, unsigned long *val,
-+                    struct x86_emulate_ctxt *ctxt)
-+{
-+    struct vcpu *curr = current;
-+
-+    /* HVM support requires a bit more plumbing before it will work. */
-+    ASSERT(is_pv_vcpu(curr));
-+
-+    switch ( reg )
-+    {
-+    case 0 ... 3:
-+    case 6:
-+        *val = curr->arch.debugreg[reg];
-+        break;
-+
-+    case 7:
-+        *val = (curr->arch.debugreg[7] |
-+                curr->arch.debugreg[5]);
-+        break;
-+
-+    case 4 ... 5:
-+        if ( !(curr->arch.pv_vcpu.ctrlreg[4] & X86_CR4_DE) )
-+        {
-+            *val = curr->arch.debugreg[reg + 2];
-+            break;
-+        }
-+
-+        /* Fallthrough */
-+    default:
-+        if ( ctxt )
-+            x86_emul_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC, ctxt);
-+
-+        return X86EMUL_EXCEPTION;
-+    }
-+
-+    return X86EMUL_OKAY;
-+}
-+
-+int x86emul_write_dr(unsigned int reg, unsigned long val,
-+                     struct x86_emulate_ctxt *ctxt)
-+{
-+    struct vcpu *curr = current;
-+
-+    /* HVM support requires a bit more plumbing before it will work. */
-+    ASSERT(is_pv_vcpu(curr));
-+
-+    switch ( set_debugreg(curr, reg, val) )
-+    {
-+    case 0:
-+        return X86EMUL_OKAY;
-+
-+    case -ENODEV:
-+        x86_emul_hw_exception(TRAP_invalid_op, X86_EVENT_NO_EC, ctxt);
-+        return X86EMUL_EXCEPTION;
-+
-+    default:
-+        x86_emul_hw_exception(TRAP_gp_fault, 0, ctxt);
-+        return X86EMUL_EXCEPTION;
-+    }
-+}
-+
-+/*
-+ * Local variables:
-+ * mode: C
-+ * c-file-style: "BSD"
-+ * c-basic-offset: 4
-+ * tab-width: 4
-+ * indent-tabs-mode: nil
-+ * End:
-+ */
-diff --git a/xen/arch/x86/x86_emulate/x86_emulate.h b/xen/arch/x86/x86_emulate/x86_emulate.h
-index 0c8c80ad5a..9c2bb8157c 100644
---- a/xen/arch/x86/x86_emulate/x86_emulate.h
-+++ b/xen/arch/x86/x86_emulate/x86_emulate.h
-@@ -662,6 +662,11 @@ static inline void x86_emulate_free_state(struct x86_emulate_state *state) {}
- void x86_emulate_free_state(struct x86_emulate_state *state);
- #endif
- 
-+int x86emul_read_dr(unsigned int reg, unsigned long *val,
-+                    struct x86_emulate_ctxt *ctxt);
-+int x86emul_write_dr(unsigned int reg, unsigned long val,
-+                     struct x86_emulate_ctxt *ctxt);
-+
- #endif
- 
- static inline void x86_emul_hw_exception(
-diff --git a/xen/common/schedule.c b/xen/common/schedule.c
-index b7884263f2..f21c3e5a64 100644
---- a/xen/common/schedule.c
-+++ b/xen/common/schedule.c
-@@ -436,14 +436,9 @@ void sched_destroy_domain(struct domain *d)
-     cpupool_rm_domain(d);
- }
- 
--void vcpu_sleep_nosync(struct vcpu *v)
-+void vcpu_sleep_nosync_locked(struct vcpu *v)
- {
--    unsigned long flags;
--    spinlock_t *lock;
--
--    TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
--
--    lock = vcpu_schedule_lock_irqsave(v, &flags);
-+    ASSERT(spin_is_locked(per_cpu(schedule_data,v->processor).schedule_lock));
- 
-     if ( likely(!vcpu_runnable(v)) )
-     {
-@@ -452,6 +447,18 @@ void vcpu_sleep_nosync(struct vcpu *v)
- 
-         SCHED_OP(vcpu_scheduler(v), sleep, v);
-     }
-+}
-+
-+void vcpu_sleep_nosync(struct vcpu *v)
-+{
-+    unsigned long flags;
-+    spinlock_t *lock;
-+
-+    TRACE_2D(TRC_SCHED_SLEEP, v->domain->domain_id, v->vcpu_id);
-+
-+    lock = vcpu_schedule_lock_irqsave(v, &flags);
-+
-+    vcpu_sleep_nosync_locked(v);
- 
-     vcpu_schedule_unlock_irqrestore(lock, flags, v);
- }
-@@ -567,13 +574,54 @@ static void vcpu_move_nosched(struct vcpu *v, unsigned int new_cpu)
-     sched_move_irqs(v);
- }
- 
--static void vcpu_migrate(struct vcpu *v)
-+/*
-+ * Initiating migration
-+ *
-+ * In order to migrate, we need the vcpu in question to have stopped
-+ * running and had SCHED_OP(sleep) called (to take it off any
-+ * runqueues, for instance); and if it is currently running, it needs
-+ * to be scheduled out.  Finally, we need to hold the scheduling locks
-+ * for both the processor we're migrating from, and the processor
-+ * we're migrating to.
-+ *
-+ * In order to avoid deadlock while satisfying the final requirement,
-+ * we must release any scheduling lock we hold, then try to grab both
-+ * locks we want, then double-check to make sure that what we started
-+ * to do hasn't been changed in the mean time.
-+ *
-+ * These steps are encapsulated in the following two functions; they
-+ * should be called like this:
-+ *
-+ *     lock = vcpu_schedule_lock_irq(v);
-+ *     vcpu_migrate_start(v);
-+ *     vcpu_schedule_unlock_irq(lock, v)
-+ *     vcpu_migrate_finish(v);
-+ *
-+ * vcpu_migrate_finish() will do the work now if it can, or simply
-+ * return if it can't (because v is still running); in that case
-+ * vcpu_migrate_finish() will be called by context_saved().
-+ */
-+void vcpu_migrate_start(struct vcpu *v)
-+{
-+    set_bit(_VPF_migrating, &v->pause_flags);
-+    vcpu_sleep_nosync_locked(v);
-+}
-+
-+static void vcpu_migrate_finish(struct vcpu *v)
- {
-     unsigned long flags;
-     unsigned int old_cpu, new_cpu;
-     spinlock_t *old_lock, *new_lock;
-     bool_t pick_called = 0;
- 
-+    /*
-+     * If the vcpu is currently running, this will be handled by
-+     * context_saved(); and in any case, if the bit is cleared, then
-+     * someone else has already done the work so we don't need to.
-+     */
-+    if ( v->is_running || !test_bit(_VPF_migrating, &v->pause_flags) )
-+        return;
-+
-     old_cpu = new_cpu = v->processor;
-     for ( ; ; )
-     {
-@@ -653,14 +701,11 @@ void vcpu_force_reschedule(struct vcpu *v)
-     spinlock_t *lock = vcpu_schedule_lock_irq(v);
- 
-     if ( v->is_running )
--        set_bit(_VPF_migrating, &v->pause_flags);
-+        vcpu_migrate_start(v);
-+
-     vcpu_schedule_unlock_irq(lock, v);
- 
--    if ( v->pause_flags & VPF_migrating )
--    {
--        vcpu_sleep_nosync(v);
--        vcpu_migrate(v);
--    }
-+    vcpu_migrate_finish(v);
- }
- 
- void restore_vcpu_affinity(struct domain *d)
-@@ -812,10 +857,10 @@ int cpu_disable_scheduler(unsigned int cpu)
-                  *  * the scheduler will always fine a suitable solution, or
-                  *    things would have failed before getting in here.
-                  */
--                set_bit(_VPF_migrating, &v->pause_flags);
-+                vcpu_migrate_start(v);
-                 vcpu_schedule_unlock_irqrestore(lock, flags, v);
--                vcpu_sleep_nosync(v);
--                vcpu_migrate(v);
-+
-+                vcpu_migrate_finish(v);
- 
-                 /*
-                  * The only caveat, in this case, is that if a vcpu active in
-@@ -849,18 +894,14 @@ static int vcpu_set_affinity(
-          * Always ask the scheduler to re-evaluate placement
-          * when changing the affinity.
-          */
--        set_bit(_VPF_migrating, &v->pause_flags);
-+        vcpu_migrate_start(v);
-     }
- 
-     vcpu_schedule_unlock_irq(lock, v);
- 
-     domain_update_node_affinity(v->domain);
- 
--    if ( v->pause_flags & VPF_migrating )
--    {
--        vcpu_sleep_nosync(v);
--        vcpu_migrate(v);
--    }
-+    vcpu_migrate_finish(v);
- 
-     return ret;
- }
-@@ -1088,7 +1129,6 @@ int vcpu_pin_override(struct vcpu *v, int cpu)
-         {
-             cpumask_copy(v->cpu_hard_affinity, v->cpu_hard_affinity_saved);
-             v->affinity_broken = 0;
--            set_bit(_VPF_migrating, &v->pause_flags);
-             ret = 0;
-         }
-     }
-@@ -1101,20 +1141,18 @@ int vcpu_pin_override(struct vcpu *v, int cpu)
-             cpumask_copy(v->cpu_hard_affinity_saved, v->cpu_hard_affinity);
-             v->affinity_broken = 1;
-             cpumask_copy(v->cpu_hard_affinity, cpumask_of(cpu));
--            set_bit(_VPF_migrating, &v->pause_flags);
-             ret = 0;
-         }
-     }
- 
-+    if ( ret == 0 )
-+        vcpu_migrate_start(v);
-+
-     vcpu_schedule_unlock_irq(lock, v);
- 
-     domain_update_node_affinity(v->domain);
- 
--    if ( v->pause_flags & VPF_migrating )
--    {
--        vcpu_sleep_nosync(v);
--        vcpu_migrate(v);
--    }
-+    vcpu_migrate_finish(v);
- 
-     return ret;
- }
-@@ -1501,8 +1539,7 @@ void context_saved(struct vcpu *prev)
- 
-     SCHED_OP(vcpu_scheduler(prev), context_saved, prev);
- 
--    if ( unlikely(prev->pause_flags & VPF_migrating) )
--        vcpu_migrate(prev);
-+    vcpu_migrate_finish(prev);
- }
- 
- /* The scheduler timer: force a run through the scheduler */
-diff --git a/xen/include/asm-x86/debugreg.h b/xen/include/asm-x86/debugreg.h
-index c57914efc6..b3b10eaf40 100644
---- a/xen/include/asm-x86/debugreg.h
-+++ b/xen/include/asm-x86/debugreg.h
-@@ -24,6 +24,8 @@
- #define DR_STATUS_RESERVED_ZERO (~0xffffeffful) /* Reserved, read as zero */
- #define DR_STATUS_RESERVED_ONE  0xffff0ff0ul /* Reserved, read as one */
- 
-+#define X86_DR6_DEFAULT 0xffff0ff0ul    /* Default %dr6 value. */
-+
- /* Now define a bunch of things for manipulating the control register.
-    The top two bytes of the control register consist of 4 fields of 4
-    bits - each field corresponds to one of the four debug registers,
-diff --git a/xen/include/asm-x86/hvm/irq.h b/xen/include/asm-x86/hvm/irq.h
-index f756cb5a0d..1a52ec6045 100644
---- a/xen/include/asm-x86/hvm/irq.h
-+++ b/xen/include/asm-x86/hvm/irq.h
-@@ -207,6 +207,9 @@ int hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq);
- 
- int hvm_inject_msi(struct domain *d, uint64_t addr, uint32_t data);
- 
-+/* Assert an IO APIC pin. */
-+int hvm_ioapic_assert(struct domain *d, unsigned int gsi, bool level);
-+
- void hvm_maybe_deassert_evtchn_irq(void);
- void hvm_assert_evtchn_irq(struct vcpu *v);
- void hvm_set_callback_via(struct domain *d, uint64_t via);
-diff --git a/xen/include/asm-x86/hvm/vpt.h b/xen/include/asm-x86/hvm/vpt.h
-index 21166edd06..0eb5ff632e 100644
---- a/xen/include/asm-x86/hvm/vpt.h
-+++ b/xen/include/asm-x86/hvm/vpt.h
-@@ -44,6 +44,7 @@ struct periodic_time {
-     bool_t warned_timeout_too_short;
- #define PTSRC_isa    1 /* ISA time source */
- #define PTSRC_lapic  2 /* LAPIC time source */
-+#define PTSRC_ioapic 3 /* IOAPIC time source */
-     u8 source;                  /* PTSRC_ */
-     u8 irq;
-     struct vcpu *vcpu;          /* vcpu timer interrupt delivers to */
-diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
-index a8ceecf3e2..68fae91567 100644
---- a/xen/include/asm-x86/msr-index.h
-+++ b/xen/include/asm-x86/msr-index.h
-@@ -31,6 +31,9 @@
- #define EFER_LMSLE		(1<<_EFER_LMSLE)
- #define EFER_FFXSE		(1<<_EFER_FFXSE)
- 
-+#define EFER_KNOWN_MASK		(EFER_SCE | EFER_LME | EFER_LMA | EFER_NX | \
-+				 EFER_SVME | EFER_LMSLE | EFER_FFXSE)
-+
- /* Speculation Controls. */
- #define MSR_SPEC_CTRL			0x00000048
- #define SPEC_CTRL_IBRS			(_AC(1, ULL) << 0)
-@@ -42,6 +45,7 @@
- #define MSR_ARCH_CAPABILITIES		0x0000010a
- #define ARCH_CAPABILITIES_RDCL_NO	(_AC(1, ULL) << 0)
- #define ARCH_CAPABILITIES_IBRS_ALL	(_AC(1, ULL) << 1)
-+#define ARCH_CAPS_RSBA			(_AC(1, ULL) << 2)
- 
- /* Intel MSRs. Some also available on other CPUs */
- #define MSR_IA32_PERFCTR0		0x000000c1
-diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
-index 80f8411355..a152f1d413 100644
---- a/xen/include/asm-x86/processor.h
-+++ b/xen/include/asm-x86/processor.h
-@@ -445,7 +445,8 @@ struct __packed __cacheline_aligned tss_struct {
- #define IST_DF   1UL
- #define IST_NMI  2UL
- #define IST_MCE  3UL
--#define IST_MAX  3UL
-+#define IST_DB   4UL
-+#define IST_MAX  4UL
- 
- /* Set the interrupt stack table used by a particular interrupt
-  * descriptor table entry. */
diff --git a/system/xen/xen.SlackBuild b/system/xen/xen.SlackBuild
index 07b3865e90..29c2ddf3cd 100644
--- a/system/xen/xen.SlackBuild
+++ b/system/xen/xen.SlackBuild
@@ -23,11 +23,11 @@
 #  ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 PRGNAM=xen
-VERSION=${VERSION:-4.10.1}
-BUILD=${BUILD:-2}
+VERSION=${VERSION:-4.11.0}
+BUILD=${BUILD:-1}
 TAG=${TAG:-_SBo}
 
-SEABIOS=${SEABIOS:-1.10.2}
+SEABIOS=${SEABIOS:-1.11.1}
 OVMF=${OVMF:-20170920_947f3737a}
 IPXE=${IPXE:-356f6c1b64d7a97746d1816cef8ca22bdd8d0b5d}
 
@@ -125,9 +125,6 @@ find -L . \
  \( -perm 666 -o -perm 664 -o -perm 640 -o -perm 600 -o -perm 444 \
   -o -perm 440 -o -perm 400 \) -exec chmod 644 {} \;
 
-# Needed by XSA-263 (for now...)
-patch -p1 <$CWD/patches/xen-4.10.2-pre.patch
-
 # Apply Xen Security Advisory patches
 for i in $CWD/xsa/* ; do
   case $i in
@@ -145,7 +142,7 @@ cp $CWD/ipxe-git-$IPXE.tar.gz tools/firmware/etherboot/_ipxe.tar.gz
 (
   # Seabios
   cd tools/firmware
-  tar -xf $CWD/seabios-$SEABIOS.tar.gz
+  tar -xf $CWD/seabios-$SEABIOS.tar.xz
   mv seabios-$SEABIOS seabios-dir-remote
   ln -s seabios-dir-remote seabios-dir
   make -C seabios-dir defconfig
diff --git a/system/xen/xen.info b/system/xen/xen.info
index 906a067f60..2e6b41b582 100644
--- a/system/xen/xen.info
+++ b/system/xen/xen.info
@@ -1,7 +1,7 @@
 PRGNAM="xen"
-VERSION="4.10.1"
+VERSION="4.11.0"
 HOMEPAGE="http://www.xenproject.org/"
-DOWNLOAD="http://mirror.slackware.hr/sources/xen/xen-4.10.1.tar.gz \
+DOWNLOAD="http://mirror.slackware.hr/sources/xen/xen-4.11.0.tar.gz \
           http://mirror.slackware.hr/sources/xen-extfiles/ipxe-git-356f6c1b64d7a97746d1816cef8ca22bdd8d0b5d.tar.gz \
           http://mirror.slackware.hr/sources/xen-extfiles/lwip-1.3.0.tar.gz \
           http://mirror.slackware.hr/sources/xen-extfiles/zlib-1.2.3.tar.gz \
@@ -11,9 +11,9 @@ DOWNLOAD="http://mirror.slackware.hr/sources/xen/xen-4.10.1.tar.gz \
           http://mirror.slackware.hr/sources/xen-extfiles/polarssl-1.1.4-gpl.tgz \
           http://mirror.slackware.hr/sources/xen-extfiles/gmp-4.3.2.tar.bz2 \
           http://mirror.slackware.hr/sources/xen-extfiles/tpm_emulator-0.7.4.tar.gz \
-          http://mirror.slackware.hr/sources/xen-seabios/seabios-1.10.2.tar.gz \
+          http://mirror.slackware.hr/sources/xen-seabios/seabios-1.11.1.tar.xz \
           http://mirror.slackware.hr/sources/xen-ovmf/xen-ovmf-20170920_947f3737a.tar.bz2"
-MD5SUM="d1b1d14ce76622062c9977d9c8ba772e \
+MD5SUM="cbec0600284921744bc14119f4ed3fff \
         0061f103c84b25c2e6ac47649b909bde \
         36cc57650cffda9a0269493be2a169bb \
         debc62758716a169df9f62e6ab2bc634 \
@@ -23,7 +23,7 @@ MD5SUM="d1b1d14ce76622062c9977d9c8ba772e \
         7b72caf22b01464ee7d6165f2fd85f44 \
         dd60683d7057917e34630b4a787932e8 \
         e26becb8a6a2b6695f6b3e8097593db8 \
-        03387d3c84c7f43d6b8ab894155e1289 \
+        d63261e22f6423972fd484d995c85f9a \
         779a40b927fb78a0d1732bb688d7a257"
 DOWNLOAD_x86_64=""
 MD5SUM_x86_64=""
diff --git a/system/xen/xsa/xsa263-4.10-0001-x86-spec_ctrl-Read-MSR_ARCH_CAPABILITIES-only-once.patch b/system/xen/xsa/xsa263-4.10-0001-x86-spec_ctrl-Read-MSR_ARCH_CAPABILITIES-only-once.patch
deleted file mode 100644
index c26afebc20..0000000000
--- a/system/xen/xsa/xsa263-4.10-0001-x86-spec_ctrl-Read-MSR_ARCH_CAPABILITIES-only-once.patch
+++ /dev/null
@@ -1,110 +0,0 @@
-From 13fafdf5c97d3bc2a8851c4d1796feac0f82d498 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Thu, 26 Apr 2018 12:21:00 +0100
-Subject: [PATCH] x86/spec_ctrl: Read MSR_ARCH_CAPABILITIES only once
-
-Make it available from the beginning of init_speculation_mitigations(), and
-pass it into appropriate functions.  Fix an RSBA typo while moving the
-affected comment.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-Reviewed-by: Wei Liu <wei.liu2@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Release-acked-by: Juergen Gross <jgross@suse.com>
-(cherry picked from commit d6c65187252a6c1810fd24c4d46f812840de8d3c)
----
- xen/arch/x86/spec_ctrl.c | 34 ++++++++++++++--------------------
- 1 file changed, 14 insertions(+), 20 deletions(-)
-
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index fa67a0f..dc90743 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -81,18 +81,15 @@ static int __init parse_bti(const char *s)
- }
- custom_param("bti", parse_bti);
- 
--static void __init print_details(enum ind_thunk thunk)
-+static void __init print_details(enum ind_thunk thunk, uint64_t caps)
- {
-     unsigned int _7d0 = 0, e8b = 0, tmp;
--    uint64_t caps = 0;
- 
-     /* Collect diagnostics about available mitigations. */
-     if ( boot_cpu_data.cpuid_level >= 7 )
-         cpuid_count(7, 0, &tmp, &tmp, &tmp, &_7d0);
-     if ( boot_cpu_data.extended_cpuid_level >= 0x80000008 )
-         cpuid(0x80000008, &tmp, &e8b, &tmp, &tmp);
--    if ( _7d0 & cpufeat_mask(X86_FEATURE_ARCH_CAPS) )
--        rdmsrl(MSR_ARCH_CAPABILITIES, caps);
- 
-     printk(XENLOG_DEBUG "Speculative mitigation facilities:\n");
- 
-@@ -125,7 +122,7 @@ static void __init print_details(enum ind_thunk thunk)
- }
- 
- /* Calculate whether Retpoline is known-safe on this CPU. */
--static bool __init retpoline_safe(void)
-+static bool __init retpoline_safe(uint64_t caps)
- {
-     unsigned int ucode_rev = this_cpu(ucode_cpu_info).cpu_sig.rev;
- 
-@@ -136,19 +133,12 @@ static bool __init retpoline_safe(void)
-          boot_cpu_data.x86 != 6 )
-         return false;
- 
--    if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
--    {
--        uint64_t caps;
--
--        rdmsrl(MSR_ARCH_CAPABILITIES, caps);
--
--        /*
--         * RBSA may be set by a hypervisor to indicate that we may move to a
--         * processor which isn't retpoline-safe.
--         */
--        if ( caps & ARCH_CAPS_RSBA )
--            return false;
--    }
-+    /*
-+     * RSBA may be set by a hypervisor to indicate that we may move to a
-+     * processor which isn't retpoline-safe.
-+     */
-+    if ( caps & ARCH_CAPS_RSBA )
-+        return false;
- 
-     switch ( boot_cpu_data.x86_model )
-     {
-@@ -218,6 +208,10 @@ void __init init_speculation_mitigations(void)
- {
-     enum ind_thunk thunk = THUNK_DEFAULT;
-     bool ibrs = false;
-+    uint64_t caps = 0;
-+
-+    if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
-+        rdmsrl(MSR_ARCH_CAPABILITIES, caps);
- 
-     /*
-      * Has the user specified any custom BTI mitigations?  If so, follow their
-@@ -246,7 +240,7 @@ void __init init_speculation_mitigations(void)
-              * On Intel hardware, we'd like to use retpoline in preference to
-              * IBRS, but only if it is safe on this hardware.
-              */
--            else if ( retpoline_safe() )
-+            else if ( retpoline_safe(caps) )
-                 thunk = THUNK_RETPOLINE;
-             else if ( boot_cpu_has(X86_FEATURE_IBRSB) )
-                 ibrs = true;
-@@ -331,7 +325,7 @@ void __init init_speculation_mitigations(void)
-     /* (Re)init BSP state now that default_bti_ist_info has been calculated. */
-     init_shadow_spec_ctrl_state();
- 
--    print_details(thunk);
-+    print_details(thunk, caps);
- }
- 
- static void __init __maybe_unused build_assertions(void)
--- 
-2.1.4
-
diff --git a/system/xen/xsa/xsa263-4.10-0002-x86-spec_ctrl-Express-Xen-s-choice-of-MSR_SPEC_CTRL-.patch b/system/xen/xsa/xsa263-4.10-0002-x86-spec_ctrl-Express-Xen-s-choice-of-MSR_SPEC_CTRL-.patch
deleted file mode 100644
index a954943b71..0000000000
--- a/system/xen/xsa/xsa263-4.10-0002-x86-spec_ctrl-Express-Xen-s-choice-of-MSR_SPEC_CTRL-.patch
+++ /dev/null
@@ -1,138 +0,0 @@
-From d7b345e4ca136a995bfaaf2ee20901ee20e63570 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 17 Apr 2018 14:15:04 +0100
-Subject: [PATCH] x86/spec_ctrl: Express Xen's choice of MSR_SPEC_CTRL value as
- a variable
-
-At the moment, we have two different encodings of Xen's MSR_SPEC_CTRL value,
-which is a side effect of how the Spectre series developed.  One encoding is
-via an alias with the bottom bit of bti_ist_info, and can encode IBRS or not,
-but not other configurations such as STIBP.
-
-Break Xen's value out into a separate variable (in the top of stack block for
-XPTI reasons) and use this instead of bti_ist_info in the IST path.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Wei Liu <wei.liu2@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Release-acked-by: Juergen Gross <jgross@suse.com>
-(cherry picked from commit 66dfae0f32bfbc899c2f3446d5ee57068cb7f957)
----
- xen/arch/x86/spec_ctrl.c            | 8 +++++---
- xen/arch/x86/x86_64/asm-offsets.c   | 1 +
- xen/include/asm-x86/current.h       | 1 +
- xen/include/asm-x86/spec_ctrl.h     | 2 ++
- xen/include/asm-x86/spec_ctrl_asm.h | 8 ++------
- 5 files changed, 11 insertions(+), 9 deletions(-)
-
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index dc90743..1143521 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -38,6 +38,7 @@ static int8_t __initdata opt_ibrs = -1;
- static bool __initdata opt_rsb_native = true;
- static bool __initdata opt_rsb_vmexit = true;
- bool __read_mostly opt_ibpb = true;
-+uint8_t __read_mostly default_xen_spec_ctrl;
- uint8_t __read_mostly default_bti_ist_info;
- 
- static int __init parse_bti(const char *s)
-@@ -285,11 +286,14 @@ void __init init_speculation_mitigations(void)
-          * guests.
-          */
-         if ( ibrs )
-+        {
-+            default_xen_spec_ctrl |= SPEC_CTRL_IBRS;
-             setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_SET);
-+        }
-         else
-             setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_CLEAR);
- 
--        default_bti_ist_info |= BTI_IST_WRMSR | ibrs;
-+        default_bti_ist_info |= BTI_IST_WRMSR;
-     }
- 
-     /*
-@@ -330,8 +334,6 @@ void __init init_speculation_mitigations(void)
- 
- static void __init __maybe_unused build_assertions(void)
- {
--    /* The optimised assembly relies on this alias. */
--    BUILD_BUG_ON(BTI_IST_IBRS != SPEC_CTRL_IBRS);
- }
- 
- /*
-diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
-index 13478d4..0726147 100644
---- a/xen/arch/x86/x86_64/asm-offsets.c
-+++ b/xen/arch/x86/x86_64/asm-offsets.c
-@@ -142,6 +142,7 @@ void __dummy__(void)
-     OFFSET(CPUINFO_xen_cr3, struct cpu_info, xen_cr3);
-     OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3);
-     OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl);
-+    OFFSET(CPUINFO_xen_spec_ctrl, struct cpu_info, xen_spec_ctrl);
-     OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, use_shadow_spec_ctrl);
-     OFFSET(CPUINFO_bti_ist_info, struct cpu_info, bti_ist_info);
-     DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
-diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
-index 4678a0f..d10b13c 100644
---- a/xen/include/asm-x86/current.h
-+++ b/xen/include/asm-x86/current.h
-@@ -56,6 +56,7 @@ struct cpu_info {
- 
-     /* See asm-x86/spec_ctrl_asm.h for usage. */
-     unsigned int shadow_spec_ctrl;
-+    uint8_t      xen_spec_ctrl;
-     bool         use_shadow_spec_ctrl;
-     uint8_t      bti_ist_info;
- 
-diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
-index 5ab4ff3..5e4fc84 100644
---- a/xen/include/asm-x86/spec_ctrl.h
-+++ b/xen/include/asm-x86/spec_ctrl.h
-@@ -27,6 +27,7 @@
- void init_speculation_mitigations(void);
- 
- extern bool opt_ibpb;
-+extern uint8_t default_xen_spec_ctrl;
- extern uint8_t default_bti_ist_info;
- 
- static inline void init_shadow_spec_ctrl_state(void)
-@@ -34,6 +35,7 @@ static inline void init_shadow_spec_ctrl_state(void)
-     struct cpu_info *info = get_cpu_info();
- 
-     info->shadow_spec_ctrl = info->use_shadow_spec_ctrl = 0;
-+    info->xen_spec_ctrl = default_xen_spec_ctrl;
-     info->bti_ist_info = default_bti_ist_info;
- }
- 
-diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
-index 1f2b6f3..697da13 100644
---- a/xen/include/asm-x86/spec_ctrl_asm.h
-+++ b/xen/include/asm-x86/spec_ctrl_asm.h
-@@ -21,7 +21,6 @@
- #define __X86_SPEC_CTRL_ASM_H__
- 
- /* Encoding of the bottom bits in cpuinfo.bti_ist_info */
--#define BTI_IST_IBRS  (1 << 0)
- #define BTI_IST_WRMSR (1 << 1)
- #define BTI_IST_RSB   (1 << 2)
- 
-@@ -286,12 +285,9 @@
-     setz %dl
-     and %dl, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%r14)
- 
--    /*
--     * Load Xen's intended value.  SPEC_CTRL_IBRS vs 0 is encoded in the
--     * bottom bit of bti_ist_info, via a deliberate alias with BTI_IST_IBRS.
--     */
-+    /* Load Xen's intended value. */
-     mov $MSR_SPEC_CTRL, %ecx
--    and $BTI_IST_IBRS, %eax
-+    movzbl STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax
-     xor %edx, %edx
-     wrmsr
- 
--- 
-2.1.4
-
diff --git a/system/xen/xsa/xsa263-4.10-0003-x86-spec_ctrl-Merge-bti_ist_info-and-use_shadow_spec.patch b/system/xen/xsa/xsa263-4.10-0003-x86-spec_ctrl-Merge-bti_ist_info-and-use_shadow_spec.patch
deleted file mode 100644
index 7468f9ce28..0000000000
--- a/system/xen/xsa/xsa263-4.10-0003-x86-spec_ctrl-Merge-bti_ist_info-and-use_shadow_spec.patch
+++ /dev/null
@@ -1,340 +0,0 @@
-From a0c2f734b4c683cb407e10ff943671c413480287 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 17 Apr 2018 14:15:04 +0100
-Subject: [PATCH] x86/spec_ctrl: Merge bti_ist_info and use_shadow_spec_ctrl
- into spec_ctrl_flags
-
-All 3 bits of information here are control flags for the entry/exit code
-behaviour.  Treat them as such, rather than having two different variables.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Wei Liu <wei.liu2@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Release-acked-by: Juergen Gross <jgross@suse.com>
-(cherry picked from commit 5262ba2e7799001402dfe139ff944e035dfff928)
----
- xen/arch/x86/acpi/power.c           |  4 +--
- xen/arch/x86/spec_ctrl.c            | 10 ++++---
- xen/arch/x86/x86_64/asm-offsets.c   |  3 +--
- xen/include/asm-x86/current.h       |  3 +--
- xen/include/asm-x86/nops.h          |  5 ++--
- xen/include/asm-x86/spec_ctrl.h     | 10 +++----
- xen/include/asm-x86/spec_ctrl_asm.h | 52 ++++++++++++++++++++-----------------
- 7 files changed, 45 insertions(+), 42 deletions(-)
-
-diff --git a/xen/arch/x86/acpi/power.c b/xen/arch/x86/acpi/power.c
-index f7085d3..f3480aa 100644
---- a/xen/arch/x86/acpi/power.c
-+++ b/xen/arch/x86/acpi/power.c
-@@ -215,7 +215,7 @@ static int enter_state(u32 state)
-     ci = get_cpu_info();
-     spec_ctrl_enter_idle(ci);
-     /* Avoid NMI/#MC using MSR_SPEC_CTRL until we've reloaded microcode. */
--    ci->bti_ist_info = 0;
-+    ci->spec_ctrl_flags &= ~SCF_ist_wrmsr;
- 
-     ACPI_FLUSH_CPU_CACHE();
- 
-@@ -256,7 +256,7 @@ static int enter_state(u32 state)
-     microcode_resume_cpu(0);
- 
-     /* Re-enabled default NMI/#MC use of MSR_SPEC_CTRL. */
--    ci->bti_ist_info = default_bti_ist_info;
-+    ci->spec_ctrl_flags |= (default_spec_ctrl_flags & SCF_ist_wrmsr);
-     spec_ctrl_exit_idle(ci);
- 
-  done:
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 1143521..2d69910 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -39,7 +39,7 @@ static bool __initdata opt_rsb_native = true;
- static bool __initdata opt_rsb_vmexit = true;
- bool __read_mostly opt_ibpb = true;
- uint8_t __read_mostly default_xen_spec_ctrl;
--uint8_t __read_mostly default_bti_ist_info;
-+uint8_t __read_mostly default_spec_ctrl_flags;
- 
- static int __init parse_bti(const char *s)
- {
-@@ -293,7 +293,7 @@ void __init init_speculation_mitigations(void)
-         else
-             setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_CLEAR);
- 
--        default_bti_ist_info |= BTI_IST_WRMSR;
-+        default_spec_ctrl_flags |= SCF_ist_wrmsr;
-     }
- 
-     /*
-@@ -312,7 +312,7 @@ void __init init_speculation_mitigations(void)
-     if ( opt_rsb_native )
-     {
-         setup_force_cpu_cap(X86_FEATURE_RSB_NATIVE);
--        default_bti_ist_info |= BTI_IST_RSB;
-+        default_spec_ctrl_flags |= SCF_ist_rsb;
-     }
- 
-     /*
-@@ -326,7 +326,7 @@ void __init init_speculation_mitigations(void)
-     if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) )
-         opt_ibpb = false;
- 
--    /* (Re)init BSP state now that default_bti_ist_info has been calculated. */
-+    /* (Re)init BSP state now that default_spec_ctrl_flags has been calculated. */
-     init_shadow_spec_ctrl_state();
- 
-     print_details(thunk, caps);
-@@ -334,6 +334,8 @@ void __init init_speculation_mitigations(void)
- 
- static void __init __maybe_unused build_assertions(void)
- {
-+    /* The optimised assembly relies on this alias. */
-+    BUILD_BUG_ON(SCF_use_shadow != 1);
- }
- 
- /*
-diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c
-index 0726147..97242e5 100644
---- a/xen/arch/x86/x86_64/asm-offsets.c
-+++ b/xen/arch/x86/x86_64/asm-offsets.c
-@@ -143,8 +143,7 @@ void __dummy__(void)
-     OFFSET(CPUINFO_pv_cr3, struct cpu_info, pv_cr3);
-     OFFSET(CPUINFO_shadow_spec_ctrl, struct cpu_info, shadow_spec_ctrl);
-     OFFSET(CPUINFO_xen_spec_ctrl, struct cpu_info, xen_spec_ctrl);
--    OFFSET(CPUINFO_use_shadow_spec_ctrl, struct cpu_info, use_shadow_spec_ctrl);
--    OFFSET(CPUINFO_bti_ist_info, struct cpu_info, bti_ist_info);
-+    OFFSET(CPUINFO_spec_ctrl_flags, struct cpu_info, spec_ctrl_flags);
-     DEFINE(CPUINFO_sizeof, sizeof(struct cpu_info));
-     BLANK();
- 
-diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h
-index d10b13c..7afff0e 100644
---- a/xen/include/asm-x86/current.h
-+++ b/xen/include/asm-x86/current.h
-@@ -57,8 +57,7 @@ struct cpu_info {
-     /* See asm-x86/spec_ctrl_asm.h for usage. */
-     unsigned int shadow_spec_ctrl;
-     uint8_t      xen_spec_ctrl;
--    bool         use_shadow_spec_ctrl;
--    uint8_t      bti_ist_info;
-+    uint8_t      spec_ctrl_flags;
- 
-     unsigned long __pad;
-     /* get_stack_bottom() must be 16-byte aligned */
-diff --git a/xen/include/asm-x86/nops.h b/xen/include/asm-x86/nops.h
-index 37f9819..b744895 100644
---- a/xen/include/asm-x86/nops.h
-+++ b/xen/include/asm-x86/nops.h
-@@ -62,10 +62,9 @@
- #define ASM_NOP8 _ASM_MK_NOP(K8_NOP8)
- 
- #define ASM_NOP17 ASM_NOP8; ASM_NOP7; ASM_NOP2
--#define ASM_NOP21 ASM_NOP8; ASM_NOP8; ASM_NOP5
-+#define ASM_NOP22 ASM_NOP8; ASM_NOP8; ASM_NOP6
- #define ASM_NOP24 ASM_NOP8; ASM_NOP8; ASM_NOP8
--#define ASM_NOP29 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP5
--#define ASM_NOP32 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8
-+#define ASM_NOP33 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP7; ASM_NOP2
- #define ASM_NOP40 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8
- 
- #define ASM_NOP_MAX 8
-diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
-index 5e4fc84..059e291 100644
---- a/xen/include/asm-x86/spec_ctrl.h
-+++ b/xen/include/asm-x86/spec_ctrl.h
-@@ -28,15 +28,15 @@ void init_speculation_mitigations(void);
- 
- extern bool opt_ibpb;
- extern uint8_t default_xen_spec_ctrl;
--extern uint8_t default_bti_ist_info;
-+extern uint8_t default_spec_ctrl_flags;
- 
- static inline void init_shadow_spec_ctrl_state(void)
- {
-     struct cpu_info *info = get_cpu_info();
- 
--    info->shadow_spec_ctrl = info->use_shadow_spec_ctrl = 0;
-+    info->shadow_spec_ctrl = 0;
-     info->xen_spec_ctrl = default_xen_spec_ctrl;
--    info->bti_ist_info = default_bti_ist_info;
-+    info->spec_ctrl_flags = default_spec_ctrl_flags;
- }
- 
- /* WARNING! `ret`, `call *`, `jmp *` not safe after this call. */
-@@ -50,7 +50,7 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info)
-      */
-     info->shadow_spec_ctrl = val;
-     barrier();
--    info->use_shadow_spec_ctrl = true;
-+    info->spec_ctrl_flags |= SCF_use_shadow;
-     barrier();
-     asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET)
-                    :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" );
-@@ -65,7 +65,7 @@ static always_inline void spec_ctrl_exit_idle(struct cpu_info *info)
-      * Disable shadowing before updating the MSR.  There are no SMP issues
-      * here; only local processor ordering concerns.
-      */
--    info->use_shadow_spec_ctrl = false;
-+    info->spec_ctrl_flags &= ~SCF_use_shadow;
-     barrier();
-     asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET)
-                    :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" );
-diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
-index 697da13..39fb4f8 100644
---- a/xen/include/asm-x86/spec_ctrl_asm.h
-+++ b/xen/include/asm-x86/spec_ctrl_asm.h
-@@ -20,9 +20,10 @@
- #ifndef __X86_SPEC_CTRL_ASM_H__
- #define __X86_SPEC_CTRL_ASM_H__
- 
--/* Encoding of the bottom bits in cpuinfo.bti_ist_info */
--#define BTI_IST_WRMSR (1 << 1)
--#define BTI_IST_RSB   (1 << 2)
-+/* Encoding of cpuinfo.spec_ctrl_flags */
-+#define SCF_use_shadow (1 << 0)
-+#define SCF_ist_wrmsr  (1 << 1)
-+#define SCF_ist_rsb    (1 << 2)
- 
- #ifdef __ASSEMBLY__
- #include <asm/msr-index.h>
-@@ -49,20 +50,20 @@
-  * after VMEXIT.  The VMEXIT-specific code reads MSR_SPEC_CTRL and updates
-  * current before loading Xen's MSR_SPEC_CTRL setting.
-  *
-- * Factor 2 is harder.  We maintain a shadow_spec_ctrl value, and
-- * use_shadow_spec_ctrl boolean per cpu.  The synchronous use is:
-+ * Factor 2 is harder.  We maintain a shadow_spec_ctrl value, and a use_shadow
-+ * boolean in the per cpu spec_ctrl_flags.  The synchronous use is:
-  *
-  *  1) Store guest value in shadow_spec_ctrl
-- *  2) Set use_shadow_spec_ctrl boolean
-+ *  2) Set the use_shadow boolean
-  *  3) Load guest value into MSR_SPEC_CTRL
-  *  4) Exit to guest
-  *  5) Entry from guest
-- *  6) Clear use_shadow_spec_ctrl boolean
-+ *  6) Clear the use_shadow boolean
-  *  7) Load Xen's value into MSR_SPEC_CTRL
-  *
-  * The asynchronous use for interrupts/exceptions is:
-  *  -  Set/clear IBRS on entry to Xen
-- *  -  On exit to Xen, check use_shadow_spec_ctrl
-+ *  -  On exit to Xen, check use_shadow
-  *  -  If set, load shadow_spec_ctrl
-  *
-  * Therefore, an interrupt/exception which hits the synchronous path between
-@@ -134,7 +135,7 @@
-     xor %edx, %edx
- 
-     /* Clear SPEC_CTRL shadowing *before* loading Xen's value. */
--    movb %dl, CPUINFO_use_shadow_spec_ctrl(%rsp)
-+    andb $~SCF_use_shadow, CPUINFO_spec_ctrl_flags(%rsp)
- 
-     /* Load Xen's intended value. */
-     mov $\ibrs_val, %eax
-@@ -160,12 +161,14 @@
-      * block so calculate the position directly.
-      */
-     .if \maybexen
-+        xor %eax, %eax
-         /* Branchless `if ( !xen ) clear_shadowing` */
-         testb $3, UREGS_cs(%rsp)
--        setz %al
--        and %al, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%r14)
-+        setnz %al
-+        not %eax
-+        and %al, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14)
-     .else
--        movb %dl, CPUINFO_use_shadow_spec_ctrl(%rsp)
-+        andb $~SCF_use_shadow, CPUINFO_spec_ctrl_flags(%rsp)
-     .endif
- 
-     /* Load Xen's intended value. */
-@@ -184,8 +187,8 @@
-  */
-     xor %edx, %edx
- 
--    cmpb %dl, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%rbx)
--    je .L\@_skip
-+    testb $SCF_use_shadow, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx)
-+    jz .L\@_skip
- 
-     mov STACK_CPUINFO_FIELD(shadow_spec_ctrl)(%rbx), %eax
-     mov $MSR_SPEC_CTRL, %ecx
-@@ -206,7 +209,7 @@
-     mov %eax, CPUINFO_shadow_spec_ctrl(%rsp)
- 
-     /* Set SPEC_CTRL shadowing *before* loading the guest value. */
--    movb $1, CPUINFO_use_shadow_spec_ctrl(%rsp)
-+    orb $SCF_use_shadow, CPUINFO_spec_ctrl_flags(%rsp)
- 
-     mov $MSR_SPEC_CTRL, %ecx
-     xor %edx, %edx
-@@ -217,7 +220,7 @@
- #define SPEC_CTRL_ENTRY_FROM_VMEXIT                                     \
-     ALTERNATIVE __stringify(ASM_NOP40),                                 \
-         DO_OVERWRITE_RSB, X86_FEATURE_RSB_VMEXIT;                       \
--    ALTERNATIVE_2 __stringify(ASM_NOP32),                               \
-+    ALTERNATIVE_2 __stringify(ASM_NOP33),                               \
-         __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT                      \
-                     ibrs_val=SPEC_CTRL_IBRS),                           \
-         X86_FEATURE_XEN_IBRS_SET,                                       \
-@@ -229,7 +232,7 @@
- #define SPEC_CTRL_ENTRY_FROM_PV                                         \
-     ALTERNATIVE __stringify(ASM_NOP40),                                 \
-         DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE;                       \
--    ALTERNATIVE_2 __stringify(ASM_NOP21),                               \
-+    ALTERNATIVE_2 __stringify(ASM_NOP22),                               \
-         __stringify(DO_SPEC_CTRL_ENTRY maybexen=0                       \
-                     ibrs_val=SPEC_CTRL_IBRS),                           \
-         X86_FEATURE_XEN_IBRS_SET,                                       \
-@@ -240,7 +243,7 @@
- #define SPEC_CTRL_ENTRY_FROM_INTR                                       \
-     ALTERNATIVE __stringify(ASM_NOP40),                                 \
-         DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE;                       \
--    ALTERNATIVE_2 __stringify(ASM_NOP29),                               \
-+    ALTERNATIVE_2 __stringify(ASM_NOP33),                               \
-         __stringify(DO_SPEC_CTRL_ENTRY maybexen=1                       \
-                     ibrs_val=SPEC_CTRL_IBRS),                           \
-         X86_FEATURE_XEN_IBRS_SET,                                       \
-@@ -268,22 +271,23 @@
-  * This is logical merge of DO_OVERWRITE_RSB and DO_SPEC_CTRL_ENTRY
-  * maybexen=1, but with conditionals rather than alternatives.
-  */
--    movzbl STACK_CPUINFO_FIELD(bti_ist_info)(%r14), %eax
-+    movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %eax
- 
--    testb $BTI_IST_RSB, %al
-+    test $SCF_ist_rsb, %al
-     jz .L\@_skip_rsb
- 
-     DO_OVERWRITE_RSB tmp=rdx /* Clobbers %rcx/%rdx */
- 
- .L\@_skip_rsb:
- 
--    testb $BTI_IST_WRMSR, %al
-+    test $SCF_ist_wrmsr, %al
-     jz .L\@_skip_wrmsr
- 
-     xor %edx, %edx
-     testb $3, UREGS_cs(%rsp)
--    setz %dl
--    and %dl, STACK_CPUINFO_FIELD(use_shadow_spec_ctrl)(%r14)
-+    setnz %dl
-+    not %edx
-+    and %dl, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14)
- 
-     /* Load Xen's intended value. */
-     mov $MSR_SPEC_CTRL, %ecx
-@@ -310,7 +314,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
-  * Requires %rbx=stack_end
-  * Clobbers %rax, %rcx, %rdx
-  */
--    testb $BTI_IST_WRMSR, STACK_CPUINFO_FIELD(bti_ist_info)(%rbx)
-+    testb $SCF_ist_wrmsr, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%rbx)
-     jz .L\@_skip
- 
-     DO_SPEC_CTRL_EXIT_TO_XEN
--- 
-2.1.4
-
diff --git a/system/xen/xsa/xsa263-4.10-0004-x86-spec_ctrl-Fold-the-XEN_IBRS_-SET-CLEAR-ALTERNATI.patch b/system/xen/xsa/xsa263-4.10-0004-x86-spec_ctrl-Fold-the-XEN_IBRS_-SET-CLEAR-ALTERNATI.patch
deleted file mode 100644
index f6e87244dc..0000000000
--- a/system/xen/xsa/xsa263-4.10-0004-x86-spec_ctrl-Fold-the-XEN_IBRS_-SET-CLEAR-ALTERNATI.patch
+++ /dev/null
@@ -1,221 +0,0 @@
-From 0b1aded85866f48cdede20c54d30cf593f8a83f7 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 17 Apr 2018 14:15:04 +0100
-Subject: [PATCH] x86/spec_ctrl: Fold the XEN_IBRS_{SET,CLEAR} ALTERNATIVES
- together
-
-Currently, the SPEC_CTRL_{ENTRY,EXIT}_* macros encode Xen's choice of
-MSR_SPEC_CTRL as an immediate constant, and chooses between IBRS or not by
-doubling up the entire alternative block.
-
-There is now a variable holding Xen's choice of value, so use that and
-simplify the alternatives.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Wei Liu <wei.liu2@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Release-acked-by: Juergen Gross <jgross@suse.com>
-(cherry picked from commit af949407eaba7af71067f23d5866cd0bf1f1144d)
----
- xen/arch/x86/spec_ctrl.c            | 12 +++++-----
- xen/include/asm-x86/cpufeatures.h   |  3 +--
- xen/include/asm-x86/nops.h          |  3 ++-
- xen/include/asm-x86/spec_ctrl.h     |  6 ++---
- xen/include/asm-x86/spec_ctrl_asm.h | 45 +++++++++++++------------------------
- 5 files changed, 26 insertions(+), 43 deletions(-)
-
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 2d69910..b62cfcc 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -112,8 +112,9 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
-            thunk == THUNK_RETPOLINE ? "RETPOLINE" :
-            thunk == THUNK_LFENCE    ? "LFENCE" :
-            thunk == THUNK_JMP       ? "JMP" : "?",
--           boot_cpu_has(X86_FEATURE_XEN_IBRS_SET)    ? " IBRS+" :
--           boot_cpu_has(X86_FEATURE_XEN_IBRS_CLEAR)  ? " IBRS-"      : "",
-+           boot_cpu_has(X86_FEATURE_SC_MSR) ?
-+           default_xen_spec_ctrl & SPEC_CTRL_IBRS    ? " IBRS+" :
-+                                                       " IBRS-"      : "",
-            opt_ibpb                                  ? " IBPB"       : "",
-            boot_cpu_has(X86_FEATURE_RSB_NATIVE)      ? " RSB_NATIVE" : "",
-            boot_cpu_has(X86_FEATURE_RSB_VMEXIT)      ? " RSB_VMEXIT" : "");
-@@ -285,13 +286,10 @@ void __init init_speculation_mitigations(void)
-          * need the IBRS entry/exit logic to virtualise IBRS support for
-          * guests.
-          */
-+        setup_force_cpu_cap(X86_FEATURE_SC_MSR);
-+
-         if ( ibrs )
--        {
-             default_xen_spec_ctrl |= SPEC_CTRL_IBRS;
--            setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_SET);
--        }
--        else
--            setup_force_cpu_cap(X86_FEATURE_XEN_IBRS_CLEAR);
- 
-         default_spec_ctrl_flags |= SCF_ist_wrmsr;
-     }
-diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h
-index c9b1a48..ca58b0e 100644
---- a/xen/include/asm-x86/cpufeatures.h
-+++ b/xen/include/asm-x86/cpufeatures.h
-@@ -26,8 +26,7 @@ XEN_CPUFEATURE(LFENCE_DISPATCH, (FSCAPINTS+0)*32+12) /* lfence set as Dispatch S
- XEN_CPUFEATURE(IND_THUNK_LFENCE,(FSCAPINTS+0)*32+13) /* Use IND_THUNK_LFENCE */
- XEN_CPUFEATURE(IND_THUNK_JMP,   (FSCAPINTS+0)*32+14) /* Use IND_THUNK_JMP */
- XEN_CPUFEATURE(XEN_IBPB,        (FSCAPINTS+0)*32+15) /* IBRSB || IBPB */
--XEN_CPUFEATURE(XEN_IBRS_SET,    (FSCAPINTS+0)*32+16) /* IBRSB && IRBS set in Xen */
--XEN_CPUFEATURE(XEN_IBRS_CLEAR,  (FSCAPINTS+0)*32+17) /* IBRSB && IBRS clear in Xen */
-+XEN_CPUFEATURE(SC_MSR,          (FSCAPINTS+0)*32+16) /* MSR_SPEC_CTRL used by Xen */
- XEN_CPUFEATURE(RSB_NATIVE,      (FSCAPINTS+0)*32+18) /* RSB overwrite needed for native */
- XEN_CPUFEATURE(RSB_VMEXIT,      (FSCAPINTS+0)*32+19) /* RSB overwrite needed for vmexit */
- XEN_CPUFEATURE(NO_XPTI,         (FSCAPINTS+0)*32+20) /* XPTI mitigation not in use */
-diff --git a/xen/include/asm-x86/nops.h b/xen/include/asm-x86/nops.h
-index b744895..913e9f0 100644
---- a/xen/include/asm-x86/nops.h
-+++ b/xen/include/asm-x86/nops.h
-@@ -62,9 +62,10 @@
- #define ASM_NOP8 _ASM_MK_NOP(K8_NOP8)
- 
- #define ASM_NOP17 ASM_NOP8; ASM_NOP7; ASM_NOP2
--#define ASM_NOP22 ASM_NOP8; ASM_NOP8; ASM_NOP6
- #define ASM_NOP24 ASM_NOP8; ASM_NOP8; ASM_NOP8
-+#define ASM_NOP25 ASM_NOP8; ASM_NOP8; ASM_NOP7; ASM_NOP2
- #define ASM_NOP33 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP7; ASM_NOP2
-+#define ASM_NOP36 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP4
- #define ASM_NOP40 ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8; ASM_NOP8
- 
- #define ASM_NOP_MAX 8
-diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
-index 059e291..7d7c42e 100644
---- a/xen/include/asm-x86/spec_ctrl.h
-+++ b/xen/include/asm-x86/spec_ctrl.h
-@@ -52,14 +52,14 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info)
-     barrier();
-     info->spec_ctrl_flags |= SCF_use_shadow;
-     barrier();
--    asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET)
-+    asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_SC_MSR)
-                    :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" );
- }
- 
- /* WARNING! `ret`, `call *`, `jmp *` not safe before this call. */
- static always_inline void spec_ctrl_exit_idle(struct cpu_info *info)
- {
--    uint32_t val = SPEC_CTRL_IBRS;
-+    uint32_t val = info->xen_spec_ctrl;
- 
-     /*
-      * Disable shadowing before updating the MSR.  There are no SMP issues
-@@ -67,7 +67,7 @@ static always_inline void spec_ctrl_exit_idle(struct cpu_info *info)
-      */
-     info->spec_ctrl_flags &= ~SCF_use_shadow;
-     barrier();
--    asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_XEN_IBRS_SET)
-+    asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_SC_MSR)
-                    :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" );
- }
- 
-diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
-index 39fb4f8..17dd2cc 100644
---- a/xen/include/asm-x86/spec_ctrl_asm.h
-+++ b/xen/include/asm-x86/spec_ctrl_asm.h
-@@ -117,7 +117,7 @@
-     mov %\tmp, %rsp                 /* Restore old %rsp */
- .endm
- 
--.macro DO_SPEC_CTRL_ENTRY_FROM_VMEXIT ibrs_val:req
-+.macro DO_SPEC_CTRL_ENTRY_FROM_VMEXIT
- /*
-  * Requires %rbx=current, %rsp=regs/cpuinfo
-  * Clobbers %rax, %rcx, %rdx
-@@ -138,11 +138,11 @@
-     andb $~SCF_use_shadow, CPUINFO_spec_ctrl_flags(%rsp)
- 
-     /* Load Xen's intended value. */
--    mov $\ibrs_val, %eax
-+    movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax
-     wrmsr
- .endm
- 
--.macro DO_SPEC_CTRL_ENTRY maybexen:req ibrs_val:req
-+.macro DO_SPEC_CTRL_ENTRY maybexen:req
- /*
-  * Requires %rsp=regs (also cpuinfo if !maybexen)
-  * Requires %r14=stack_end (if maybexen)
-@@ -167,12 +167,12 @@
-         setnz %al
-         not %eax
-         and %al, STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14)
-+        movzbl STACK_CPUINFO_FIELD(xen_spec_ctrl)(%r14), %eax
-     .else
-         andb $~SCF_use_shadow, CPUINFO_spec_ctrl_flags(%rsp)
-+        movzbl CPUINFO_xen_spec_ctrl(%rsp), %eax
-     .endif
- 
--    /* Load Xen's intended value. */
--    mov $\ibrs_val, %eax
-     wrmsr
- .endm
- 
-@@ -220,47 +220,32 @@
- #define SPEC_CTRL_ENTRY_FROM_VMEXIT                                     \
-     ALTERNATIVE __stringify(ASM_NOP40),                                 \
-         DO_OVERWRITE_RSB, X86_FEATURE_RSB_VMEXIT;                       \
--    ALTERNATIVE_2 __stringify(ASM_NOP33),                               \
--        __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT                      \
--                    ibrs_val=SPEC_CTRL_IBRS),                           \
--        X86_FEATURE_XEN_IBRS_SET,                                       \
--        __stringify(DO_SPEC_CTRL_ENTRY_FROM_VMEXIT                      \
--                    ibrs_val=0),                                        \
--        X86_FEATURE_XEN_IBRS_CLEAR
-+    ALTERNATIVE __stringify(ASM_NOP36),                                 \
-+        DO_SPEC_CTRL_ENTRY_FROM_VMEXIT, X86_FEATURE_SC_MSR
- 
- /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
- #define SPEC_CTRL_ENTRY_FROM_PV                                         \
-     ALTERNATIVE __stringify(ASM_NOP40),                                 \
-         DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE;                       \
--    ALTERNATIVE_2 __stringify(ASM_NOP22),                               \
--        __stringify(DO_SPEC_CTRL_ENTRY maybexen=0                       \
--                    ibrs_val=SPEC_CTRL_IBRS),                           \
--        X86_FEATURE_XEN_IBRS_SET,                                       \
--        __stringify(DO_SPEC_CTRL_ENTRY maybexen=0 ibrs_val=0),          \
--        X86_FEATURE_XEN_IBRS_CLEAR
-+    ALTERNATIVE __stringify(ASM_NOP25),                                 \
-+        __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), X86_FEATURE_SC_MSR
- 
- /* Use in interrupt/exception context.  May interrupt Xen or PV context. */
- #define SPEC_CTRL_ENTRY_FROM_INTR                                       \
-     ALTERNATIVE __stringify(ASM_NOP40),                                 \
-         DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE;                       \
--    ALTERNATIVE_2 __stringify(ASM_NOP33),                               \
--        __stringify(DO_SPEC_CTRL_ENTRY maybexen=1                       \
--                    ibrs_val=SPEC_CTRL_IBRS),                           \
--        X86_FEATURE_XEN_IBRS_SET,                                       \
--        __stringify(DO_SPEC_CTRL_ENTRY maybexen=1 ibrs_val=0),          \
--        X86_FEATURE_XEN_IBRS_CLEAR
-+    ALTERNATIVE __stringify(ASM_NOP33),                                 \
-+        __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), X86_FEATURE_SC_MSR
- 
- /* Use when exiting to Xen context. */
- #define SPEC_CTRL_EXIT_TO_XEN                                           \
--    ALTERNATIVE_2 __stringify(ASM_NOP17),                               \
--        DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_XEN_IBRS_SET,             \
--        DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_XEN_IBRS_CLEAR
-+    ALTERNATIVE __stringify(ASM_NOP17),                                 \
-+        DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_SC_MSR
- 
- /* Use when exiting to guest context. */
- #define SPEC_CTRL_EXIT_TO_GUEST                                         \
--    ALTERNATIVE_2 __stringify(ASM_NOP24),                               \
--        DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_SET,           \
--        DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_XEN_IBRS_CLEAR
-+    ALTERNATIVE __stringify(ASM_NOP24),                                 \
-+        DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR
- 
- /* TODO: Drop these when the alternatives infrastructure is NMI/#MC safe. */
- .macro SPEC_CTRL_ENTRY_FROM_INTR_IST
--- 
-2.1.4
-
diff --git a/system/xen/xsa/xsa263-4.10-0005-x86-spec_ctrl-Rename-bits-of-infrastructure-to-avoid.patch b/system/xen/xsa/xsa263-4.10-0005-x86-spec_ctrl-Rename-bits-of-infrastructure-to-avoid.patch
deleted file mode 100644
index f4efabeb46..0000000000
--- a/system/xen/xsa/xsa263-4.10-0005-x86-spec_ctrl-Rename-bits-of-infrastructure-to-avoid.patch
+++ /dev/null
@@ -1,273 +0,0 @@
-From 5cc3611de7d09140e55caa2c2d120ad326fff937 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Mon, 30 Apr 2018 14:20:23 +0100
-Subject: [PATCH] x86/spec_ctrl: Rename bits of infrastructure to avoid NATIVE
- and VMEXIT
-
-In hindsight, using NATIVE and VMEXIT as naming terminology was not clever.
-A future change wants to split SPEC_CTRL_EXIT_TO_GUEST into PV and HVM
-specific implementations, and using VMEXIT as a term is completely wrong.
-
-Take the opportunity to fix some stale documentation in spec_ctrl_asm.h.  The
-IST helpers were missing from the large comment block, and since
-SPEC_CTRL_ENTRY_FROM_INTR_IST was introduced, we've gained a new piece of
-functionality which currently depends on the fine grain control, which exists
-in lieu of livepatching.  Note this in the comment.
-
-No functional change.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Wei Liu <wei.liu2@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Release-acked-by: Juergen Gross <jgross@suse.com>
-(cherry picked from commit d9822b8a38114e96e4516dc998f4055249364d5d)
----
- xen/arch/x86/hvm/svm/entry.S        |  4 ++--
- xen/arch/x86/hvm/vmx/entry.S        |  4 ++--
- xen/arch/x86/spec_ctrl.c            | 20 ++++++++++----------
- xen/arch/x86/x86_64/compat/entry.S  |  2 +-
- xen/arch/x86/x86_64/entry.S         |  2 +-
- xen/include/asm-x86/cpufeatures.h   |  4 ++--
- xen/include/asm-x86/spec_ctrl_asm.h | 36 +++++++++++++++++++++++++-----------
- 7 files changed, 43 insertions(+), 29 deletions(-)
-
-diff --git a/xen/arch/x86/hvm/svm/entry.S b/xen/arch/x86/hvm/svm/entry.S
-index bf092fe..5e7c080 100644
---- a/xen/arch/x86/hvm/svm/entry.S
-+++ b/xen/arch/x86/hvm/svm/entry.S
-@@ -83,7 +83,7 @@ UNLIKELY_END(svm_trace)
-         mov VCPUMSR_spec_ctrl_raw(%rax), %eax
- 
-         /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
--        SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
-+        SPEC_CTRL_EXIT_TO_HVM   /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
- 
-         pop  %r15
-         pop  %r14
-@@ -108,7 +108,7 @@ UNLIKELY_END(svm_trace)
- 
-         GET_CURRENT(bx)
- 
--        SPEC_CTRL_ENTRY_FROM_VMEXIT /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
-+        SPEC_CTRL_ENTRY_FROM_HVM    /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
-         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
- 
-         mov  VCPU_svm_vmcb(%rbx),%rcx
-diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
-index e750544..aa2f103 100644
---- a/xen/arch/x86/hvm/vmx/entry.S
-+++ b/xen/arch/x86/hvm/vmx/entry.S
-@@ -38,7 +38,7 @@ ENTRY(vmx_asm_vmexit_handler)
-         movb $1,VCPU_vmx_launched(%rbx)
-         mov  %rax,VCPU_hvm_guest_cr2(%rbx)
- 
--        SPEC_CTRL_ENTRY_FROM_VMEXIT /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
-+        SPEC_CTRL_ENTRY_FROM_HVM    /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
-         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
- 
-         mov  %rsp,%rdi
-@@ -76,7 +76,7 @@ UNLIKELY_END(realmode)
-         mov VCPUMSR_spec_ctrl_raw(%rax), %eax
- 
-         /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
--        SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
-+        SPEC_CTRL_EXIT_TO_HVM   /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
- 
-         mov  VCPU_hvm_guest_cr2(%rbx),%rax
- 
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index b62cfcc..015a9e2 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -35,8 +35,8 @@ static enum ind_thunk {
-     THUNK_JMP,
- } opt_thunk __initdata = THUNK_DEFAULT;
- static int8_t __initdata opt_ibrs = -1;
--static bool __initdata opt_rsb_native = true;
--static bool __initdata opt_rsb_vmexit = true;
-+static bool __initdata opt_rsb_pv = true;
-+static bool __initdata opt_rsb_hvm = true;
- bool __read_mostly opt_ibpb = true;
- uint8_t __read_mostly default_xen_spec_ctrl;
- uint8_t __read_mostly default_spec_ctrl_flags;
-@@ -69,9 +69,9 @@ static int __init parse_bti(const char *s)
-         else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 )
-             opt_ibpb = val;
-         else if ( (val = parse_boolean("rsb_native", s, ss)) >= 0 )
--            opt_rsb_native = val;
-+            opt_rsb_pv = val;
-         else if ( (val = parse_boolean("rsb_vmexit", s, ss)) >= 0 )
--            opt_rsb_vmexit = val;
-+            opt_rsb_hvm = val;
-         else
-             rc = -EINVAL;
- 
-@@ -116,8 +116,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
-            default_xen_spec_ctrl & SPEC_CTRL_IBRS    ? " IBRS+" :
-                                                        " IBRS-"      : "",
-            opt_ibpb                                  ? " IBPB"       : "",
--           boot_cpu_has(X86_FEATURE_RSB_NATIVE)      ? " RSB_NATIVE" : "",
--           boot_cpu_has(X86_FEATURE_RSB_VMEXIT)      ? " RSB_VMEXIT" : "");
-+           boot_cpu_has(X86_FEATURE_SC_RSB_PV)       ? " RSB_NATIVE" : "",
-+           boot_cpu_has(X86_FEATURE_SC_RSB_HVM)      ? " RSB_VMEXIT" : "");
- 
-     printk("XPTI: %s\n",
-            boot_cpu_has(X86_FEATURE_NO_XPTI) ? "disabled" : "enabled");
-@@ -307,9 +307,9 @@ void __init init_speculation_mitigations(void)
-      * If a processors speculates to 32bit PV guest kernel mappings, it is
-      * speculating in 64bit supervisor mode, and can leak data.
-      */
--    if ( opt_rsb_native )
-+    if ( opt_rsb_pv )
-     {
--        setup_force_cpu_cap(X86_FEATURE_RSB_NATIVE);
-+        setup_force_cpu_cap(X86_FEATURE_SC_RSB_PV);
-         default_spec_ctrl_flags |= SCF_ist_rsb;
-     }
- 
-@@ -317,8 +317,8 @@ void __init init_speculation_mitigations(void)
-      * HVM guests can always poison the RSB to point at Xen supervisor
-      * mappings.
-      */
--    if ( opt_rsb_vmexit )
--        setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
-+    if ( opt_rsb_hvm )
-+        setup_force_cpu_cap(X86_FEATURE_SC_RSB_HVM);
- 
-     /* Check we have hardware IBPB support before using it... */
-     if ( !boot_cpu_has(X86_FEATURE_IBRSB) && !boot_cpu_has(X86_FEATURE_IBPB) )
-diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S
-index a47cb9d..6a27d98 100644
---- a/xen/arch/x86/x86_64/compat/entry.S
-+++ b/xen/arch/x86/x86_64/compat/entry.S
-@@ -166,7 +166,7 @@ ENTRY(compat_restore_all_guest)
-         mov VCPUMSR_spec_ctrl_raw(%rax), %eax
- 
-         /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
--        SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
-+        SPEC_CTRL_EXIT_TO_PV    /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
- 
-         RESTORE_ALL adj=8 compat=1
- .Lft0:  iretq
-diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S
-index 41d3ec2..0a0763a 100644
---- a/xen/arch/x86/x86_64/entry.S
-+++ b/xen/arch/x86/x86_64/entry.S
-@@ -196,7 +196,7 @@ restore_all_guest:
-         mov   %r15d, %eax
- 
-         /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */
--        SPEC_CTRL_EXIT_TO_GUEST /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
-+        SPEC_CTRL_EXIT_TO_PV    /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */
- 
-         RESTORE_ALL
-         testw $TRAP_syscall,4(%rsp)
-diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h
-index ca58b0e..f9aa5d7 100644
---- a/xen/include/asm-x86/cpufeatures.h
-+++ b/xen/include/asm-x86/cpufeatures.h
-@@ -27,6 +27,6 @@ XEN_CPUFEATURE(IND_THUNK_LFENCE,(FSCAPINTS+0)*32+13) /* Use IND_THUNK_LFENCE */
- XEN_CPUFEATURE(IND_THUNK_JMP,   (FSCAPINTS+0)*32+14) /* Use IND_THUNK_JMP */
- XEN_CPUFEATURE(XEN_IBPB,        (FSCAPINTS+0)*32+15) /* IBRSB || IBPB */
- XEN_CPUFEATURE(SC_MSR,          (FSCAPINTS+0)*32+16) /* MSR_SPEC_CTRL used by Xen */
--XEN_CPUFEATURE(RSB_NATIVE,      (FSCAPINTS+0)*32+18) /* RSB overwrite needed for native */
--XEN_CPUFEATURE(RSB_VMEXIT,      (FSCAPINTS+0)*32+19) /* RSB overwrite needed for vmexit */
-+XEN_CPUFEATURE(SC_RSB_PV,       (FSCAPINTS+0)*32+18) /* RSB overwrite needed for PV */
-+XEN_CPUFEATURE(SC_RSB_HVM,      (FSCAPINTS+0)*32+19) /* RSB overwrite needed for HVM */
- XEN_CPUFEATURE(NO_XPTI,         (FSCAPINTS+0)*32+20) /* XPTI mitigation not in use */
-diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
-index 17dd2cc..3d156ed 100644
---- a/xen/include/asm-x86/spec_ctrl_asm.h
-+++ b/xen/include/asm-x86/spec_ctrl_asm.h
-@@ -72,11 +72,14 @@
-  *
-  * The following ASM fragments implement this algorithm.  See their local
-  * comments for further details.
-- *  - SPEC_CTRL_ENTRY_FROM_VMEXIT
-+ *  - SPEC_CTRL_ENTRY_FROM_HVM
-  *  - SPEC_CTRL_ENTRY_FROM_PV
-  *  - SPEC_CTRL_ENTRY_FROM_INTR
-+ *  - SPEC_CTRL_ENTRY_FROM_INTR_IST
-+ *  - SPEC_CTRL_EXIT_TO_XEN_IST
-  *  - SPEC_CTRL_EXIT_TO_XEN
-- *  - SPEC_CTRL_EXIT_TO_GUEST
-+ *  - SPEC_CTRL_EXIT_TO_PV
-+ *  - SPEC_CTRL_EXIT_TO_HVM
-  */
- 
- .macro DO_OVERWRITE_RSB tmp=rax
-@@ -117,7 +120,7 @@
-     mov %\tmp, %rsp                 /* Restore old %rsp */
- .endm
- 
--.macro DO_SPEC_CTRL_ENTRY_FROM_VMEXIT
-+.macro DO_SPEC_CTRL_ENTRY_FROM_HVM
- /*
-  * Requires %rbx=current, %rsp=regs/cpuinfo
-  * Clobbers %rax, %rcx, %rdx
-@@ -217,23 +220,23 @@
- .endm
- 
- /* Use after a VMEXIT from an HVM guest. */
--#define SPEC_CTRL_ENTRY_FROM_VMEXIT                                     \
-+#define SPEC_CTRL_ENTRY_FROM_HVM                                        \
-     ALTERNATIVE __stringify(ASM_NOP40),                                 \
--        DO_OVERWRITE_RSB, X86_FEATURE_RSB_VMEXIT;                       \
-+        DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM;                       \
-     ALTERNATIVE __stringify(ASM_NOP36),                                 \
--        DO_SPEC_CTRL_ENTRY_FROM_VMEXIT, X86_FEATURE_SC_MSR
-+        DO_SPEC_CTRL_ENTRY_FROM_HVM, X86_FEATURE_SC_MSR
- 
- /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
- #define SPEC_CTRL_ENTRY_FROM_PV                                         \
-     ALTERNATIVE __stringify(ASM_NOP40),                                 \
--        DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE;                       \
-+        DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV;                        \
-     ALTERNATIVE __stringify(ASM_NOP25),                                 \
-         __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), X86_FEATURE_SC_MSR
- 
- /* Use in interrupt/exception context.  May interrupt Xen or PV context. */
- #define SPEC_CTRL_ENTRY_FROM_INTR                                       \
-     ALTERNATIVE __stringify(ASM_NOP40),                                 \
--        DO_OVERWRITE_RSB, X86_FEATURE_RSB_NATIVE;                       \
-+        DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV;                        \
-     ALTERNATIVE __stringify(ASM_NOP33),                                 \
-         __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), X86_FEATURE_SC_MSR
- 
-@@ -242,12 +245,22 @@
-     ALTERNATIVE __stringify(ASM_NOP17),                                 \
-         DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_SC_MSR
- 
--/* Use when exiting to guest context. */
--#define SPEC_CTRL_EXIT_TO_GUEST                                         \
-+/* Use when exiting to PV guest context. */
-+#define SPEC_CTRL_EXIT_TO_PV                                            \
-     ALTERNATIVE __stringify(ASM_NOP24),                                 \
-         DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR
- 
--/* TODO: Drop these when the alternatives infrastructure is NMI/#MC safe. */
-+/* Use when exiting to HVM guest context. */
-+#define SPEC_CTRL_EXIT_TO_HVM                                           \
-+    ALTERNATIVE __stringify(ASM_NOP24),                                 \
-+        DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR
-+
-+/*
-+ * Use in IST interrupt/exception context.  May interrupt Xen or PV context.
-+ * Fine grain control of SCF_ist_wrmsr is needed for safety in the S3 resume
-+ * path to avoid using MSR_SPEC_CTRL before the microcode introducing it has
-+ * been reloaded.
-+ */
- .macro SPEC_CTRL_ENTRY_FROM_INTR_IST
- /*
-  * Requires %rsp=regs, %r14=stack_end
-@@ -294,6 +307,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise):
-     UNLIKELY_END(\@_serialise)
- .endm
- 
-+/* Use when exiting to Xen in IST context. */
- .macro SPEC_CTRL_EXIT_TO_XEN_IST
- /*
-  * Requires %rbx=stack_end
--- 
-2.1.4
-
diff --git a/system/xen/xsa/xsa263-4.10-0006-x86-spec_ctrl-Elide-MSR_SPEC_CTRL-handling-in-idle-c.patch b/system/xen/xsa/xsa263-4.10-0006-x86-spec_ctrl-Elide-MSR_SPEC_CTRL-handling-in-idle-c.patch
deleted file mode 100644
index cbc7fb48d0..0000000000
--- a/system/xen/xsa/xsa263-4.10-0006-x86-spec_ctrl-Elide-MSR_SPEC_CTRL-handling-in-idle-c.patch
+++ /dev/null
@@ -1,71 +0,0 @@
-From 811fcf5137abdcd5b9ea7e5212098adb5bedae0f Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Mon, 7 May 2018 14:06:16 +0100
-Subject: [PATCH] x86/spec_ctrl: Elide MSR_SPEC_CTRL handling in idle context
- when possible
-
-If Xen is virtualising MSR_SPEC_CTRL handling for guests, but using 0 as its
-own MSR_SPEC_CTRL value, spec_ctrl_{enter,exit}_idle() need not write to the
-MSR.
-
-Requested-by: Jan Beulich <JBeulich@suse.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Wei Liu <wei.liu2@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Release-acked-by: Juergen Gross <jgross@suse.com>
-(cherry picked from commit 94df6e8588e35cc2028ccb3fd2921c6e6360605e)
----
- xen/arch/x86/spec_ctrl.c          | 4 ++++
- xen/include/asm-x86/cpufeatures.h | 1 +
- xen/include/asm-x86/spec_ctrl.h   | 4 ++--
- 3 files changed, 7 insertions(+), 2 deletions(-)
-
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 015a9e2..55ef79f 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -327,6 +327,10 @@ void __init init_speculation_mitigations(void)
-     /* (Re)init BSP state now that default_spec_ctrl_flags has been calculated. */
-     init_shadow_spec_ctrl_state();
- 
-+    /* If Xen is using any MSR_SPEC_CTRL settings, adjust the idle path. */
-+    if ( default_xen_spec_ctrl )
-+        setup_force_cpu_cap(X86_FEATURE_SC_MSR_IDLE);
-+
-     print_details(thunk, caps);
- }
- 
-diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h
-index f9aa5d7..32b7f04 100644
---- a/xen/include/asm-x86/cpufeatures.h
-+++ b/xen/include/asm-x86/cpufeatures.h
-@@ -30,3 +30,4 @@ XEN_CPUFEATURE(SC_MSR,          (FSCAPINTS+0)*32+16) /* MSR_SPEC_CTRL used by Xe
- XEN_CPUFEATURE(SC_RSB_PV,       (FSCAPINTS+0)*32+18) /* RSB overwrite needed for PV */
- XEN_CPUFEATURE(SC_RSB_HVM,      (FSCAPINTS+0)*32+19) /* RSB overwrite needed for HVM */
- XEN_CPUFEATURE(NO_XPTI,         (FSCAPINTS+0)*32+20) /* XPTI mitigation not in use */
-+XEN_CPUFEATURE(SC_MSR_IDLE,     (FSCAPINTS+0)*32+21) /* SC_MSR && default_xen_spec_ctrl */
-diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
-index 7d7c42e..77f92ba 100644
---- a/xen/include/asm-x86/spec_ctrl.h
-+++ b/xen/include/asm-x86/spec_ctrl.h
-@@ -52,7 +52,7 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info)
-     barrier();
-     info->spec_ctrl_flags |= SCF_use_shadow;
-     barrier();
--    asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_SC_MSR)
-+    asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_SC_MSR_IDLE)
-                    :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" );
- }
- 
-@@ -67,7 +67,7 @@ static always_inline void spec_ctrl_exit_idle(struct cpu_info *info)
-      */
-     info->spec_ctrl_flags &= ~SCF_use_shadow;
-     barrier();
--    asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_SC_MSR)
-+    asm volatile ( ALTERNATIVE(ASM_NOP3, "wrmsr", X86_FEATURE_SC_MSR_IDLE)
-                    :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" );
- }
- 
--- 
-2.1.4
-
diff --git a/system/xen/xsa/xsa263-4.10-0007-x86-spec_ctrl-Split-X86_FEATURE_SC_MSR-into-PV-and-H.patch b/system/xen/xsa/xsa263-4.10-0007-x86-spec_ctrl-Split-X86_FEATURE_SC_MSR-into-PV-and-H.patch
deleted file mode 100644
index a4e59aae9f..0000000000
--- a/system/xen/xsa/xsa263-4.10-0007-x86-spec_ctrl-Split-X86_FEATURE_SC_MSR-into-PV-and-H.patch
+++ /dev/null
@@ -1,111 +0,0 @@
-From 2acc4cba7eb2559bafdd4d8238466ad81322a35a Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 17 Apr 2018 14:15:04 +0100
-Subject: [PATCH] x86/spec_ctrl: Split X86_FEATURE_SC_MSR into PV and HVM
- variants
-
-In order to separately control whether MSR_SPEC_CTRL is virtualised for PV and
-HVM guests, split the feature used to control runtime alternatives into two.
-Xen will use MSR_SPEC_CTRL itself if either of these features are active.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Wei Liu <wei.liu2@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Release-acked-by: Juergen Gross <jgross@suse.com>
-(cherry picked from commit fa9eb09d446a1279f5e861e6b84fa8675dabf148)
----
- xen/arch/x86/spec_ctrl.c            |  6 ++++--
- xen/include/asm-x86/cpufeatures.h   |  5 +++--
- xen/include/asm-x86/spec_ctrl_asm.h | 12 ++++++------
- 3 files changed, 13 insertions(+), 10 deletions(-)
-
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 55ef79f..a940308 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -112,7 +112,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
-            thunk == THUNK_RETPOLINE ? "RETPOLINE" :
-            thunk == THUNK_LFENCE    ? "LFENCE" :
-            thunk == THUNK_JMP       ? "JMP" : "?",
--           boot_cpu_has(X86_FEATURE_SC_MSR) ?
-+           (boot_cpu_has(X86_FEATURE_SC_MSR_PV) ||
-+            boot_cpu_has(X86_FEATURE_SC_MSR_HVM)) ?
-            default_xen_spec_ctrl & SPEC_CTRL_IBRS    ? " IBRS+" :
-                                                        " IBRS-"      : "",
-            opt_ibpb                                  ? " IBPB"       : "",
-@@ -286,7 +287,8 @@ void __init init_speculation_mitigations(void)
-          * need the IBRS entry/exit logic to virtualise IBRS support for
-          * guests.
-          */
--        setup_force_cpu_cap(X86_FEATURE_SC_MSR);
-+        setup_force_cpu_cap(X86_FEATURE_SC_MSR_PV);
-+        setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM);
- 
-         if ( ibrs )
-             default_xen_spec_ctrl |= SPEC_CTRL_IBRS;
-diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h
-index 32b7f04..b90aa2d 100644
---- a/xen/include/asm-x86/cpufeatures.h
-+++ b/xen/include/asm-x86/cpufeatures.h
-@@ -26,8 +26,9 @@ XEN_CPUFEATURE(LFENCE_DISPATCH, (FSCAPINTS+0)*32+12) /* lfence set as Dispatch S
- XEN_CPUFEATURE(IND_THUNK_LFENCE,(FSCAPINTS+0)*32+13) /* Use IND_THUNK_LFENCE */
- XEN_CPUFEATURE(IND_THUNK_JMP,   (FSCAPINTS+0)*32+14) /* Use IND_THUNK_JMP */
- XEN_CPUFEATURE(XEN_IBPB,        (FSCAPINTS+0)*32+15) /* IBRSB || IBPB */
--XEN_CPUFEATURE(SC_MSR,          (FSCAPINTS+0)*32+16) /* MSR_SPEC_CTRL used by Xen */
-+XEN_CPUFEATURE(SC_MSR_PV,       (FSCAPINTS+0)*32+16) /* MSR_SPEC_CTRL used by Xen for PV */
-+XEN_CPUFEATURE(SC_MSR_HVM,      (FSCAPINTS+0)*32+17) /* MSR_SPEC_CTRL used by Xen for HVM */
- XEN_CPUFEATURE(SC_RSB_PV,       (FSCAPINTS+0)*32+18) /* RSB overwrite needed for PV */
- XEN_CPUFEATURE(SC_RSB_HVM,      (FSCAPINTS+0)*32+19) /* RSB overwrite needed for HVM */
- XEN_CPUFEATURE(NO_XPTI,         (FSCAPINTS+0)*32+20) /* XPTI mitigation not in use */
--XEN_CPUFEATURE(SC_MSR_IDLE,     (FSCAPINTS+0)*32+21) /* SC_MSR && default_xen_spec_ctrl */
-+XEN_CPUFEATURE(SC_MSR_IDLE,     (FSCAPINTS+0)*32+21) /* (SC_MSR_PV || SC_MSR_HVM) && default_xen_spec_ctrl */
-diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h
-index 3d156ed..c659f3f 100644
---- a/xen/include/asm-x86/spec_ctrl_asm.h
-+++ b/xen/include/asm-x86/spec_ctrl_asm.h
-@@ -224,36 +224,36 @@
-     ALTERNATIVE __stringify(ASM_NOP40),                                 \
-         DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_HVM;                       \
-     ALTERNATIVE __stringify(ASM_NOP36),                                 \
--        DO_SPEC_CTRL_ENTRY_FROM_HVM, X86_FEATURE_SC_MSR
-+        DO_SPEC_CTRL_ENTRY_FROM_HVM, X86_FEATURE_SC_MSR_HVM
- 
- /* Use after an entry from PV context (syscall/sysenter/int80/int82/etc). */
- #define SPEC_CTRL_ENTRY_FROM_PV                                         \
-     ALTERNATIVE __stringify(ASM_NOP40),                                 \
-         DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV;                        \
-     ALTERNATIVE __stringify(ASM_NOP25),                                 \
--        __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), X86_FEATURE_SC_MSR
-+        __stringify(DO_SPEC_CTRL_ENTRY maybexen=0), X86_FEATURE_SC_MSR_PV
- 
- /* Use in interrupt/exception context.  May interrupt Xen or PV context. */
- #define SPEC_CTRL_ENTRY_FROM_INTR                                       \
-     ALTERNATIVE __stringify(ASM_NOP40),                                 \
-         DO_OVERWRITE_RSB, X86_FEATURE_SC_RSB_PV;                        \
-     ALTERNATIVE __stringify(ASM_NOP33),                                 \
--        __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), X86_FEATURE_SC_MSR
-+        __stringify(DO_SPEC_CTRL_ENTRY maybexen=1), X86_FEATURE_SC_MSR_PV
- 
- /* Use when exiting to Xen context. */
- #define SPEC_CTRL_EXIT_TO_XEN                                           \
-     ALTERNATIVE __stringify(ASM_NOP17),                                 \
--        DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_SC_MSR
-+        DO_SPEC_CTRL_EXIT_TO_XEN, X86_FEATURE_SC_MSR_PV
- 
- /* Use when exiting to PV guest context. */
- #define SPEC_CTRL_EXIT_TO_PV                                            \
-     ALTERNATIVE __stringify(ASM_NOP24),                                 \
--        DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR
-+        DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV
- 
- /* Use when exiting to HVM guest context. */
- #define SPEC_CTRL_EXIT_TO_HVM                                           \
-     ALTERNATIVE __stringify(ASM_NOP24),                                 \
--        DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR
-+        DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM
- 
- /*
-  * Use in IST interrupt/exception context.  May interrupt Xen or PV context.
--- 
-2.1.4
-
diff --git a/system/xen/xsa/xsa263-4.10-0008-x86-spec_ctrl-Explicitly-set-Xen-s-default-MSR_SPEC_.patch b/system/xen/xsa/xsa263-4.10-0008-x86-spec_ctrl-Explicitly-set-Xen-s-default-MSR_SPEC_.patch
deleted file mode 100644
index 966ce7ee3f..0000000000
--- a/system/xen/xsa/xsa263-4.10-0008-x86-spec_ctrl-Explicitly-set-Xen-s-default-MSR_SPEC_.patch
+++ /dev/null
@@ -1,134 +0,0 @@
-From 5b223f41d59887ea5d13e2406597ff472ba6f2fc Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Wed, 9 May 2018 13:59:56 +0100
-Subject: [PATCH] x86/spec_ctrl: Explicitly set Xen's default MSR_SPEC_CTRL
- value
-
-With the impending ability to disable MSR_SPEC_CTRL handling on a
-per-guest-type basis, the first exit-from-guest may not have the side effect
-of loading Xen's choice of value.  Explicitly set Xen's default during the BSP
-and AP boot paths.
-
-For the BSP however, delay setting a non-zero MSR_SPEC_CTRL default until
-after dom0 has been constructed when safe to do so.  Oracle report that this
-speeds up boots of some hardware by 50s.
-
-"when safe to do so" is based on whether we are virtualised.  A native boot
-won't have any other code running in a position to mount an attack.
-
-Reported-by: Zhenzhong Duan <zhenzhong.duan@oracle.com>
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Wei Liu <wei.liu2@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Release-acked-by: Juergen Gross <jgross@suse.com>
-(cherry picked from commit cb8c12020307b39a89273d7699e89000451987ab)
----
- xen/arch/x86/setup.c            |  7 +++++++
- xen/arch/x86/smpboot.c          |  8 ++++++++
- xen/arch/x86/spec_ctrl.c        | 32 ++++++++++++++++++++++++++++++++
- xen/include/asm-x86/spec_ctrl.h |  2 ++
- 4 files changed, 49 insertions(+)
-
-diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
-index 482fe11..1995c4c 100644
---- a/xen/arch/x86/setup.c
-+++ b/xen/arch/x86/setup.c
-@@ -1746,6 +1746,13 @@ void __init noreturn __start_xen(unsigned long mbi_p)
- 
-     setup_io_bitmap(dom0);
- 
-+    if ( bsp_delay_spec_ctrl )
-+    {
-+        get_cpu_info()->spec_ctrl_flags &= ~SCF_use_shadow;
-+        barrier();
-+        wrmsrl(MSR_SPEC_CTRL, default_xen_spec_ctrl);
-+    }
-+
-     /* Jump to the 1:1 virtual mappings of cpu0_stack. */
-     asm volatile ("mov %[stk], %%rsp; jmp %c[fn]" ::
-                   [stk] "g" (__va(__pa(get_stack_bottom()))),
-diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
-index f81fc2c..ee8b183 100644
---- a/xen/arch/x86/smpboot.c
-+++ b/xen/arch/x86/smpboot.c
-@@ -351,6 +351,14 @@ void start_secondary(void *unused)
-     else
-         microcode_resume_cpu(cpu);
- 
-+    /*
-+     * If MSR_SPEC_CTRL is available, apply Xen's default setting and discard
-+     * any firmware settings.  Note: MSR_SPEC_CTRL may only become available
-+     * after loading microcode.
-+     */
-+    if ( boot_cpu_has(X86_FEATURE_IBRSB) )
-+        wrmsrl(MSR_SPEC_CTRL, default_xen_spec_ctrl);
-+
-     if ( xen_guest )
-         hypervisor_ap_setup();
- 
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index a940308..3adec1a 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -38,6 +38,8 @@ static int8_t __initdata opt_ibrs = -1;
- static bool __initdata opt_rsb_pv = true;
- static bool __initdata opt_rsb_hvm = true;
- bool __read_mostly opt_ibpb = true;
-+
-+bool __initdata bsp_delay_spec_ctrl;
- uint8_t __read_mostly default_xen_spec_ctrl;
- uint8_t __read_mostly default_spec_ctrl_flags;
- 
-@@ -334,6 +336,36 @@ void __init init_speculation_mitigations(void)
-         setup_force_cpu_cap(X86_FEATURE_SC_MSR_IDLE);
- 
-     print_details(thunk, caps);
-+
-+    /*
-+     * If MSR_SPEC_CTRL is available, apply Xen's default setting and discard
-+     * any firmware settings.  For performance reasons, when safe to do so, we
-+     * delay applying non-zero settings until after dom0 has been constructed.
-+     *
-+     * "when safe to do so" is based on whether we are virtualised.  A native
-+     * boot won't have any other code running in a position to mount an
-+     * attack.
-+     */
-+    if ( boot_cpu_has(X86_FEATURE_IBRSB) )
-+    {
-+        bsp_delay_spec_ctrl = !cpu_has_hypervisor && default_xen_spec_ctrl;
-+
-+        /*
-+         * If delaying MSR_SPEC_CTRL setup, use the same mechanism as
-+         * spec_ctrl_enter_idle(), by using a shadow value of zero.
-+         */
-+        if ( bsp_delay_spec_ctrl )
-+        {
-+            struct cpu_info *info = get_cpu_info();
-+
-+            info->shadow_spec_ctrl = 0;
-+            barrier();
-+            info->spec_ctrl_flags |= SCF_use_shadow;
-+            barrier();
-+        }
-+
-+        wrmsrl(MSR_SPEC_CTRL, bsp_delay_spec_ctrl ? 0 : default_xen_spec_ctrl);
-+    }
- }
- 
- static void __init __maybe_unused build_assertions(void)
-diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
-index 77f92ba..c6a38f4 100644
---- a/xen/include/asm-x86/spec_ctrl.h
-+++ b/xen/include/asm-x86/spec_ctrl.h
-@@ -27,6 +27,8 @@
- void init_speculation_mitigations(void);
- 
- extern bool opt_ibpb;
-+
-+extern bool bsp_delay_spec_ctrl;
- extern uint8_t default_xen_spec_ctrl;
- extern uint8_t default_spec_ctrl_flags;
- 
--- 
-2.1.4
-
diff --git a/system/xen/xsa/xsa263-4.10-0009-x86-cpuid-Improvements-to-guest-policies-for-specula.patch b/system/xen/xsa/xsa263-4.10-0009-x86-cpuid-Improvements-to-guest-policies-for-specula.patch
deleted file mode 100644
index 90b1ffc87f..0000000000
--- a/system/xen/xsa/xsa263-4.10-0009-x86-cpuid-Improvements-to-guest-policies-for-specula.patch
+++ /dev/null
@@ -1,132 +0,0 @@
-From bce7a2145abc3c7e5bfd7e2168714d194124a3ab Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Tue, 1 May 2018 11:59:03 +0100
-Subject: [PATCH] x86/cpuid: Improvements to guest policies for speculative
- sidechannel features
-
-If Xen isn't virtualising MSR_SPEC_CTRL for guests, IBRSB shouldn't be
-advertised.  It is not currently possible to express this via the existing
-command line options, but such an ability will be introduced.
-
-Another useful option in some usecases is to offer IBPB without IBRS.  When a
-guest kernel is known to be compatible (uses retpoline and knows about the AMD
-IBPB feature bit), an administrator with pre-Skylake hardware may wish to hide
-IBRS.  This allows the VM to have full protection, without Xen or the VM
-needing to touch MSR_SPEC_CTRL, which can reduce the overhead of Spectre
-mitigations.
-
-Break the logic common to both PV and HVM CPUID calculations into a common
-helper, to avoid duplication.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Wei Liu <wei.liu2@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Release-acked-by: Juergen Gross <jgross@suse.com>
-(cherry picked from commit cb06b308ec71b23f37a44f5e2351fe2cae0306e9)
----
- xen/arch/x86/cpuid.c | 60 ++++++++++++++++++++++++++++++++--------------------
- 1 file changed, 37 insertions(+), 23 deletions(-)
-
-diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
-index b3c9ac6..b45b145 100644
---- a/xen/arch/x86/cpuid.c
-+++ b/xen/arch/x86/cpuid.c
-@@ -368,6 +368,28 @@ static void __init calculate_host_policy(void)
-     }
- }
- 
-+static void __init guest_common_feature_adjustments(uint32_t *fs)
-+{
-+    /* Unconditionally claim to be able to set the hypervisor bit. */
-+    __set_bit(X86_FEATURE_HYPERVISOR, fs);
-+
-+    /*
-+     * If IBRS is offered to the guest, unconditionally offer STIBP.  It is a
-+     * nop on non-HT hardware, and has this behaviour to make heterogeneous
-+     * setups easier to manage.
-+     */
-+    if ( test_bit(X86_FEATURE_IBRSB, fs) )
-+        __set_bit(X86_FEATURE_STIBP, fs);
-+
-+    /*
-+     * On hardware which supports IBRS/IBPB, we can offer IBPB independently
-+     * of IBRS by using the AMD feature bit.  An administrator may wish for
-+     * performance reasons to offer IBPB without IBRS.
-+     */
-+    if ( host_cpuid_policy.feat.ibrsb )
-+        __set_bit(X86_FEATURE_IBPB, fs);
-+}
-+
- static void __init calculate_pv_max_policy(void)
- {
-     struct cpuid_policy *p = &pv_max_cpuid_policy;
-@@ -380,18 +402,14 @@ static void __init calculate_pv_max_policy(void)
-     for ( i = 0; i < ARRAY_SIZE(pv_featureset); ++i )
-         pv_featureset[i] &= pv_featuremask[i];
- 
--    /* Unconditionally claim to be able to set the hypervisor bit. */
--    __set_bit(X86_FEATURE_HYPERVISOR, pv_featureset);
--
--    /* On hardware with IBRS/IBPB support, there are further adjustments. */
--    if ( test_bit(X86_FEATURE_IBRSB, pv_featureset) )
--    {
--        /* Offer STIBP unconditionally.  It is a nop on non-HT hardware. */
--        __set_bit(X86_FEATURE_STIBP, pv_featureset);
-+    /*
-+     * If Xen isn't virtualising MSR_SPEC_CTRL for PV guests because of
-+     * administrator choice, hide the feature.
-+     */
-+    if ( !boot_cpu_has(X86_FEATURE_SC_MSR_PV) )
-+        __clear_bit(X86_FEATURE_IBRSB, pv_featureset);
- 
--        /* AMD's IBPB is a subset of IBRS/IBPB. */
--        __set_bit(X86_FEATURE_IBPB, pv_featureset);
--    }
-+    guest_common_feature_adjustments(pv_featureset);
- 
-     sanitise_featureset(pv_featureset);
-     cpuid_featureset_to_policy(pv_featureset, p);
-@@ -419,9 +437,6 @@ static void __init calculate_hvm_max_policy(void)
-     for ( i = 0; i < ARRAY_SIZE(hvm_featureset); ++i )
-         hvm_featureset[i] &= hvm_featuremask[i];
- 
--    /* Unconditionally claim to be able to set the hypervisor bit. */
--    __set_bit(X86_FEATURE_HYPERVISOR, hvm_featureset);
--
-     /*
-      * Xen can provide an APIC emulation to HVM guests even if the host's APIC
-      * isn't enabled.
-@@ -438,6 +453,13 @@ static void __init calculate_hvm_max_policy(void)
-         __set_bit(X86_FEATURE_SEP, hvm_featureset);
- 
-     /*
-+     * If Xen isn't virtualising MSR_SPEC_CTRL for HVM guests because of
-+     * administrator choice, hide the feature.
-+     */
-+    if ( !boot_cpu_has(X86_FEATURE_SC_MSR_HVM) )
-+        __clear_bit(X86_FEATURE_IBRSB, hvm_featureset);
-+
-+    /*
-      * With VT-x, some features are only supported by Xen if dedicated
-      * hardware support is also available.
-      */
-@@ -450,15 +472,7 @@ static void __init calculate_hvm_max_policy(void)
-             __clear_bit(X86_FEATURE_XSAVES, hvm_featureset);
-     }
- 
--    /* On hardware with IBRS/IBPB support, there are further adjustments. */
--    if ( test_bit(X86_FEATURE_IBRSB, hvm_featureset) )
--    {
--        /* Offer STIBP unconditionally.  It is a nop on non-HT hardware. */
--        __set_bit(X86_FEATURE_STIBP, hvm_featureset);
--
--        /* AMD's IBPB is a subset of IBRS/IBPB. */
--        __set_bit(X86_FEATURE_IBPB, hvm_featureset);
--    }
-+    guest_common_feature_adjustments(hvm_featureset);
- 
-     sanitise_featureset(hvm_featureset);
-     cpuid_featureset_to_policy(hvm_featureset, p);
--- 
-2.1.4
-
diff --git a/system/xen/xsa/xsa263-4.10-0010-x86-spec_ctrl-Introduce-a-new-spec-ctrl-command-line.patch b/system/xen/xsa/xsa263-4.10-0010-x86-spec_ctrl-Introduce-a-new-spec-ctrl-command-line.patch
deleted file mode 100644
index 9c8c3560bd..0000000000
--- a/system/xen/xsa/xsa263-4.10-0010-x86-spec_ctrl-Introduce-a-new-spec-ctrl-command-line.patch
+++ /dev/null
@@ -1,344 +0,0 @@
-From 952ff9f5590e37952d7dd3d89e16a47a238ab079 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Thu, 26 Apr 2018 10:52:55 +0100
-Subject: [PATCH] x86/spec_ctrl: Introduce a new `spec-ctrl=` command line
- argument to replace `bti=`
-
-In hindsight, the options for `bti=` aren't as flexible or useful as expected
-(including several options which don't appear to behave as intended).
-Changing the behaviour of an existing option is problematic for compatibility,
-so introduce a new `spec-ctrl=` in the hopes that we can do better.
-
-One common way of deploying Xen is with a single PV dom0 and all domUs being
-HVM domains.  In such a setup, an administrator who has weighed up the risks
-may wish to forgo protection against malicious PV domains, to reduce the
-overall performance hit.  To cater for this usecase, `spec-ctrl=no-pv` will
-disable all speculative protection for PV domains, while leaving all
-speculative protection for HVM domains intact.
-
-For coding clarity as much as anything else, the suboptions are grouped by
-logical area; those which affect the alternatives blocks, and those which
-affect Xen's in-hypervisor settings.  See the xen-command-line.markdown for
-full details of the new options.
-
-While changing the command line options, take the time to change how the data
-is reported to the user.  The three DEBUG printks are upgraded to unilateral,
-as they are all relevant pieces of information, and the old "mitigations:"
-line is split in the two logical areas described above.
-
-Sample output from booting with `spec-ctrl=no-pv` looks like:
-
-  (XEN) Speculative mitigation facilities:
-  (XEN)   Hardware features: IBRS/IBPB STIBP IBPB
-  (XEN)   Compiled-in support: INDIRECT_THUNK
-  (XEN)   Xen settings: BTI-Thunk RETPOLINE, SPEC_CTRL: IBRS-, Other: IBPB
-  (XEN)   Support for VMs: PV: None, HVM: MSR_SPEC_CTRL RSB
-  (XEN)   XPTI (64-bit PV only): Dom0 enabled, DomU enabled
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Wei Liu <wei.liu2@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
-Release-acked-by: Juergen Gross <jgross@suse.com>
-(cherry picked from commit 3352afc26c497d26ecb70527db3cb29daf7b1422)
----
- docs/misc/xen-command-line.markdown |  49 +++++++++++
- xen/arch/x86/spec_ctrl.c            | 160 ++++++++++++++++++++++++++++++------
- 2 files changed, 186 insertions(+), 23 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
-index 6c673ee..43a6ddb 100644
---- a/docs/misc/xen-command-line.markdown
-+++ b/docs/misc/xen-command-line.markdown
-@@ -248,6 +248,9 @@ the NMI watchdog is also enabled.
- ### bti (x86)
- > `= List of [ thunk=retpoline|lfence|jmp, ibrs=<bool>, ibpb=<bool>, rsb_{vmexit,native}=<bool> ]`
- 
-+**WARNING: This command line option is deprecated, and superseded by
-+_spec-ctrl=_ - using both options in combination is undefined.**
-+
- Branch Target Injection controls.  By default, Xen will pick the most
- appropriate BTI mitigations based on compiled in support, loaded microcode,
- and hardware details.
-@@ -1698,6 +1701,52 @@ enforces the maximum theoretically necessary timeout of 670ms. Any number
- is being interpreted as a custom timeout in milliseconds. Zero or boolean
- false disable the quirk workaround, which is also the default.
- 
-+### spec-ctrl (x86)
-+> `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb}=<bool>,
-+>              bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb}=<bool> ]`
-+
-+Controls for speculative execution sidechannel mitigations.  By default, Xen
-+will pick the most appropriate mitigations based on compiled in support,
-+loaded microcode, and hardware details, and will virtualise appropriate
-+mitigations for guests to use.
-+
-+**WARNING: Any use of this option may interfere with heuristics.  Use with
-+extreme care.**
-+
-+An overall boolean value, `spec-ctrl=no`, can be specified to turn off all
-+mitigations, including pieces of infrastructure used to virtualise certain
-+mitigation features for guests.  Alternatively, a slightly more restricted
-+`spec-ctrl=no-xen` can be used to turn off all of Xen's mitigations, while
-+leaving the virtualisation support in place for guests to use.  Use of a
-+positive boolean value for either of these options is invalid.
-+
-+The booleans `pv=`, `hvm=`, `msr-sc=` and `rsb=` offer fine grained control
-+over the alternative blocks used by Xen.  These impact Xen's ability to
-+protect itself, and Xen's ability to virtualise support for guests to use.
-+
-+* `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests
-+  respectively.
-+* `msr-sc=` offers control over Xen's support for manipulating MSR\_SPEC\_CTRL
-+  on entry and exit.  These blocks are necessary to virtualise support for
-+  guests and if disabled, guests will be unable to use IBRS/STIBP/etc.
-+* `rsb=` offers control over whether to overwrite the Return Stack Buffer /
-+  Return Address Stack on entry to Xen.
-+
-+If Xen was compiled with INDIRECT\_THUNK support, `bti-thunk=` can be used to
-+select which of the thunks gets patched into the `__x86_indirect_thunk_%reg`
-+locations.  The default thunk is `retpoline` (generally preferred for Intel
-+hardware), with the alternatives being `jmp` (a `jmp *%reg` gadget, minimal
-+overhead), and `lfence` (an `lfence; jmp *%reg` gadget, preferred for AMD).
-+
-+On hardware supporting IBRS (Indirect Branch Restricted Speculation), the
-+`ibrs=` option can be used to force or prevent Xen using the feature itself.
-+If Xen is not using IBRS itself, functionality is still set up so IBRS can be
-+virtualised for guests.
-+
-+On hardware supporting IBPB (Indirect Branch Prediction Barrier), the `ibpb=`
-+option can be used to force (the default) or prevent Xen from issuing branch
-+prediction barriers on vcpu context switches.
-+
- ### sync\_console
- > `= <boolean>`
- 
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 3adec1a..4f9282f 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -26,6 +26,13 @@
- #include <asm/spec_ctrl.h>
- #include <asm/spec_ctrl_asm.h>
- 
-+/* Cmdline controls for Xen's alternative blocks. */
-+static bool __initdata opt_msr_sc_pv = true;
-+static bool __initdata opt_msr_sc_hvm = true;
-+static bool __initdata opt_rsb_pv = true;
-+static bool __initdata opt_rsb_hvm = true;
-+
-+/* Cmdline controls for Xen's speculative settings. */
- static enum ind_thunk {
-     THUNK_DEFAULT, /* Decide which thunk to use at boot time. */
-     THUNK_NONE,    /* Missing compiler support for thunks. */
-@@ -35,8 +42,6 @@ static enum ind_thunk {
-     THUNK_JMP,
- } opt_thunk __initdata = THUNK_DEFAULT;
- static int8_t __initdata opt_ibrs = -1;
--static bool __initdata opt_rsb_pv = true;
--static bool __initdata opt_rsb_hvm = true;
- bool __read_mostly opt_ibpb = true;
- 
- bool __initdata bsp_delay_spec_ctrl;
-@@ -84,8 +89,95 @@ static int __init parse_bti(const char *s)
- }
- custom_param("bti", parse_bti);
- 
-+static int __init parse_spec_ctrl(const char *s)
-+{
-+    const char *ss;
-+    int val, rc = 0;
-+
-+    do {
-+        ss = strchr(s, ',');
-+        if ( !ss )
-+            ss = strchr(s, '\0');
-+
-+        /* Global and Xen-wide disable. */
-+        val = parse_bool(s, ss);
-+        if ( !val )
-+        {
-+            opt_msr_sc_pv = false;
-+            opt_msr_sc_hvm = false;
-+
-+        disable_common:
-+            opt_rsb_pv = false;
-+            opt_rsb_hvm = false;
-+
-+            opt_thunk = THUNK_JMP;
-+            opt_ibrs = 0;
-+            opt_ibpb = false;
-+        }
-+        else if ( val > 0 )
-+            rc = -EINVAL;
-+        else if ( (val = parse_boolean("xen", s, ss)) >= 0 )
-+        {
-+            if ( !val )
-+                goto disable_common;
-+
-+            rc = -EINVAL;
-+        }
-+
-+        /* Xen's alternative blocks. */
-+        else if ( (val = parse_boolean("pv", s, ss)) >= 0 )
-+        {
-+            opt_msr_sc_pv = val;
-+            opt_rsb_pv = val;
-+        }
-+        else if ( (val = parse_boolean("hvm", s, ss)) >= 0 )
-+        {
-+            opt_msr_sc_hvm = val;
-+            opt_rsb_hvm = val;
-+        }
-+        else if ( (val = parse_boolean("msr-sc", s, ss)) >= 0 )
-+        {
-+            opt_msr_sc_pv = val;
-+            opt_msr_sc_hvm = val;
-+        }
-+        else if ( (val = parse_boolean("rsb", s, ss)) >= 0 )
-+        {
-+            opt_rsb_pv = val;
-+            opt_rsb_hvm = val;
-+        }
-+
-+        /* Xen's speculative sidechannel mitigation settings. */
-+        else if ( !strncmp(s, "bti-thunk=", 10) )
-+        {
-+            s += 10;
-+
-+            if ( !strncmp(s, "retpoline", ss - s) )
-+                opt_thunk = THUNK_RETPOLINE;
-+            else if ( !strncmp(s, "lfence", ss - s) )
-+                opt_thunk = THUNK_LFENCE;
-+            else if ( !strncmp(s, "jmp", ss - s) )
-+                opt_thunk = THUNK_JMP;
-+            else
-+                rc = -EINVAL;
-+        }
-+        else if ( (val = parse_boolean("ibrs", s, ss)) >= 0 )
-+            opt_ibrs = val;
-+        else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 )
-+            opt_ibpb = val;
-+        else
-+            rc = -EINVAL;
-+
-+        s = ss + 1;
-+    } while ( *ss );
-+
-+    return rc;
-+}
-+custom_param("spec-ctrl", parse_spec_ctrl);
-+
- static void __init print_details(enum ind_thunk thunk, uint64_t caps)
- {
-+    bool use_spec_ctrl = (boot_cpu_has(X86_FEATURE_SC_MSR_PV) ||
-+                          boot_cpu_has(X86_FEATURE_SC_MSR_HVM));
-     unsigned int _7d0 = 0, e8b = 0, tmp;
- 
-     /* Collect diagnostics about available mitigations. */
-@@ -94,10 +186,10 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
-     if ( boot_cpu_data.extended_cpuid_level >= 0x80000008 )
-         cpuid(0x80000008, &tmp, &e8b, &tmp, &tmp);
- 
--    printk(XENLOG_DEBUG "Speculative mitigation facilities:\n");
-+    printk("Speculative mitigation facilities:\n");
- 
-     /* Hardware features which pertain to speculative mitigations. */
--    printk(XENLOG_DEBUG "  Hardware features:%s%s%s%s%s%s\n",
-+    printk("  Hardware features:%s%s%s%s%s%s\n",
-            (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "",
-            (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP"     : "",
-            (e8b  & cpufeat_mask(X86_FEATURE_IBPB))  ? " IBPB"      : "",
-@@ -107,20 +199,31 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
- 
-     /* Compiled-in support which pertains to BTI mitigations. */
-     if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) )
--        printk(XENLOG_DEBUG "  Compiled-in support: INDIRECT_THUNK\n");
-+        printk("  Compiled-in support: INDIRECT_THUNK\n");
- 
--    printk("BTI mitigations: Thunk %s, Others:%s%s%s%s\n",
-+    /* Settings for Xen's protection, irrespective of guests. */
-+    printk("  Xen settings: BTI-Thunk %s, SPEC_CTRL: %s, Other:%s\n",
-            thunk == THUNK_NONE      ? "N/A" :
-            thunk == THUNK_RETPOLINE ? "RETPOLINE" :
-            thunk == THUNK_LFENCE    ? "LFENCE" :
-            thunk == THUNK_JMP       ? "JMP" : "?",
-+           !use_spec_ctrl                            ?  "No" :
-+           (default_xen_spec_ctrl & SPEC_CTRL_IBRS)  ?  "IBRS+" :  "IBRS-",
-+           opt_ibpb                                  ? " IBPB"  : "");
-+
-+    /*
-+     * Alternatives blocks for protecting against and/or virtualising
-+     * mitigation support for guests.
-+     */
-+    printk("  Support for VMs: PV:%s%s%s, HVM:%s%s%s\n",
-            (boot_cpu_has(X86_FEATURE_SC_MSR_PV) ||
--            boot_cpu_has(X86_FEATURE_SC_MSR_HVM)) ?
--           default_xen_spec_ctrl & SPEC_CTRL_IBRS    ? " IBRS+" :
--                                                       " IBRS-"      : "",
--           opt_ibpb                                  ? " IBPB"       : "",
--           boot_cpu_has(X86_FEATURE_SC_RSB_PV)       ? " RSB_NATIVE" : "",
--           boot_cpu_has(X86_FEATURE_SC_RSB_HVM)      ? " RSB_VMEXIT" : "");
-+            boot_cpu_has(X86_FEATURE_SC_RSB_PV))     ? ""               : " None",
-+           boot_cpu_has(X86_FEATURE_SC_MSR_PV)       ? " MSR_SPEC_CTRL" : "",
-+           boot_cpu_has(X86_FEATURE_SC_RSB_PV)       ? " RSB"           : "",
-+           (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ||
-+            boot_cpu_has(X86_FEATURE_SC_RSB_HVM))    ? ""               : " None",
-+           boot_cpu_has(X86_FEATURE_SC_MSR_HVM)      ? " MSR_SPEC_CTRL" : "",
-+           boot_cpu_has(X86_FEATURE_SC_RSB_HVM)      ? " RSB"           : "");
- 
-     printk("XPTI: %s\n",
-            boot_cpu_has(X86_FEATURE_NO_XPTI) ? "disabled" : "enabled");
-@@ -212,7 +315,7 @@ static bool __init retpoline_safe(uint64_t caps)
- void __init init_speculation_mitigations(void)
- {
-     enum ind_thunk thunk = THUNK_DEFAULT;
--    bool ibrs = false;
-+    bool use_spec_ctrl = false, ibrs = false;
-     uint64_t caps = 0;
- 
-     if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
-@@ -282,20 +385,31 @@ void __init init_speculation_mitigations(void)
-     else if ( thunk == THUNK_JMP )
-         setup_force_cpu_cap(X86_FEATURE_IND_THUNK_JMP);
- 
-+    /*
-+     * If we are on hardware supporting MSR_SPEC_CTRL, see about setting up
-+     * the alternatives blocks so we can virtualise support for guests.
-+     */
-     if ( boot_cpu_has(X86_FEATURE_IBRSB) )
-     {
--        /*
--         * Even if we've chosen to not have IBRS set in Xen context, we still
--         * need the IBRS entry/exit logic to virtualise IBRS support for
--         * guests.
--         */
--        setup_force_cpu_cap(X86_FEATURE_SC_MSR_PV);
--        setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM);
-+        if ( opt_msr_sc_pv )
-+        {
-+            use_spec_ctrl = true;
-+            setup_force_cpu_cap(X86_FEATURE_SC_MSR_PV);
-+        }
- 
--        if ( ibrs )
--            default_xen_spec_ctrl |= SPEC_CTRL_IBRS;
-+        if ( opt_msr_sc_hvm )
-+        {
-+            use_spec_ctrl = true;
-+            setup_force_cpu_cap(X86_FEATURE_SC_MSR_HVM);
-+        }
-+
-+        if ( use_spec_ctrl )
-+        {
-+            if ( ibrs )
-+                default_xen_spec_ctrl |= SPEC_CTRL_IBRS;
- 
--        default_spec_ctrl_flags |= SCF_ist_wrmsr;
-+            default_spec_ctrl_flags |= SCF_ist_wrmsr;
-+        }
-     }
- 
-     /*
--- 
-2.1.4
-
diff --git a/system/xen/xsa/xsa263-4.10-0011-x86-AMD-Mitigations-for-GPZ-SP4-Speculative-Store-By.patch b/system/xen/xsa/xsa263-4.10-0011-x86-AMD-Mitigations-for-GPZ-SP4-Speculative-Store-By.patch
deleted file mode 100644
index 8603f1d56e..0000000000
--- a/system/xen/xsa/xsa263-4.10-0011-x86-AMD-Mitigations-for-GPZ-SP4-Speculative-Store-By.patch
+++ /dev/null
@@ -1,123 +0,0 @@
-From 918320daf34931cd5c1c0d9c439ce853f6575970 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Thu, 26 Apr 2018 10:56:28 +0100
-Subject: [PATCH] x86/AMD: Mitigations for GPZ SP4 - Speculative Store Bypass
-
-AMD processors will execute loads and stores with the same base register in
-program order, which is typically how a compiler emits code.
-
-Therefore, by default no mitigating actions are taken, despite there being
-corner cases which are vulnerable to the issue.
-
-For performance testing, or for users with particularly sensitive workloads,
-the `spec-ctrl=ssbd` command line option is available to force Xen to disable
-Memory Disambiguation on applicable hardware.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
----
- docs/misc/xen-command-line.markdown |  7 ++++++-
- xen/arch/x86/cpu/amd.c              | 20 ++++++++++++++++++++
- xen/arch/x86/spec_ctrl.c            |  3 +++
- xen/include/asm-x86/spec_ctrl.h     |  1 +
- 4 files changed, 30 insertions(+), 1 deletion(-)
-
-diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
-index 43a6ddb..4e0e580 100644
---- a/docs/misc/xen-command-line.markdown
-+++ b/docs/misc/xen-command-line.markdown
-@@ -1703,7 +1703,7 @@ false disable the quirk workaround, which is also the default.
- 
- ### spec-ctrl (x86)
- > `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb}=<bool>,
-->              bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb}=<bool> ]`
-+>              bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd}=<bool> ]`
- 
- Controls for speculative execution sidechannel mitigations.  By default, Xen
- will pick the most appropriate mitigations based on compiled in support,
-@@ -1747,6 +1747,11 @@ On hardware supporting IBPB (Indirect Branch Prediction Barrier), the `ibpb=`
- option can be used to force (the default) or prevent Xen from issuing branch
- prediction barriers on vcpu context switches.
- 
-+On hardware supporting SSBD (Speculative Store Bypass Disable), the `ssbd=`
-+option can be used to force or prevent Xen using the feature itself.  On AMD
-+hardware, this is a global option applied at boot, and not virtualised for
-+guest use.
-+
- ### sync\_console
- > `= <boolean>`
- 
-diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
-index fc9677f..458a3fe 100644
---- a/xen/arch/x86/cpu/amd.c
-+++ b/xen/arch/x86/cpu/amd.c
-@@ -9,6 +9,7 @@
- #include <asm/amd.h>
- #include <asm/hvm/support.h>
- #include <asm/setup.h> /* amd_init_cpu */
-+#include <asm/spec_ctrl.h>
- #include <asm/acpi.h>
- #include <asm/apic.h>
- 
-@@ -594,6 +595,25 @@ static void init_amd(struct cpuinfo_x86 *c)
- 				  c->x86_capability);
- 	}
- 
-+	/*
-+	 * If the user has explicitly chosen to disable Memory Disambiguation
-+	 * to mitigiate Speculative Store Bypass, poke the appropriate MSR.
-+	 */
-+	if (opt_ssbd) {
-+		int bit = -1;
-+
-+		switch (c->x86) {
-+		case 0x15: bit = 54; break;
-+		case 0x16: bit = 33; break;
-+		case 0x17: bit = 10; break;
-+		}
-+
-+		if (bit >= 0 && !rdmsr_safe(MSR_AMD64_LS_CFG, value)) {
-+			value |= 1ull << bit;
-+			wrmsr_safe(MSR_AMD64_LS_CFG, value);
-+		}
-+	}
-+
- 	/* MFENCE stops RDTSC speculation */
- 	if (!cpu_has_lfence_dispatch)
- 		__set_bit(X86_FEATURE_MFENCE_RDTSC, c->x86_capability);
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index 4f9282f..e326056 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -43,6 +43,7 @@ static enum ind_thunk {
- } opt_thunk __initdata = THUNK_DEFAULT;
- static int8_t __initdata opt_ibrs = -1;
- bool __read_mostly opt_ibpb = true;
-+bool __read_mostly opt_ssbd = false;
- 
- bool __initdata bsp_delay_spec_ctrl;
- uint8_t __read_mostly default_xen_spec_ctrl;
-@@ -164,6 +165,8 @@ static int __init parse_spec_ctrl(const char *s)
-             opt_ibrs = val;
-         else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 )
-             opt_ibpb = val;
-+        else if ( (val = parse_boolean("ssbd", s, ss)) >= 0 )
-+            opt_ssbd = val;
-         else
-             rc = -EINVAL;
- 
-diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
-index c6a38f4..4678a40 100644
---- a/xen/include/asm-x86/spec_ctrl.h
-+++ b/xen/include/asm-x86/spec_ctrl.h
-@@ -27,6 +27,7 @@
- void init_speculation_mitigations(void);
- 
- extern bool opt_ibpb;
-+extern bool opt_ssbd;
- 
- extern bool bsp_delay_spec_ctrl;
- extern uint8_t default_xen_spec_ctrl;
--- 
-2.1.4
-
diff --git a/system/xen/xsa/xsa263-4.10-0012-x86-Intel-Mitigations-for-GPZ-SP4-Speculative-Store-.patch b/system/xen/xsa/xsa263-4.10-0012-x86-Intel-Mitigations-for-GPZ-SP4-Speculative-Store-.patch
deleted file mode 100644
index 7f2556d42b..0000000000
--- a/system/xen/xsa/xsa263-4.10-0012-x86-Intel-Mitigations-for-GPZ-SP4-Speculative-Store-.patch
+++ /dev/null
@@ -1,224 +0,0 @@
-From db6adc8e55dd43a1b4bb20e06a69475c503cb934 Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Wed, 28 Mar 2018 15:21:39 +0100
-Subject: [PATCH] x86/Intel: Mitigations for GPZ SP4 - Speculative Store Bypass
-
-To combat GPZ SP4 "Speculative Store Bypass", Intel have extended their
-speculative sidechannel mitigations specification as follows:
-
- * A feature bit to indicate that Speculative Store Bypass Disable is
-   supported.
- * A new bit in MSR_SPEC_CTRL which, when set, disables memory disambiguation
-   in the pipeline.
- * A new bit in MSR_ARCH_CAPABILITIES, which will be set in future hardware,
-   indicating that the hardware is not susceptible to Speculative Store Bypass
-   sidechannels.
-
-For contemporary processors, this interface will be implemented via a
-microcode update.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
----
- docs/misc/xen-command-line.markdown         | 12 +++++++-----
- tools/libxl/libxl_cpuid.c                   |  1 +
- tools/misc/xen-cpuid.c                      |  3 +--
- xen/arch/x86/cpuid.c                        |  5 +++++
- xen/arch/x86/spec_ctrl.c                    | 15 ++++++++++++---
- xen/include/asm-x86/msr-index.h             |  2 ++
- xen/include/public/arch-x86/cpufeatureset.h |  1 +
- xen/tools/gen-cpuid.py                      | 17 +++++++++++++----
- 8 files changed, 42 insertions(+), 14 deletions(-)
-
-diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
-index 4e0e580..107889d 100644
---- a/docs/misc/xen-command-line.markdown
-+++ b/docs/misc/xen-command-line.markdown
-@@ -496,9 +496,10 @@ accounting for hardware capabilities as enumerated via CPUID.
- 
- Currently accepted:
- 
--The Speculation Control hardware features `ibrsb`, `stibp`, `ibpb` are used by
--default if avaiable.  They can be ignored, e.g. `no-ibrsb`, at which point Xen
--won't use them itself, and won't offer them to guests.
-+The Speculation Control hardware features `ibrsb`, `stibp`, `ibpb`, `ssbd` are
-+used by default if available and applicable.  They can be ignored,
-+e.g. `no-ibrsb`, at which point Xen won't use them itself, and won't offer
-+them to guests.
- 
- ### cpuid\_mask\_cpu (AMD only)
- > `= fam_0f_rev_c | fam_0f_rev_d | fam_0f_rev_e | fam_0f_rev_f | fam_0f_rev_g | fam_10_rev_b | fam_10_rev_c | fam_11_rev_b`
-@@ -1728,7 +1729,7 @@ protect itself, and Xen's ability to virtualise support for guests to use.
-   respectively.
- * `msr-sc=` offers control over Xen's support for manipulating MSR\_SPEC\_CTRL
-   on entry and exit.  These blocks are necessary to virtualise support for
--  guests and if disabled, guests will be unable to use IBRS/STIBP/etc.
-+  guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc.
- * `rsb=` offers control over whether to overwrite the Return Stack Buffer /
-   Return Address Stack on entry to Xen.
- 
-@@ -1750,7 +1751,8 @@ prediction barriers on vcpu context switches.
- On hardware supporting SSBD (Speculative Store Bypass Disable), the `ssbd=`
- option can be used to force or prevent Xen using the feature itself.  On AMD
- hardware, this is a global option applied at boot, and not virtualised for
--guest use.
-+guest use.  On Intel hardware, the feature is virtualised for guests,
-+independently of Xen's choice of setting.
- 
- ### sync\_console
- > `= <boolean>`
-diff --git a/tools/libxl/libxl_cpuid.c b/tools/libxl/libxl_cpuid.c
-index 3a21f4e..7b0f594 100644
---- a/tools/libxl/libxl_cpuid.c
-+++ b/tools/libxl/libxl_cpuid.c
-@@ -205,6 +205,7 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str)
-         {"ibrsb",        0x00000007,  0, CPUID_REG_EDX, 26,  1},
-         {"stibp",        0x00000007,  0, CPUID_REG_EDX, 27,  1},
-         {"arch-caps",    0x00000007,  0, CPUID_REG_EDX, 29,  1},
-+        {"ssbd",         0x00000007,  0, CPUID_REG_EDX, 31,  1},
- 
-         {"lahfsahf",     0x80000001, NA, CPUID_REG_ECX,  0,  1},
-         {"cmplegacy",    0x80000001, NA, CPUID_REG_ECX,  1,  1},
-diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c
-index b1a46c6..2483a81 100644
---- a/tools/misc/xen-cpuid.c
-+++ b/tools/misc/xen-cpuid.c
-@@ -166,8 +166,7 @@ static const char *str_7d0[32] =
- 
-     [26] = "ibrsb",         [27] = "stibp",
-     [28] = "REZ",           [29] = "arch_caps",
--
--    [30 ... 31] = "REZ",
-+    [30] = "REZ",           [31] = "ssbd",
- };
- 
- static struct {
-diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
-index b45b145..6a710b7 100644
---- a/xen/arch/x86/cpuid.c
-+++ b/xen/arch/x86/cpuid.c
-@@ -43,6 +43,11 @@ static int __init parse_xen_cpuid(const char *s)
-             if ( !val )
-                 setup_clear_cpu_cap(X86_FEATURE_STIBP);
-         }
-+        else if ( (val = parse_boolean("ssbd", s, ss)) >= 0 )
-+        {
-+            if ( !val )
-+                setup_clear_cpu_cap(X86_FEATURE_SSBD);
-+        }
-         else
-             rc = -EINVAL;
- 
-diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
-index e326056..89e3825 100644
---- a/xen/arch/x86/spec_ctrl.c
-+++ b/xen/arch/x86/spec_ctrl.c
-@@ -192,26 +192,31 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
-     printk("Speculative mitigation facilities:\n");
- 
-     /* Hardware features which pertain to speculative mitigations. */
--    printk("  Hardware features:%s%s%s%s%s%s\n",
-+    printk("  Hardware features:%s%s%s%s%s%s%s%s\n",
-            (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "",
-            (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP"     : "",
-+           (_7d0 & cpufeat_mask(X86_FEATURE_SSBD))  ? " SSBD"      : "",
-            (e8b  & cpufeat_mask(X86_FEATURE_IBPB))  ? " IBPB"      : "",
-            (caps & ARCH_CAPABILITIES_IBRS_ALL)      ? " IBRS_ALL"  : "",
-            (caps & ARCH_CAPABILITIES_RDCL_NO)       ? " RDCL_NO"   : "",
--           (caps & ARCH_CAPS_RSBA)                  ? " RSBA"      : "");
-+           (caps & ARCH_CAPS_RSBA)                  ? " RSBA"      : "",
-+           (caps & ARCH_CAPS_SSB_NO)                ? " SSB_NO"    : "");
- 
-     /* Compiled-in support which pertains to BTI mitigations. */
-     if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) )
-         printk("  Compiled-in support: INDIRECT_THUNK\n");
- 
-     /* Settings for Xen's protection, irrespective of guests. */
--    printk("  Xen settings: BTI-Thunk %s, SPEC_CTRL: %s, Other:%s\n",
-+    printk("  Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s, Other:%s\n",
-            thunk == THUNK_NONE      ? "N/A" :
-            thunk == THUNK_RETPOLINE ? "RETPOLINE" :
-            thunk == THUNK_LFENCE    ? "LFENCE" :
-            thunk == THUNK_JMP       ? "JMP" : "?",
-            !use_spec_ctrl                            ?  "No" :
-            (default_xen_spec_ctrl & SPEC_CTRL_IBRS)  ?  "IBRS+" :  "IBRS-",
-+           !use_spec_ctrl || !boot_cpu_has(X86_FEATURE_SSBD)
-+                                                     ? "" :
-+           (default_xen_spec_ctrl & SPEC_CTRL_SSBD)  ? " SSBD+" : " SSBD-",
-            opt_ibpb                                  ? " IBPB"  : "");
- 
-     /*
-@@ -415,6 +420,10 @@ void __init init_speculation_mitigations(void)
-         }
-     }
- 
-+    /* If we have SSBD available, see whether we should use it. */
-+    if ( boot_cpu_has(X86_FEATURE_SSBD) && use_spec_ctrl && opt_ssbd )
-+        default_xen_spec_ctrl |= SPEC_CTRL_SSBD;
-+
-     /*
-      * PV guests can poison the RSB to any virtual address from which
-      * they can execute a call instruction.  This is necessarily outside
-diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
-index 68fae91..93d6f4e 100644
---- a/xen/include/asm-x86/msr-index.h
-+++ b/xen/include/asm-x86/msr-index.h
-@@ -38,6 +38,7 @@
- #define MSR_SPEC_CTRL			0x00000048
- #define SPEC_CTRL_IBRS			(_AC(1, ULL) << 0)
- #define SPEC_CTRL_STIBP			(_AC(1, ULL) << 1)
-+#define SPEC_CTRL_SSBD			(_AC(1, ULL) << 2)
- 
- #define MSR_PRED_CMD			0x00000049
- #define PRED_CMD_IBPB			(_AC(1, ULL) << 0)
-@@ -46,6 +47,7 @@
- #define ARCH_CAPABILITIES_RDCL_NO	(_AC(1, ULL) << 0)
- #define ARCH_CAPABILITIES_IBRS_ALL	(_AC(1, ULL) << 1)
- #define ARCH_CAPS_RSBA			(_AC(1, ULL) << 2)
-+#define ARCH_CAPS_SSB_NO		(_AC(1, ULL) << 4)
- 
- /* Intel MSRs. Some also available on other CPUs */
- #define MSR_IA32_PERFCTR0		0x000000c1
-diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
-index 8da5783..7acf822 100644
---- a/xen/include/public/arch-x86/cpufeatureset.h
-+++ b/xen/include/public/arch-x86/cpufeatureset.h
-@@ -245,6 +245,7 @@ XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A  AVX512 Multiply Accumulation Single
- XEN_CPUFEATURE(IBRSB,         9*32+26) /*A  IBRS and IBPB support (used by Intel) */
- XEN_CPUFEATURE(STIBP,         9*32+27) /*A! STIBP */
- XEN_CPUFEATURE(ARCH_CAPS,     9*32+29) /*   IA32_ARCH_CAPABILITIES MSR */
-+XEN_CPUFEATURE(SSBD,          9*32+31) /*   MSR_SPEC_CTRL.SSBD available */
- 
- #endif /* XEN_CPUFEATURE */
- 
-diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py
-index 613b909..65526ff 100755
---- a/xen/tools/gen-cpuid.py
-+++ b/xen/tools/gen-cpuid.py
-@@ -257,10 +257,19 @@ def crunch_numbers(state):
-                   AVX512BW, AVX512VL, AVX512VBMI, AVX512_4VNNIW,
-                   AVX512_4FMAPS, AVX512_VPOPCNTDQ],
- 
--        # Single Thread Indirect Branch Predictors enumerates a new bit in the
--        # MSR enumerated by Indirect Branch Restricted Speculation/Indirect
--        # Branch Prediction Barrier enumeration.
--        IBRSB: [STIBP],
-+        # The features:
-+        #   * Single Thread Indirect Branch Predictors
-+        #   * Speculative Store Bypass Disable
-+        #
-+        # enumerate new bits in MSR_SPEC_CTRL, which is enumerated by Indirect
-+        # Branch Restricted Speculation/Indirect Branch Prediction Barrier.
-+        #
-+        # In practice, these features also enumerate the presense of
-+        # MSR_SPEC_CTRL.  However, no real hardware will exist with SSBD but
-+        # not IBRSB, and we pass this MSR directly to guests.  Treating them
-+        # as dependent features simplifies Xen's logic, and prevents the guest
-+        # from seeing implausible configurations.
-+        IBRSB: [STIBP, SSBD],
-     }
- 
-     deep_features = tuple(sorted(deps.keys()))
--- 
-2.1.4
-
diff --git a/system/xen/xsa/xsa263-4.10-0013-x86-msr-Virtualise-MSR_SPEC_CTRL.SSBD-for-guests-to-.patch b/system/xen/xsa/xsa263-4.10-0013-x86-msr-Virtualise-MSR_SPEC_CTRL.SSBD-for-guests-to-.patch
deleted file mode 100644
index cb8cdb3c56..0000000000
--- a/system/xen/xsa/xsa263-4.10-0013-x86-msr-Virtualise-MSR_SPEC_CTRL.SSBD-for-guests-to-.patch
+++ /dev/null
@@ -1,70 +0,0 @@
-From 02d0027a89dc49875a41e939498936874a32360f Mon Sep 17 00:00:00 2001
-From: Andrew Cooper <andrew.cooper3@citrix.com>
-Date: Fri, 13 Apr 2018 15:42:34 +0000
-Subject: [PATCH] x86/msr: Virtualise MSR_SPEC_CTRL.SSBD for guests to use
-
-Almost all infrastructure is already in place.  Update the reserved bits
-calculation in guest_wrmsr(), and offer SSBD to guests by default.
-
-Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
-Reviewed-by: Jan Beulich <jbeulich@suse.com>
----
- xen/arch/x86/msr.c                          | 8 ++++++--
- xen/include/public/arch-x86/cpufeatureset.h | 2 +-
- 2 files changed, 7 insertions(+), 3 deletions(-)
-
-diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
-index 48d061d..21219c4 100644
---- a/xen/arch/x86/msr.c
-+++ b/xen/arch/x86/msr.c
-@@ -178,6 +178,8 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
- 
-     switch ( msr )
-     {
-+        uint64_t rsvd;
-+
-     case MSR_INTEL_PLATFORM_INFO:
-     case MSR_ARCH_CAPABILITIES:
-         /* Read-only */
-@@ -213,8 +215,10 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
-          * Note: SPEC_CTRL_STIBP is specified as safe to use (i.e. ignored)
-          * when STIBP isn't enumerated in hardware.
-          */
-+        rsvd = ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP |
-+                 (cp->feat.ssbd ? SPEC_CTRL_SSBD : 0));
- 
--        if ( val & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP) )
-+        if ( val & rsvd )
-             goto gp_fault; /* Rsvd bit set? */
- 
-         vp->spec_ctrl.raw = val;
-@@ -233,12 +237,12 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
- 
-     case MSR_INTEL_MISC_FEATURES_ENABLES:
-     {
--        uint64_t rsvd = ~0ull;
-         bool old_cpuid_faulting = vp->misc_features_enables.cpuid_faulting;
- 
-         if ( !vp->misc_features_enables.available )
-             goto gp_fault;
- 
-+        rsvd = ~0ull;
-         if ( dp->plaform_info.cpuid_faulting )
-             rsvd &= ~MSR_MISC_FEATURES_CPUID_FAULTING;
- 
-diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
-index 7acf822..c721c12 100644
---- a/xen/include/public/arch-x86/cpufeatureset.h
-+++ b/xen/include/public/arch-x86/cpufeatureset.h
-@@ -245,7 +245,7 @@ XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A  AVX512 Multiply Accumulation Single
- XEN_CPUFEATURE(IBRSB,         9*32+26) /*A  IBRS and IBPB support (used by Intel) */
- XEN_CPUFEATURE(STIBP,         9*32+27) /*A! STIBP */
- XEN_CPUFEATURE(ARCH_CAPS,     9*32+29) /*   IA32_ARCH_CAPABILITIES MSR */
--XEN_CPUFEATURE(SSBD,          9*32+31) /*   MSR_SPEC_CTRL.SSBD available */
-+XEN_CPUFEATURE(SSBD,          9*32+31) /*A  MSR_SPEC_CTRL.SSBD available */
- 
- #endif /* XEN_CPUFEATURE */
- 
--- 
-2.1.4
-
diff --git a/system/xen/xsa/xsa273-d757c29ffe2e31b15397e43cd58da88b6318b654.patch b/system/xen/xsa/xsa273-d757c29ffe2e31b15397e43cd58da88b6318b654.patch
new file mode 100644
index 0000000000..ab9794df5e
--- /dev/null
+++ b/system/xen/xsa/xsa273-d757c29ffe2e31b15397e43cd58da88b6318b654.patch
@@ -0,0 +1,4115 @@
+diff --git a/docs/man/xl.conf.pod.5 b/docs/man/xl.conf.pod.5
+index da91b8626c..37262a7ef8 100644
+--- a/docs/man/xl.conf.pod.5
++++ b/docs/man/xl.conf.pod.5
+@@ -185,6 +185,28 @@ massively huge guests).
+ 
+ =back
+ 
++=item B<vm.cpumask>="CPULIST"
++
++=item B<vm.hvm.cpumask>="CPULIST"
++
++=item B<vm.pv.cpumask>="CPULIST"
++
++Global masks that are applied when creating guests and pinning vcpus
++to indicate which cpus they are allowed to run on.  Specifically,
++C<vm.cpumask> applies to all guest types, C<vm.hvm.cpumask> applies to
++both HVM and PVH guests and C<vm.pv.cpumask> applies to PV guests.
++
++The hard affinity of a guest's vcpus is logical-AND'ed with the respective
++masks. If the resulting affinity mask is empty, the operation will fail.
++
++Use --ignore-global-affinity-masks to skip applying global masks.
++
++The default value for these masks is all 1's, i.e. all cpus are allowed.
++
++Due to bug(s), these options may not interact well with other options
++concerning CPU affinity. One example is CPU pools. Users should always double
++check that the required affinity has taken effect.
++
+ =back
+ 
+ =head1 SEE ALSO
+diff --git a/docs/misc/xen-command-line.markdown b/docs/misc/xen-command-line.markdown
+index 075e5ea159..0886706368 100644
+--- a/docs/misc/xen-command-line.markdown
++++ b/docs/misc/xen-command-line.markdown
+@@ -489,10 +489,10 @@ accounting for hardware capabilities as enumerated via CPUID.
+ 
+ Currently accepted:
+ 
+-The Speculation Control hardware features `ibrsb`, `stibp`, `ibpb`, `ssbd` are
+-used by default if available and applicable.  They can be ignored,
+-e.g. `no-ibrsb`, at which point Xen won't use them itself, and won't offer
+-them to guests.
++The Speculation Control hardware features `ibrsb`, `stibp`, `ibpb`,
++`l1d-flush` and `ssbd` are used by default if available and applicable.  They can
++be ignored, e.g. `no-ibrsb`, at which point Xen won't use them itself, and
++won't offer them to guests.
+ 
+ ### cpuid\_mask\_cpu (AMD only)
+ > `= fam_0f_rev_c | fam_0f_rev_d | fam_0f_rev_e | fam_0f_rev_f | fam_0f_rev_g | fam_10_rev_b | fam_10_rev_c | fam_11_rev_b`
+@@ -936,6 +936,8 @@ version are 1 and 2.
+ use of grant table v2 without transitive grants is an ABI breakage from the
+ guests point of view.
+ 
++The usage of gnttab v2 is not security supported on ARM platforms.
++
+ ### gnttab\_max\_frames
+ > `= <integer>`
+ 
+@@ -1544,6 +1546,30 @@ do; there may be other custom operating systems which do.  If you're
+ certain you don't plan on having PV guests which use this feature,
+ turning it off can reduce the attack surface.
+ 
++### pv-l1tf (x86)
++> `= List of [ <bool>, dom0=<bool>, domu=<bool> ]`
++
++> Default: `false` on believed-unaffected hardware, or in pv-shim mode.
++>          `domu`  on believed-affected hardware.
++
++Mitigations for L1TF / XSA-273 / CVE-2018-3620 for PV guests.
++
++For backwards compatibility, we may not alter an architecturally-legitimate
++pagetable entry a PV guest chooses to write.  We can however force such a
++guest into shadow mode so that Xen controls the PTEs which are reachable by
++the CPU pagewalk.
++
++Shadowing is performed at the point where a PV guest first tries to write an
++L1TF-vulnerable PTE.  Therefore, a PV guest kernel which has been updated with
++its own L1TF mitigations will not trigger shadow mode if it is well behaved.
++
++If CONFIG\_SHADOW\_PAGING is not compiled in, this mitigation instead crashes
++the guest when an L1TF-vulnerable PTE is written, which still allows updated,
++well-behaved PV guests to run, despite Shadow being compiled out.
++
++In the pv-shim case, Shadow is expected to be compiled out, and a malicious
++guest kernel can only leak data from the shim Xen, rather than the host Xen.
++
+ ### pv-shim (x86)
+ > `= <boolean>`
+ 
+@@ -1748,6 +1774,13 @@ Use `smap=hvm` to allow SMAP use by HVM guests only.
+ Flag to enable Supervisor Mode Execution Protection
+ Use `smep=hvm` to allow SMEP use by HVM guests only.
+ 
++### smt (x86)
++> `= <boolean>`
++
++Default: `true`
++
++Control bring up of multiple hyper-threads per CPU core.
++
+ ### snb\_igd\_quirk
+ > `= <boolean> | cap | <integer>`
+ 
+@@ -1758,7 +1791,8 @@ false disable the quirk workaround, which is also the default.
+ 
+ ### spec-ctrl (x86)
+ > `= List of [ <bool>, xen=<bool>, {pv,hvm,msr-sc,rsb}=<bool>,
+->              bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,eager-fpu}=<bool> ]`
++>              bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,eager-fpu,
++>              l1d-flush}=<bool> ]`
+ 
+ Controls for speculative execution sidechannel mitigations.  By default, Xen
+ will pick the most appropriate mitigations based on compiled in support,
+@@ -1770,10 +1804,15 @@ extreme care.**
+ 
+ An overall boolean value, `spec-ctrl=no`, can be specified to turn off all
+ mitigations, including pieces of infrastructure used to virtualise certain
+-mitigation features for guests.  Alternatively, a slightly more restricted
+-`spec-ctrl=no-xen` can be used to turn off all of Xen's mitigations, while
+-leaving the virtualisation support in place for guests to use.  Use of a
+-positive boolean value for either of these options is invalid.
++mitigation features for guests.  This also includes settings which `xpti`,
++`smt`, `pv-l1tf` control, unless the respective option(s) have been
++specified earlier on the command line.
++
++Alternatively, a slightly more restricted `spec-ctrl=no-xen` can be used to
++turn off all of Xen's mitigations, while leaving the virtualisation support
++in place for guests to use.
++
++Use of a positive boolean value for either of these options is invalid.
+ 
+ The booleans `pv=`, `hvm=`, `msr-sc=` and `rsb=` offer fine grained control
+ over the alternative blocks used by Xen.  These impact Xen's ability to
+@@ -1813,6 +1852,12 @@ from using fully eager FPU context switches.  This is currently implemented as
+ a global control.  By default, Xen will choose to use fully eager context
+ switches on hardware believed to speculate past #NM exceptions.
+ 
++On hardware supporting L1D_FLUSH, the `l1d-flush=` option can be used to force
++or prevent Xen from issuing an L1 data cache flush on each VMEntry.
++Irrespective of Xen's setting, the feature is virtualised for HVM guests to
++use.  By default, Xen will enable this mitigation on hardware believed to be
++vulnerable to L1TF.
++
+ ### sync\_console
+ > `= <boolean>`
+ 
+diff --git a/tools/examples/xl.conf b/tools/examples/xl.conf
+index 374b6bbc2e..0446deb304 100644
+--- a/tools/examples/xl.conf
++++ b/tools/examples/xl.conf
+@@ -37,3 +37,8 @@
+ # (which can take a long time to find out if launching huge guests).
+ # see xl.conf(5) for details.
+ #claim_mode=1
++
++# Specify global vcpu hard affinity masks. See xl.conf(5) for details.
++#vm.cpumask="0-7"
++#vm.pv.cpumask="0-3"
++#vm.hvm.cpumask="3-7"
+diff --git a/tools/libxl/libxl_cpuid.c b/tools/libxl/libxl_cpuid.c
+index 7b0f594c3d..52e16c20ed 100644
+--- a/tools/libxl/libxl_cpuid.c
++++ b/tools/libxl/libxl_cpuid.c
+@@ -204,6 +204,7 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str)
+         {"avx512-4fmaps",0x00000007,  0, CPUID_REG_EDX,  3,  1},
+         {"ibrsb",        0x00000007,  0, CPUID_REG_EDX, 26,  1},
+         {"stibp",        0x00000007,  0, CPUID_REG_EDX, 27,  1},
++        {"l1d-flush",    0x00000007,  0, CPUID_REG_EDX, 28,  1},
+         {"arch-caps",    0x00000007,  0, CPUID_REG_EDX, 29,  1},
+         {"ssbd",         0x00000007,  0, CPUID_REG_EDX, 31,  1},
+ 
+diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c
+index e116339733..3888b4e158 100644
+--- a/tools/misc/xen-cpuid.c
++++ b/tools/misc/xen-cpuid.c
+@@ -143,7 +143,7 @@ static const char *str_7d0[32] =
+     [ 2] = "avx512_4vnniw", [ 3] = "avx512_4fmaps",
+ 
+     [26] = "ibrsb",         [27] = "stibp",
+-    /* 28 */                [29] = "arch_caps",
++    [28] = "l1d_flush",     [29] = "arch_caps",
+     /* 30 */                [31] = "ssbd",
+ };
+ 
+diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml
+index 13cf3b5bf4..5a8c377603 100644
+--- a/tools/ocaml/xenstored/store.ml
++++ b/tools/ocaml/xenstored/store.ml
+@@ -262,7 +262,8 @@ let path_write store perm path value =
+ 		Node.check_perm store.root perm Perms.WRITE;
+ 		Node.set_value store.root value, false
+ 	) else
+-		Path.apply_modify store.root path do_write, !node_created
++		let root = Path.apply_modify store.root path do_write in
++		root, !node_created
+ 
+ let path_rm store perm path =
+ 	let do_rm node name =
+diff --git a/tools/xl/xl.c b/tools/xl/xl.c
+index 179908b4f6..7d2142f16f 100644
+--- a/tools/xl/xl.c
++++ b/tools/xl/xl.c
+@@ -28,6 +28,9 @@
+ #include <libxl_utils.h>
+ #include <libxlutil.h>
+ #include "xl.h"
++#include "xl_parse.h"
++
++#include "xl_utils.h"
+ 
+ xentoollog_logger_stdiostream *logger;
+ int dryrun_only;
+@@ -42,6 +45,9 @@ char *default_gatewaydev = NULL;
+ char *default_vifbackend = NULL;
+ char *default_remus_netbufscript = NULL;
+ char *default_colo_proxy_script = NULL;
++libxl_bitmap global_vm_affinity_mask;
++libxl_bitmap global_hvm_affinity_mask;
++libxl_bitmap global_pv_affinity_mask;
+ enum output_format default_output_format = OUTPUT_FORMAT_JSON;
+ int claim_mode = 1;
+ bool progress_use_cr = 0;
+@@ -203,6 +209,26 @@ static void parse_global_config(const char *configfile,
+     if (!xlu_cfg_get_long (config, "max_maptrack_frames", &l, 0))
+         max_maptrack_frames = l;
+ 
++    libxl_bitmap_init(&global_vm_affinity_mask);
++    libxl_cpu_bitmap_alloc(ctx, &global_vm_affinity_mask, 0);
++    libxl_bitmap_init(&global_hvm_affinity_mask);
++    libxl_cpu_bitmap_alloc(ctx, &global_hvm_affinity_mask, 0);
++    libxl_bitmap_init(&global_pv_affinity_mask);
++    libxl_cpu_bitmap_alloc(ctx, &global_pv_affinity_mask, 0);
++
++    if (!xlu_cfg_get_string (config, "vm.cpumask", &buf, 0))
++        parse_cpurange(buf, &global_vm_affinity_mask);
++    else
++        libxl_bitmap_set_any(&global_vm_affinity_mask);
++    if (!xlu_cfg_get_string (config, "vm.hvm.cpumask", &buf, 0))
++        parse_cpurange(buf, &global_hvm_affinity_mask);
++    else
++       libxl_bitmap_set_any(&global_hvm_affinity_mask);
++    if (!xlu_cfg_get_string (config, "vm.pv.cpumask", &buf, 0))
++        parse_cpurange(buf, &global_pv_affinity_mask);
++    else
++        libxl_bitmap_set_any(&global_pv_affinity_mask);
++
+     xlu_cfg_destroy(config);
+ }
+ 
+diff --git a/tools/xl/xl.h b/tools/xl/xl.h
+index 4e784ff402..7e97144b50 100644
+--- a/tools/xl/xl.h
++++ b/tools/xl/xl.h
+@@ -41,6 +41,7 @@ struct domain_create {
+     int vncautopass;
+     int console_autoconnect;
+     int checkpointed_stream;
++    int ignore_global_affinity_masks;
+     const char *config_file;
+     char *extra_config; /* extra config string */
+     const char *restore_file;
+@@ -279,6 +280,9 @@ extern char *default_colo_proxy_script;
+ extern char *blkdev_start;
+ extern int max_grant_frames;
+ extern int max_maptrack_frames;
++extern libxl_bitmap global_vm_affinity_mask;
++extern libxl_bitmap global_hvm_affinity_mask;
++extern libxl_bitmap global_pv_affinity_mask;
+ 
+ enum output_format {
+     OUTPUT_FORMAT_JSON,
+@@ -294,6 +298,9 @@ typedef enum {
+ } domain_restart_type;
+ 
+ extern void printf_info_sexp(int domid, libxl_domain_config *d_config, FILE *fh);
++extern void apply_global_affinity_masks(libxl_domain_type type,
++                                        libxl_bitmap *vcpu_affinity_array,
++                                        unsigned int size);
+ 
+ #define XL_GLOBAL_CONFIG XEN_CONFIG_DIR "/xl.conf"
+ #define XL_LOCK_FILE XEN_LOCK_DIR "/xl"
+diff --git a/tools/xl/xl_cmdtable.c b/tools/xl/xl_cmdtable.c
+index bf2ced8140..54c2db6022 100644
+--- a/tools/xl/xl_cmdtable.c
++++ b/tools/xl/xl_cmdtable.c
+@@ -34,7 +34,8 @@ struct cmd_spec cmd_table[] = {
+       "-e                      Do not wait in the background for the death of the domain.\n"
+       "-V, --vncviewer         Connect to the VNC display after the domain is created.\n"
+       "-A, --vncviewer-autopass\n"
+-      "                        Pass VNC password to viewer via stdin."
++      "                        Pass VNC password to viewer via stdin.\n"
++      "--ignore-global-affinity-masks Ignore global masks in xl.conf."
+     },
+     { "config-update",
+       &main_config_update, 1, 1,
+@@ -224,7 +225,8 @@ struct cmd_spec cmd_table[] = {
+       &main_vcpupin, 1, 1,
+       "Set which CPUs a VCPU can use",
+       "[option] <Domain> <VCPU|all> <Hard affinity|-|all> <Soft affinity|-|all>",
+-      "-f, --force        undo an override pinning done by the kernel",
++      "-f, --force        undo an override pinning done by the kernel\n"
++      "--ignore-global-affinity-masks Ignore global masks in xl.conf",
+     },
+     { "vcpu-set",
+       &main_vcpuset, 0, 1,
+diff --git a/tools/xl/xl_vcpu.c b/tools/xl/xl_vcpu.c
+index 8e735b38c1..3384eeed06 100644
+--- a/tools/xl/xl_vcpu.c
++++ b/tools/xl/xl_vcpu.c
+@@ -68,6 +68,61 @@ static void print_domain_vcpuinfo(uint32_t domid, uint32_t nr_cpus)
+     libxl_vcpuinfo_list_free(vcpuinfo, nb_vcpu);
+ }
+ 
++void apply_global_affinity_masks(libxl_domain_type type,
++                                 libxl_bitmap *vcpu_affinity_array,
++                                 unsigned int size)
++{
++    libxl_bitmap *mask = &global_vm_affinity_mask;
++    libxl_bitmap *type_mask;
++    unsigned int i;
++
++    switch (type) {
++    case LIBXL_DOMAIN_TYPE_HVM:
++    case LIBXL_DOMAIN_TYPE_PVH:
++        type_mask = &global_hvm_affinity_mask;
++        break;
++    case LIBXL_DOMAIN_TYPE_PV:
++        type_mask = &global_pv_affinity_mask;
++        break;
++    default:
++        fprintf(stderr, "Unknown guest type\n");
++        exit(EXIT_FAILURE);
++    }
++
++    for (i = 0; i < size; i++) {
++        int rc;
++        libxl_bitmap *t = &vcpu_affinity_array[i];
++        libxl_bitmap b1, b2;
++
++        libxl_bitmap_init(&b1);
++        libxl_bitmap_init(&b2);
++
++        rc = libxl_bitmap_and(ctx, &b1, t, mask);
++        if (rc) {
++            fprintf(stderr, "libxl_bitmap_and errored\n");
++            exit(EXIT_FAILURE);
++        }
++        rc = libxl_bitmap_and(ctx, &b2, &b1, type_mask);
++        if (rc) {
++            fprintf(stderr, "libxl_bitmap_and errored\n");
++            exit(EXIT_FAILURE);
++        }
++
++        if (libxl_bitmap_is_empty(&b2)) {
++            fprintf(stderr, "vcpu hard affinity map is empty\n");
++            exit(EXIT_FAILURE);
++        }
++
++        /* Replace target bitmap with the result */
++        libxl_bitmap_dispose(t);
++        libxl_bitmap_init(t);
++        libxl_bitmap_copy_alloc(ctx, t, &b2);
++
++        libxl_bitmap_dispose(&b1);
++        libxl_bitmap_dispose(&b2);
++    }
++}
++
+ static void vcpulist(int argc, char **argv)
+ {
+     libxl_dominfo *dominfo;
+@@ -118,6 +173,7 @@ int main_vcpupin(int argc, char **argv)
+ {
+     static struct option opts[] = {
+         {"force", 0, 0, 'f'},
++        {"ignore-global-affinity-masks", 0, 0, 'i'},
+         COMMON_LONG_OPTS
+     };
+     libxl_vcpuinfo *vcpuinfo;
+@@ -132,15 +188,18 @@ int main_vcpupin(int argc, char **argv)
+     const char *vcpu, *hard_str, *soft_str;
+     char *endptr;
+     int opt, nb_cpu, nb_vcpu, rc = EXIT_FAILURE;
+-    bool force = false;
++    bool force = false, ignore_masks = false;
+ 
+     libxl_bitmap_init(&cpumap_hard);
+     libxl_bitmap_init(&cpumap_soft);
+ 
+-    SWITCH_FOREACH_OPT(opt, "f", opts, "vcpu-pin", 3) {
++    SWITCH_FOREACH_OPT(opt, "fi", opts, "vcpu-pin", 3) {
+     case 'f':
+         force = true;
+         break;
++    case 'i':
++        ignore_masks = true;
++        break;
+     default:
+         break;
+     }
+@@ -222,6 +281,23 @@ int main_vcpupin(int argc, char **argv)
+         goto out;
+     }
+ 
++    /* Only hard affinity matters here */
++    if (!ignore_masks) {
++        libxl_domain_config d_config;
++
++        libxl_domain_config_init(&d_config);
++        rc = libxl_retrieve_domain_configuration(ctx, domid, &d_config);
++        if (rc) {
++            fprintf(stderr, "Could not retrieve domain configuration\n");
++            libxl_domain_config_dispose(&d_config);
++            goto out;
++        }
++
++        apply_global_affinity_masks(d_config.b_info.type, hard, 1);
++
++        libxl_domain_config_dispose(&d_config);
++    }
++
+     if (force) {
+         if (libxl_set_vcpuaffinity_force(ctx, domid, vcpuid, hard, soft)) {
+             fprintf(stderr, "Could not set affinity for vcpu `%ld'.\n",
+diff --git a/tools/xl/xl_vmcontrol.c b/tools/xl/xl_vmcontrol.c
+index 89c2b25ded..a1d633795c 100644
+--- a/tools/xl/xl_vmcontrol.c
++++ b/tools/xl/xl_vmcontrol.c
+@@ -804,6 +804,36 @@ int create_domain(struct domain_create *dom_info)
+         parse_config_data(config_source, config_data, config_len, &d_config);
+     }
+ 
++    if (!dom_info->ignore_global_affinity_masks) {
++        libxl_domain_build_info *b_info = &d_config.b_info;
++
++        /* It is possible that no hard affinity is specified in config file.
++         * Generate hard affinity maps now if we care about those.
++         */
++        if (b_info->num_vcpu_hard_affinity == 0 &&
++              (!libxl_bitmap_is_full(&global_vm_affinity_mask) ||
++                 (b_info->type == LIBXL_DOMAIN_TYPE_PV &&
++                  !libxl_bitmap_is_full(&global_pv_affinity_mask)) ||
++                 (b_info->type != LIBXL_DOMAIN_TYPE_PV &&
++                  !libxl_bitmap_is_full(&global_hvm_affinity_mask))
++               )) {
++            b_info->num_vcpu_hard_affinity = b_info->max_vcpus;
++            b_info->vcpu_hard_affinity =
++                xmalloc(b_info->max_vcpus * sizeof(libxl_bitmap));
++
++            for (i = 0; i < b_info->num_vcpu_hard_affinity; i++) {
++                libxl_bitmap *m = &b_info->vcpu_hard_affinity[i];
++                libxl_bitmap_init(m);
++                libxl_cpu_bitmap_alloc(ctx, m, 0);
++                libxl_bitmap_set_any(m);
++            }
++        }
++
++        apply_global_affinity_masks(b_info->type,
++                                    b_info->vcpu_hard_affinity,
++                                    b_info->num_vcpu_hard_affinity);
++    }
++
+     if (migrate_fd >= 0) {
+         if (d_config.c_info.name) {
+             /* when we receive a domain we get its name from the config
+@@ -1124,7 +1154,7 @@ int main_create(int argc, char **argv)
+     const char *filename = NULL;
+     struct domain_create dom_info;
+     int paused = 0, debug = 0, daemonize = 1, console_autoconnect = 0,
+-        quiet = 0, monitor = 1, vnc = 0, vncautopass = 0;
++        quiet = 0, monitor = 1, vnc = 0, vncautopass = 0, ignore_masks = 0;
+     int opt, rc;
+     static struct option opts[] = {
+         {"dryrun", 0, 0, 'n'},
+@@ -1132,6 +1162,7 @@ int main_create(int argc, char **argv)
+         {"defconfig", 1, 0, 'f'},
+         {"vncviewer", 0, 0, 'V'},
+         {"vncviewer-autopass", 0, 0, 'A'},
++        {"ignore-global-affinity-masks", 0, 0, 'i'},
+         COMMON_LONG_OPTS
+     };
+ 
+@@ -1142,7 +1173,7 @@ int main_create(int argc, char **argv)
+         argc--; argv++;
+     }
+ 
+-    SWITCH_FOREACH_OPT(opt, "Fnqf:pcdeVA", opts, "create", 0) {
++    SWITCH_FOREACH_OPT(opt, "Fnqf:pcdeVAi", opts, "create", 0) {
+     case 'f':
+         filename = optarg;
+         break;
+@@ -1174,6 +1205,9 @@ int main_create(int argc, char **argv)
+     case 'A':
+         vnc = vncautopass = 1;
+         break;
++    case 'i':
++        ignore_masks = 1;
++        break;
+     }
+ 
+     memset(&dom_info, 0, sizeof(dom_info));
+@@ -1203,6 +1237,7 @@ int main_create(int argc, char **argv)
+     dom_info.vnc = vnc;
+     dom_info.vncautopass = vncautopass;
+     dom_info.console_autoconnect = console_autoconnect;
++    dom_info.ignore_global_affinity_masks = ignore_masks;
+ 
+     rc = create_domain(&dom_info);
+     if (rc < 0) {
+#diff --git a/xen/Makefile b/xen/Makefile
+#index 4d075c381f..a922a1b7b5 100644
+#--- a/xen/Makefile
+#+++ b/xen/Makefile
+#@@ -2,7 +2,7 @@
+# # All other places this is stored (eg. compile.h) should be autogenerated.
+# export XEN_VERSION       = 4
+# export XEN_SUBVERSION    = 11
+#-export XEN_EXTRAVERSION ?= .0$(XEN_VENDORVERSION)
+#+export XEN_EXTRAVERSION ?= .1-pre$(XEN_VENDORVERSION)
+# export XEN_FULLVERSION   = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
+# -include xen-version
+# 
+diff --git a/xen/arch/x86/Kconfig b/xen/arch/x86/Kconfig
+index f64fc56739..cfba4a708c 100644
+--- a/xen/arch/x86/Kconfig
++++ b/xen/arch/x86/Kconfig
+@@ -72,6 +72,7 @@ config SHADOW_PAGING
+             * Running HVM guests on hardware lacking hardware paging support
+               (First-generation Intel VT-x or AMD SVM).
+             * Live migration of PV guests.
++            * L1TF sidechannel mitigation for PV guests.
+ 
+           Under a small number of specific workloads, shadow paging may be
+           deliberately used as a performance optimisation.
+diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
+index 458a3fe60c..76078b55b2 100644
+--- a/xen/arch/x86/cpu/amd.c
++++ b/xen/arch/x86/cpu/amd.c
+@@ -505,17 +505,23 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
+                 u32 eax, ebx, ecx, edx;
+ 
+                 cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+-                c->compute_unit_id = ebx & 0xFF;
+                 c->x86_num_siblings = ((ebx >> 8) & 0x3) + 1;
++
++                if (c->x86 < 0x17)
++                        c->compute_unit_id = ebx & 0xFF;
++                else {
++                        c->cpu_core_id = ebx & 0xFF;
++                        c->x86_max_cores /= c->x86_num_siblings;
++                }
+         }
+         
+         if (opt_cpu_info)
+                 printk("CPU %d(%d) -> Processor %d, %s %d\n",
+                        cpu, c->x86_max_cores, c->phys_proc_id,
+-                       cpu_has(c, X86_FEATURE_TOPOEXT) ? "Compute Unit" : 
+-                                                         "Core",
+-                       cpu_has(c, X86_FEATURE_TOPOEXT) ? c->compute_unit_id :
+-                                                         c->cpu_core_id);
++                       c->compute_unit_id != INVALID_CUID ? "Compute Unit"
++                                                          : "Core",
++                       c->compute_unit_id != INVALID_CUID ? c->compute_unit_id
++                                                          : c->cpu_core_id);
+ }
+ 
+ static void early_init_amd(struct cpuinfo_x86 *c)
+diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c
+index 528aff1811..fdb022875a 100644
+--- a/xen/arch/x86/cpu/common.c
++++ b/xen/arch/x86/cpu/common.c
+@@ -14,6 +14,7 @@
+ #include <public/sysctl.h> /* for XEN_INVALID_{SOCKET,CORE}_ID */
+ 
+ #include "cpu.h"
++#include "mcheck/x86_mca.h"
+ 
+ bool_t opt_arat = 1;
+ boolean_param("arat", opt_arat);
+@@ -355,6 +356,9 @@ static void __init early_cpu_detect(void)
+ 			hap_paddr_bits = PADDR_BITS;
+ 	}
+ 
++	if (c->x86_vendor != X86_VENDOR_AMD)
++		park_offline_cpus = opt_mce;
++
+ 	initialize_cpu_data(0);
+ }
+ 
+diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c
+index a8c287d124..32273d9208 100644
+--- a/xen/arch/x86/cpu/mcheck/mce.c
++++ b/xen/arch/x86/cpu/mcheck/mce.c
+@@ -692,12 +692,15 @@ static void cpu_bank_free(unsigned int cpu)
+ 
+     mcabanks_free(poll);
+     mcabanks_free(clr);
++
++    per_cpu(poll_bankmask, cpu) = NULL;
++    per_cpu(mce_clear_banks, cpu) = NULL;
+ }
+ 
+ static int cpu_bank_alloc(unsigned int cpu)
+ {
+-    struct mca_banks *poll = mcabanks_alloc();
+-    struct mca_banks *clr = mcabanks_alloc();
++    struct mca_banks *poll = per_cpu(poll_bankmask, cpu) ?: mcabanks_alloc();
++    struct mca_banks *clr = per_cpu(mce_clear_banks, cpu) ?: mcabanks_alloc();
+ 
+     if ( !poll || !clr )
+     {
+@@ -725,7 +728,13 @@ static int cpu_callback(
+ 
+     case CPU_UP_CANCELED:
+     case CPU_DEAD:
+-        cpu_bank_free(cpu);
++        if ( !park_offline_cpus )
++            cpu_bank_free(cpu);
++        break;
++
++    case CPU_REMOVE:
++        if ( park_offline_cpus )
++            cpu_bank_free(cpu);
+         break;
+     }
+ 
+diff --git a/xen/arch/x86/cpu/mcheck/mce_intel.c b/xen/arch/x86/cpu/mcheck/mce_intel.c
+index e5dd956a24..4474a34e34 100644
+--- a/xen/arch/x86/cpu/mcheck/mce_intel.c
++++ b/xen/arch/x86/cpu/mcheck/mce_intel.c
+@@ -636,8 +636,6 @@ static void clear_cmci(void)
+ 
+ static void cpu_mcheck_disable(void)
+ {
+-    clear_in_cr4(X86_CR4_MCE);
+-
+     if ( cmci_support && opt_mce )
+         clear_cmci();
+ }
+diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
+index 207e2e712c..6e27f6ec8e 100644
+--- a/xen/arch/x86/cpu/vpmu_intel.c
++++ b/xen/arch/x86/cpu/vpmu_intel.c
+@@ -454,13 +454,11 @@ static int core2_vpmu_alloc_resource(struct vcpu *v)
+ 
+     if ( is_hvm_vcpu(v) )
+     {
+-        wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+-        if ( vmx_add_host_load_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
++        if ( vmx_add_host_load_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, 0) )
+             goto out_err;
+ 
+-        if ( vmx_add_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL) )
++        if ( vmx_add_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, 0) )
+             goto out_err;
+-        vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+     }
+ 
+     core2_vpmu_cxt = xzalloc_bytes(sizeof(*core2_vpmu_cxt) +
+@@ -535,27 +533,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
+     uint64_t *enabled_cntrs;
+ 
+     if ( !core2_vpmu_msr_common_check(msr, &type, &index) )
+-    {
+-        /* Special handling for BTS */
+-        if ( msr == MSR_IA32_DEBUGCTLMSR )
+-        {
+-            supported |= IA32_DEBUGCTLMSR_TR | IA32_DEBUGCTLMSR_BTS |
+-                         IA32_DEBUGCTLMSR_BTINT;
+-
+-            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
+-                supported |= IA32_DEBUGCTLMSR_BTS_OFF_OS |
+-                             IA32_DEBUGCTLMSR_BTS_OFF_USR;
+-            if ( !(msr_content & ~supported) &&
+-                 vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
+-                return 0;
+-            if ( (msr_content & supported) &&
+-                 !vpmu_is_set(vpmu, VPMU_CPU_HAS_BTS) )
+-                printk(XENLOG_G_WARNING
+-                       "%pv: Debug Store unsupported on this CPU\n",
+-                       current);
+-        }
+         return -EINVAL;
+-    }
+ 
+     ASSERT(!supported);
+ 
+@@ -613,7 +591,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
+             return -EINVAL;
+ 
+         if ( is_hvm_vcpu(v) )
+-            vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
++            vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL,
+                                &core2_vpmu_cxt->global_ctrl);
+         else
+             rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
+@@ -682,7 +660,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
+                 return -EINVAL;
+ 
+             if ( is_hvm_vcpu(v) )
+-                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL,
++                vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL,
+                                    &core2_vpmu_cxt->global_ctrl);
+             else
+                 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, core2_vpmu_cxt->global_ctrl);
+@@ -701,7 +679,7 @@ static int core2_vpmu_do_wrmsr(unsigned int msr, uint64_t msr_content,
+     else
+     {
+         if ( is_hvm_vcpu(v) )
+-            vmx_write_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
++            vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+         else
+             wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+     }
+@@ -735,7 +713,7 @@ static int core2_vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content)
+             break;
+         case MSR_CORE_PERF_GLOBAL_CTRL:
+             if ( is_hvm_vcpu(v) )
+-                vmx_read_guest_msr(MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
++                vmx_read_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL, msr_content);
+             else
+                 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content);
+             break;
+diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
+index beee47d0ed..5cc89e2b34 100644
+--- a/xen/arch/x86/cpuid.c
++++ b/xen/arch/x86/cpuid.c
+@@ -43,6 +43,11 @@ static int __init parse_xen_cpuid(const char *s)
+             if ( !val )
+                 setup_clear_cpu_cap(X86_FEATURE_STIBP);
+         }
++        else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 )
++        {
++            if ( !val )
++                setup_clear_cpu_cap(X86_FEATURE_L1D_FLUSH);
++        }
+         else if ( (val = parse_boolean("ssbd", s, ss)) >= 0 )
+         {
+             if ( !val )
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index 9850a782ec..c39cf2c6e5 100644
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -107,10 +107,11 @@ static void play_dead(void)
+     local_irq_disable();
+ 
+     /*
+-     * NOTE: After cpu_exit_clear, per-cpu variables are no longer accessible,
+-     * as they may be freed at any time. In this case, heap corruption or
+-     * #PF can occur (when heap debugging is enabled). For example, even
+-     * printk() can involve tasklet scheduling, which touches per-cpu vars.
++     * NOTE: After cpu_exit_clear, per-cpu variables may no longer be
++     * accessible, as they may be freed at any time if offline CPUs don't get
++     * parked. In this case, heap corruption or #PF can occur (when heap
++     * debugging is enabled). For example, even printk() can involve tasklet
++     * scheduling, which touches per-cpu vars.
+      * 
+      * Consider very carefully when adding code to *dead_idle. Most hypervisor
+      * subsystems are unsafe to call.
+diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
+index 8fbbf3aeb3..dd91038a67 100644
+--- a/xen/arch/x86/domctl.c
++++ b/xen/arch/x86/domctl.c
+@@ -225,7 +225,8 @@ static int update_domain_cpuid_info(struct domain *d,
+          */
+         call_policy_changed = (is_hvm_domain(d) &&
+                                ((old_7d0 ^ p->feat.raw[0].d) &
+-                                cpufeat_mask(X86_FEATURE_IBRSB)));
++                                (cpufeat_mask(X86_FEATURE_IBRSB) |
++                                 cpufeat_mask(X86_FEATURE_L1D_FLUSH))));
+         break;
+ 
+     case 0xa:
+@@ -1163,7 +1164,7 @@ long arch_do_domctl(
+             if ( _xcr0_accum )
+             {
+                 if ( evc->size >= PV_XSAVE_HDR_SIZE + XSTATE_AREA_MIN_SIZE )
+-                    ret = validate_xstate(_xcr0, _xcr0_accum,
++                    ret = validate_xstate(d, _xcr0, _xcr0_accum,
+                                           &_xsave_area->xsave_hdr);
+             }
+             else if ( !_xcr0 )
+@@ -1187,8 +1188,7 @@ long arch_do_domctl(
+                 vcpu_pause(v);
+                 v->arch.xcr0 = _xcr0;
+                 v->arch.xcr0_accum = _xcr0_accum;
+-                if ( _xcr0_accum & XSTATE_NONLAZY )
+-                    v->arch.nonlazy_xstate_used = 1;
++                v->arch.nonlazy_xstate_used = _xcr0_accum & XSTATE_NONLAZY;
+                 compress_xsave_states(v, _xsave_area,
+                                       evc->size - PV_XSAVE_HDR_SIZE);
+                 vcpu_unpause(v);
+diff --git a/xen/arch/x86/genapic/x2apic.c b/xen/arch/x86/genapic/x2apic.c
+index 4779b0d0d5..d997806272 100644
+--- a/xen/arch/x86/genapic/x2apic.c
++++ b/xen/arch/x86/genapic/x2apic.c
+@@ -201,18 +201,21 @@ static int update_clusterinfo(
+         if ( !cluster_cpus_spare )
+             cluster_cpus_spare = xzalloc(cpumask_t);
+         if ( !cluster_cpus_spare ||
+-             !alloc_cpumask_var(&per_cpu(scratch_mask, cpu)) )
++             !cond_alloc_cpumask_var(&per_cpu(scratch_mask, cpu)) )
+             err = -ENOMEM;
+         break;
+     case CPU_UP_CANCELED:
+     case CPU_DEAD:
++    case CPU_REMOVE:
++        if ( park_offline_cpus == (action != CPU_REMOVE) )
++            break;
+         if ( per_cpu(cluster_cpus, cpu) )
+         {
+             cpumask_clear_cpu(cpu, per_cpu(cluster_cpus, cpu));
+             if ( cpumask_empty(per_cpu(cluster_cpus, cpu)) )
+-                xfree(per_cpu(cluster_cpus, cpu));
++                XFREE(per_cpu(cluster_cpus, cpu));
+         }
+-        free_cpumask_var(per_cpu(scratch_mask, cpu));
++        FREE_CPUMASK_VAR(per_cpu(scratch_mask, cpu));
+         break;
+     }
+ 
+diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
+index c23983cdff..4cbb688c05 100644
+--- a/xen/arch/x86/hvm/hvm.c
++++ b/xen/arch/x86/hvm/hvm.c
+@@ -907,6 +907,9 @@ const char *hvm_efer_valid(const struct vcpu *v, uint64_t value,
+     else
+         p = &host_cpuid_policy;
+ 
++    if ( value & ~EFER_KNOWN_MASK )
++        return "Unknown bits set";
++
+     if ( (value & EFER_SCE) && !p->extd.syscall )
+         return "SCE without feature";
+ 
+@@ -1269,7 +1272,7 @@ static int hvm_load_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h)
+     ctxt = (struct hvm_hw_cpu_xsave *)&h->data[h->cur];
+     h->cur += desc->length;
+ 
+-    err = validate_xstate(ctxt->xcr0, ctxt->xcr0_accum,
++    err = validate_xstate(d, ctxt->xcr0, ctxt->xcr0_accum,
+                           (const void *)&ctxt->save_area.xsave_hdr);
+     if ( err )
+     {
+@@ -1324,8 +1327,7 @@ static int hvm_load_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h)
+ 
+     v->arch.xcr0 = ctxt->xcr0;
+     v->arch.xcr0_accum = ctxt->xcr0_accum;
+-    if ( ctxt->xcr0_accum & XSTATE_NONLAZY )
+-        v->arch.nonlazy_xstate_used = 1;
++    v->arch.nonlazy_xstate_used = ctxt->xcr0_accum & XSTATE_NONLAZY;
+     compress_xsave_states(v, &ctxt->save_area,
+                           size - offsetof(struct hvm_hw_cpu_xsave, save_area));
+ 
+diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
+index 165500e3f2..b964c59dad 100644
+--- a/xen/arch/x86/hvm/svm/svm.c
++++ b/xen/arch/x86/hvm/svm/svm.c
+@@ -1432,24 +1432,18 @@ static void svm_inject_event(const struct x86_event *event)
+      * Xen must emulate enough of the event injection to be sure that a
+      * further fault shouldn't occur during delivery.  This covers the fact
+      * that hardware doesn't perform DPL checking on injection.
+-     *
+-     * Also, it accounts for proper positioning of %rip for an event with trap
+-     * semantics (where %rip should point after the instruction) which suffers
+-     * a fault during injection (at which point %rip should point at the
+-     * instruction).
+      */
+     if ( event->type == X86_EVENTTYPE_PRI_SW_EXCEPTION ||
+-         (!cpu_has_svm_nrips && (event->type == X86_EVENTTYPE_SW_INTERRUPT ||
+-                                 event->type == X86_EVENTTYPE_SW_EXCEPTION)) )
++         (!cpu_has_svm_nrips && (event->type >= X86_EVENTTYPE_SW_INTERRUPT)) )
+         svm_emul_swint_injection(&_event);
+ 
+-    switch ( _event.vector )
++    switch ( _event.vector | -(_event.type == X86_EVENTTYPE_SW_INTERRUPT) )
+     {
+     case TRAP_debug:
+         if ( regs->eflags & X86_EFLAGS_TF )
+         {
+             __restore_debug_registers(vmcb, curr);
+-            vmcb_set_dr6(vmcb, vmcb_get_dr6(vmcb) | 0x4000);
++            vmcb_set_dr6(vmcb, vmcb_get_dr6(vmcb) | DR_STEP);
+         }
+         /* fall through */
+     case TRAP_int3:
+@@ -1459,6 +1453,13 @@ static void svm_inject_event(const struct x86_event *event)
+             domain_pause_for_debugger();
+             return;
+         }
++        break;
++
++    case TRAP_page_fault:
++        ASSERT(_event.type == X86_EVENTTYPE_HW_EXCEPTION);
++        curr->arch.hvm_vcpu.guest_cr[2] = _event.cr2;
++        vmcb_set_cr2(vmcb, _event.cr2);
++        break;
+     }
+ 
+     if ( unlikely(eventinj.fields.v) &&
+@@ -1481,13 +1482,9 @@ static void svm_inject_event(const struct x86_event *event)
+      * icebp, software events with trap semantics need emulating, so %rip in
+      * the trap frame points after the instruction.
+      *
+-     * The x86 emulator (if requested by the x86_swint_emulate_* choice) will
+-     * have performed checks such as presence/dpl/etc and believes that the
+-     * event injection will succeed without faulting.
+-     *
+-     * The x86 emulator will always provide fault semantics for software
+-     * events, with _trap.insn_len set appropriately.  If the injection
+-     * requires emulation, move %rip forwards at this point.
++     * svm_emul_swint_injection() has already confirmed that events with trap
++     * semantics won't fault on injection.  Position %rip/NextRIP suitably,
++     * and restrict the event type to what hardware will tolerate.
+      */
+     switch ( _event.type )
+     {
+@@ -1544,16 +1541,12 @@ static void svm_inject_event(const struct x86_event *event)
+            eventinj.fields.errorcode == (uint16_t)eventinj.fields.errorcode);
+     vmcb->eventinj = eventinj;
+ 
+-    if ( _event.vector == TRAP_page_fault )
+-    {
+-        curr->arch.hvm_vcpu.guest_cr[2] = _event.cr2;
+-        vmcb_set_cr2(vmcb, _event.cr2);
+-        HVMTRACE_LONG_2D(PF_INJECT, _event.error_code, TRC_PAR_LONG(_event.cr2));
+-    }
++    if ( _event.vector == TRAP_page_fault &&
++         _event.type == X86_EVENTTYPE_HW_EXCEPTION )
++        HVMTRACE_LONG_2D(PF_INJECT, _event.error_code,
++                         TRC_PAR_LONG(_event.cr2));
+     else
+-    {
+         HVMTRACE_2D(INJ_EXC, _event.vector, _event.error_code);
+-    }
+ }
+ 
+ static int svm_event_pending(struct vcpu *v)
+diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S
+index aa2f103895..afd552f2b9 100644
+--- a/xen/arch/x86/hvm/vmx/entry.S
++++ b/xen/arch/x86/hvm/vmx/entry.S
+@@ -41,6 +41,15 @@ ENTRY(vmx_asm_vmexit_handler)
+         SPEC_CTRL_ENTRY_FROM_HVM    /* Req: b=curr %rsp=regs/cpuinfo, Clob: acd */
+         /* WARNING! `ret`, `call *`, `jmp *` not safe before this point. */
+ 
++        /* Hardware clears MSR_DEBUGCTL on VMExit.  Reinstate it if debugging Xen. */
++        .macro restore_lbr
++            mov $IA32_DEBUGCTLMSR_LBR, %eax
++            mov $MSR_IA32_DEBUGCTLMSR, %ecx
++            xor %edx, %edx
++            wrmsr
++        .endm
++        ALTERNATIVE "", restore_lbr, X86_FEATURE_XEN_LBR
++
+         mov  %rsp,%rdi
+         call vmx_vmexit_handler
+ 
+diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c
+index 258fc08f72..2ba0c40808 100644
+--- a/xen/arch/x86/hvm/vmx/vmcs.c
++++ b/xen/arch/x86/hvm/vmx/vmcs.c
+@@ -38,6 +38,7 @@
+ #include <asm/flushtlb.h>
+ #include <asm/monitor.h>
+ #include <asm/shadow.h>
++#include <asm/spec_ctrl.h>
+ #include <asm/tboot.h>
+ #include <asm/apic.h>
+ 
+@@ -996,6 +997,7 @@ static int construct_vmcs(struct vcpu *v)
+     struct domain *d = v->domain;
+     u32 vmexit_ctl = vmx_vmexit_control;
+     u32 vmentry_ctl = vmx_vmentry_control;
++    int rc = 0;
+ 
+     vmx_vmcs_enter(v);
+ 
+@@ -1083,8 +1085,8 @@ static int construct_vmcs(struct vcpu *v)
+ 
+         if ( msr_bitmap == NULL )
+         {
+-            vmx_vmcs_exit(v);
+-            return -ENOMEM;
++            rc = -ENOMEM;
++            goto out;
+         }
+ 
+         memset(msr_bitmap, ~0, PAGE_SIZE);
+@@ -1268,141 +1270,197 @@ static int construct_vmcs(struct vcpu *v)
+     if ( cpu_has_vmx_tsc_scaling )
+         __vmwrite(TSC_MULTIPLIER, d->arch.hvm_domain.tsc_scaling_ratio);
+ 
+-    vmx_vmcs_exit(v);
+-
+     /* will update HOST & GUEST_CR3 as reqd */
+     paging_update_paging_modes(v);
+ 
+     vmx_vlapic_msr_changed(v);
+ 
+-    return 0;
++    if ( opt_l1d_flush && paging_mode_hap(d) )
++        rc = vmx_add_msr(v, MSR_FLUSH_CMD, FLUSH_CMD_L1D,
++                         VMX_MSR_GUEST_LOADONLY);
++
++ out:
++    vmx_vmcs_exit(v);
++
++    return rc;
+ }
+ 
+-static int vmx_msr_entry_key_cmp(const void *key, const void *elt)
++/*
++ * Search an MSR list looking for an MSR entry, or the slot in which it should
++ * live (to keep the data sorted) if an entry is not found.
++ *
++ * The return pointer is guaranteed to be bounded by start and end.  However,
++ * it may point at end, and may be invalid for the caller to dereference.
++ */
++static struct vmx_msr_entry *locate_msr_entry(
++    struct vmx_msr_entry *start, struct vmx_msr_entry *end, uint32_t msr)
+ {
+-    const u32 *msr = key;
+-    const struct vmx_msr_entry *entry = elt;
++    while ( start < end )
++    {
++        struct vmx_msr_entry *mid = start + (end - start) / 2;
+ 
+-    if ( *msr > entry->index )
+-        return 1;
+-    if ( *msr < entry->index )
+-        return -1;
++        if ( msr < mid->index )
++            end = mid;
++        else if ( msr > mid->index )
++            start = mid + 1;
++        else
++            return mid;
++    }
+ 
+-    return 0;
++    return start;
+ }
+ 
+-struct vmx_msr_entry *vmx_find_msr(u32 msr, int type)
++struct vmx_msr_entry *vmx_find_msr(const struct vcpu *v, uint32_t msr,
++                                   enum vmx_msr_list_type type)
+ {
+-    struct vcpu *curr = current;
+-    unsigned int msr_count;
+-    struct vmx_msr_entry *msr_area;
++    const struct arch_vmx_struct *vmx = &v->arch.hvm_vmx;
++    struct vmx_msr_entry *start = NULL, *ent, *end;
++    unsigned int substart = 0, subend = vmx->msr_save_count;
++    unsigned int total = vmx->msr_load_count;
+ 
+-    if ( type == VMX_GUEST_MSR )
+-    {
+-        msr_count = curr->arch.hvm_vmx.msr_count;
+-        msr_area = curr->arch.hvm_vmx.msr_area;
+-    }
+-    else
++    ASSERT(v == current || !vcpu_runnable(v));
++
++    switch ( type )
+     {
+-        ASSERT(type == VMX_HOST_MSR);
+-        msr_count = curr->arch.hvm_vmx.host_msr_count;
+-        msr_area = curr->arch.hvm_vmx.host_msr_area;
++    case VMX_MSR_HOST:
++        start    = vmx->host_msr_area;
++        subend   = vmx->host_msr_count;
++        total    = subend;
++        break;
++
++    case VMX_MSR_GUEST:
++        start    = vmx->msr_area;
++        break;
++
++    case VMX_MSR_GUEST_LOADONLY:
++        start    = vmx->msr_area;
++        substart = subend;
++        subend   = total;
++        break;
++
++    default:
++        ASSERT_UNREACHABLE();
+     }
+ 
+-    if ( msr_area == NULL )
++    if ( !start )
+         return NULL;
+ 
+-    return bsearch(&msr, msr_area, msr_count, sizeof(struct vmx_msr_entry),
+-                   vmx_msr_entry_key_cmp);
++    end = start + total;
++    ent = locate_msr_entry(start + substart, start + subend, msr);
++
++    return ((ent < end) && (ent->index == msr)) ? ent : NULL;
+ }
+ 
+-int vmx_read_guest_msr(u32 msr, u64 *val)
++int vmx_add_msr(struct vcpu *v, uint32_t msr, uint64_t val,
++                enum vmx_msr_list_type type)
+ {
+-    struct vmx_msr_entry *ent;
++    struct arch_vmx_struct *vmx = &v->arch.hvm_vmx;
++    struct vmx_msr_entry **ptr, *start = NULL, *ent, *end;
++    unsigned int substart, subend, total;
++    int rc;
+ 
+-    if ( (ent = vmx_find_msr(msr, VMX_GUEST_MSR)) != NULL )
++    ASSERT(v == current || !vcpu_runnable(v));
++
++    switch ( type )
+     {
+-        *val = ent->data;
+-        return 0;
+-    }
++    case VMX_MSR_HOST:
++        ptr      = &vmx->host_msr_area;
++        substart = 0;
++        subend   = vmx->host_msr_count;
++        total    = subend;
++        break;
+ 
+-    return -ESRCH;
+-}
++    case VMX_MSR_GUEST:
++        ptr      = &vmx->msr_area;
++        substart = 0;
++        subend   = vmx->msr_save_count;
++        total    = vmx->msr_load_count;
++        break;
+ 
+-int vmx_write_guest_msr(u32 msr, u64 val)
+-{
+-    struct vmx_msr_entry *ent;
++    case VMX_MSR_GUEST_LOADONLY:
++        ptr      = &vmx->msr_area;
++        substart = vmx->msr_save_count;
++        subend   = vmx->msr_load_count;
++        total    = subend;
++        break;
+ 
+-    if ( (ent = vmx_find_msr(msr, VMX_GUEST_MSR)) != NULL )
+-    {
+-        ent->data = val;
+-        return 0;
++    default:
++        ASSERT_UNREACHABLE();
++        return -EINVAL;
+     }
+ 
+-    return -ESRCH;
+-}
+-
+-int vmx_add_msr(u32 msr, int type)
+-{
+-    struct vcpu *curr = current;
+-    unsigned int idx, *msr_count;
+-    struct vmx_msr_entry **msr_area, *msr_area_elem;
++    vmx_vmcs_enter(v);
+ 
+-    if ( type == VMX_GUEST_MSR )
+-    {
+-        msr_count = &curr->arch.hvm_vmx.msr_count;
+-        msr_area = &curr->arch.hvm_vmx.msr_area;
+-    }
+-    else
++    /* Allocate memory on first use. */
++    if ( unlikely(!*ptr) )
+     {
+-        ASSERT(type == VMX_HOST_MSR);
+-        msr_count = &curr->arch.hvm_vmx.host_msr_count;
+-        msr_area = &curr->arch.hvm_vmx.host_msr_area;
+-    }
++        paddr_t addr;
+ 
+-    if ( *msr_area == NULL )
+-    {
+-        if ( (*msr_area = alloc_xenheap_page()) == NULL )
+-            return -ENOMEM;
++        if ( (*ptr = alloc_xenheap_page()) == NULL )
++        {
++            rc = -ENOMEM;
++            goto out;
++        }
+ 
+-        if ( type == VMX_GUEST_MSR )
++        addr = virt_to_maddr(*ptr);
++
++        switch ( type )
+         {
+-            __vmwrite(VM_EXIT_MSR_STORE_ADDR, virt_to_maddr(*msr_area));
+-            __vmwrite(VM_ENTRY_MSR_LOAD_ADDR, virt_to_maddr(*msr_area));
++        case VMX_MSR_HOST:
++            __vmwrite(VM_EXIT_MSR_LOAD_ADDR, addr);
++            break;
++
++        case VMX_MSR_GUEST:
++        case VMX_MSR_GUEST_LOADONLY:
++            __vmwrite(VM_EXIT_MSR_STORE_ADDR, addr);
++            __vmwrite(VM_ENTRY_MSR_LOAD_ADDR, addr);
++            break;
+         }
+-        else
+-            __vmwrite(VM_EXIT_MSR_LOAD_ADDR, virt_to_maddr(*msr_area));
+     }
+ 
+-    for ( idx = 0; idx < *msr_count && (*msr_area)[idx].index <= msr; idx++ )
+-        if ( (*msr_area)[idx].index == msr )
+-            return 0;
++    start = *ptr;
++    end   = start + total;
++    ent   = locate_msr_entry(start + substart, start + subend, msr);
+ 
+-    if ( *msr_count == (PAGE_SIZE / sizeof(struct vmx_msr_entry)) )
+-        return -ENOSPC;
++    if ( (ent < end) && (ent->index == msr) )
++        goto found;
+ 
+-    memmove(*msr_area + idx + 1, *msr_area + idx,
+-            sizeof(*msr_area_elem) * (*msr_count - idx));
++    /* If there isn't an existing entry for msr, insert room for one. */
++    if ( total == (PAGE_SIZE / sizeof(*ent)) )
++    {
++        rc = -ENOSPC;
++        goto out;
++    }
+ 
+-    msr_area_elem = *msr_area + idx;
+-    msr_area_elem->index = msr;
+-    msr_area_elem->mbz = 0;
++    memmove(ent + 1, ent, sizeof(*ent) * (end - ent));
+ 
+-    ++*msr_count;
++    ent->index = msr;
++    ent->mbz = 0;
+ 
+-    if ( type == VMX_GUEST_MSR )
++    switch ( type )
+     {
+-        msr_area_elem->data = 0;
+-        __vmwrite(VM_EXIT_MSR_STORE_COUNT, *msr_count);
+-        __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, *msr_count);
+-    }
+-    else
+-    {
+-        rdmsrl(msr, msr_area_elem->data);
+-        __vmwrite(VM_EXIT_MSR_LOAD_COUNT, *msr_count);
++    case VMX_MSR_HOST:
++        __vmwrite(VM_EXIT_MSR_LOAD_COUNT, ++vmx->host_msr_count);
++        break;
++
++    case VMX_MSR_GUEST:
++        __vmwrite(VM_EXIT_MSR_STORE_COUNT, ++vmx->msr_save_count);
++
++        /* Fallthrough */
++    case VMX_MSR_GUEST_LOADONLY:
++        __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, ++vmx->msr_load_count);
++        break;
+     }
+ 
+-    return 0;
++    /* Set the msr's value. */
++ found:
++    ent->data = val;
++    rc = 0;
++
++ out:
++    vmx_vmcs_exit(v);
++
++    return rc;
+ }
+ 
+ void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector)
+diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
+index 610c8d6eb9..b0fababede 100644
+--- a/xen/arch/x86/hvm/vmx/vmx.c
++++ b/xen/arch/x86/hvm/vmx/vmx.c
+@@ -583,6 +583,12 @@ static void vmx_cpuid_policy_changed(struct vcpu *v)
+         vmx_clear_msr_intercept(v, MSR_PRED_CMD,  VMX_MSR_RW);
+     else
+         vmx_set_msr_intercept(v, MSR_PRED_CMD,  VMX_MSR_RW);
++
++    /* MSR_FLUSH_CMD is safe to pass through if the guest knows about it. */
++    if ( cp->feat.l1d_flush )
++        vmx_clear_msr_intercept(v, MSR_FLUSH_CMD, VMX_MSR_RW);
++    else
++        vmx_set_msr_intercept(v, MSR_FLUSH_CMD, VMX_MSR_RW);
+ }
+ 
+ int vmx_guest_x86_mode(struct vcpu *v)
+@@ -2758,8 +2764,10 @@ enum
+ 
+ #define LBR_FROM_SIGNEXT_2MSB  ((1ULL << 59) | (1ULL << 60))
+ 
+-#define FIXUP_LBR_TSX            (1u << 0)
+-#define FIXUP_BDW_ERRATUM_BDF14  (1u << 1)
++#define LBR_MSRS_INSERTED      (1u << 0)
++#define LBR_FIXUP_TSX          (1u << 1)
++#define LBR_FIXUP_BDF14        (1u << 2)
++#define LBR_FIXUP_MASK         (LBR_FIXUP_TSX | LBR_FIXUP_BDF14)
+ 
+ static bool __read_mostly lbr_tsx_fixup_needed;
+ static bool __read_mostly bdw_erratum_bdf14_fixup_needed;
+@@ -2822,7 +2830,7 @@ static int is_last_branch_msr(u32 ecx)
+ 
+ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
+ {
+-    const struct vcpu *curr = current;
++    struct vcpu *curr = current;
+ 
+     HVM_DBG_LOG(DBG_LEVEL_MSR, "ecx=%#x", msr);
+ 
+@@ -2901,7 +2909,7 @@ static int vmx_msr_read_intercept(unsigned int msr, uint64_t *msr_content)
+         if ( passive_domain_do_rdmsr(msr, msr_content) )
+             goto done;
+ 
+-        if ( vmx_read_guest_msr(msr, msr_content) == 0 )
++        if ( vmx_read_guest_msr(curr, msr, msr_content) == 0 )
+             break;
+ 
+         if ( is_last_branch_msr(msr) )
+@@ -3036,11 +3044,14 @@ void vmx_vlapic_msr_changed(struct vcpu *v)
+ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
+ {
+     struct vcpu *v = current;
++    const struct cpuid_policy *cp = v->domain->arch.cpuid;
+ 
+     HVM_DBG_LOG(DBG_LEVEL_MSR, "ecx=%#x, msr_value=%#"PRIx64, msr, msr_content);
+ 
+     switch ( msr )
+     {
++        uint64_t rsvd;
++
+     case MSR_IA32_SYSENTER_CS:
+         __vmwrite(GUEST_SYSENTER_CS, msr_content);
+         break;
+@@ -3093,45 +3104,85 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
+         wrmsrl(MSR_SYSCALL_MASK, msr_content);
+         break;
+ 
+-    case MSR_IA32_DEBUGCTLMSR: {
+-        int i, rc = 0;
+-        uint64_t supported = IA32_DEBUGCTLMSR_LBR | IA32_DEBUGCTLMSR_BTF;
++    case MSR_IA32_DEBUGCTLMSR:
++        rsvd = ~(IA32_DEBUGCTLMSR_LBR | IA32_DEBUGCTLMSR_BTF);
+ 
+-        if ( boot_cpu_has(X86_FEATURE_RTM) )
+-            supported |= IA32_DEBUGCTLMSR_RTM;
+-        if ( msr_content & ~supported )
++        /* TODO: Wire vPMU settings properly through the CPUID policy */
++        if ( vpmu_is_set(vcpu_vpmu(v), VPMU_CPU_HAS_BTS) )
+         {
+-            /* Perhaps some other bits are supported in vpmu. */
+-            if ( vpmu_do_wrmsr(msr, msr_content, supported) )
+-                break;
++            rsvd &= ~(IA32_DEBUGCTLMSR_TR | IA32_DEBUGCTLMSR_BTS |
++                      IA32_DEBUGCTLMSR_BTINT);
++
++            if ( cpu_has(&current_cpu_data, X86_FEATURE_DSCPL) )
++                rsvd &= ~(IA32_DEBUGCTLMSR_BTS_OFF_OS |
++                          IA32_DEBUGCTLMSR_BTS_OFF_USR);
+         }
+-        if ( msr_content & IA32_DEBUGCTLMSR_LBR )
++
++        if ( cp->feat.rtm )
++            rsvd &= ~IA32_DEBUGCTLMSR_RTM;
++
++        if ( msr_content & rsvd )
++            goto gp_fault;
++
++        /*
++         * When a guest first enables LBR, arrange to save and restore the LBR
++         * MSRs and allow the guest direct access.
++         *
++         * MSR_DEBUGCTL and LBR have existed almost as long as MSRs have
++         * existed, and there is no architectural way to hide the feature, or
++         * fail the attempt to enable LBR.
++         *
++         * Unknown host LBR MSRs or hitting -ENOSPC with the guest load/save
++         * list are definitely hypervisor bugs, whereas -ENOMEM for allocating
++         * the load/save list is simply unlucky (and shouldn't occur with
++         * sensible management by the toolstack).
++         *
++         * Either way, there is nothing we can do right now to recover, and
++         * the guest won't execute correctly either.  Simply crash the domain
++         * to make the failure obvious.
++         */
++        if ( !(v->arch.hvm_vmx.lbr_flags & LBR_MSRS_INSERTED) &&
++             (msr_content & IA32_DEBUGCTLMSR_LBR) )
+         {
+             const struct lbr_info *lbr = last_branch_msr_get();
+-            if ( lbr == NULL )
+-                break;
+ 
+-            for ( ; (rc == 0) && lbr->count; lbr++ )
+-                for ( i = 0; (rc == 0) && (i < lbr->count); i++ )
+-                    if ( (rc = vmx_add_guest_msr(lbr->base + i)) == 0 )
++            if ( unlikely(!lbr) )
++            {
++                gprintk(XENLOG_ERR, "Unknown Host LBR MSRs\n");
++                domain_crash(v->domain);
++                return X86EMUL_OKAY;
++            }
++
++            for ( ; lbr->count; lbr++ )
++            {
++                unsigned int i;
++
++                for ( i = 0; i < lbr->count; i++ )
++                {
++                    int rc = vmx_add_guest_msr(v, lbr->base + i, 0);
++
++                    if ( unlikely(rc) )
+                     {
+-                        vmx_clear_msr_intercept(v, lbr->base + i, VMX_MSR_RW);
+-                        if ( lbr_tsx_fixup_needed )
+-                            v->arch.hvm_vmx.lbr_fixup_enabled |= FIXUP_LBR_TSX;
+-                        if ( bdw_erratum_bdf14_fixup_needed )
+-                            v->arch.hvm_vmx.lbr_fixup_enabled |=
+-                                FIXUP_BDW_ERRATUM_BDF14;
++                        gprintk(XENLOG_ERR,
++                                "Guest load/save list error %d\n", rc);
++                        domain_crash(v->domain);
++                        return X86EMUL_OKAY;
+                     }
+-        }
+ 
+-        if ( (rc < 0) ||
+-             (msr_content && (vmx_add_host_load_msr(msr) < 0)) )
+-            hvm_inject_hw_exception(TRAP_machine_check, X86_EVENT_NO_EC);
+-        else
+-            __vmwrite(GUEST_IA32_DEBUGCTL, msr_content);
++                    vmx_clear_msr_intercept(v, lbr->base + i, VMX_MSR_RW);
++                }
++            }
++
++            v->arch.hvm_vmx.lbr_flags |= LBR_MSRS_INSERTED;
++            if ( lbr_tsx_fixup_needed )
++                v->arch.hvm_vmx.lbr_flags |= LBR_FIXUP_TSX;
++            if ( bdw_erratum_bdf14_fixup_needed )
++                v->arch.hvm_vmx.lbr_flags |= LBR_FIXUP_BDF14;
++        }
+ 
++        __vmwrite(GUEST_IA32_DEBUGCTL, msr_content);
+         break;
+-    }
++
+     case MSR_IA32_FEATURE_CONTROL:
+     case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
+         /* None of these MSRs are writeable. */
+@@ -3154,7 +3205,7 @@ static int vmx_msr_write_intercept(unsigned int msr, uint64_t msr_content)
+         if ( wrmsr_viridian_regs(msr, msr_content) ) 
+             break;
+ 
+-        if ( vmx_write_guest_msr(msr, msr_content) == 0 ||
++        if ( vmx_write_guest_msr(v, msr, msr_content) == 0 ||
+              is_last_branch_msr(msr) )
+             break;
+ 
+@@ -3701,6 +3752,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs)
+              */
+             __vmread(EXIT_QUALIFICATION, &exit_qualification);
+             HVMTRACE_1D(TRAP_DEBUG, exit_qualification);
++            __restore_debug_registers(v);
+             write_debugreg(6, exit_qualification | DR_STATUS_RESERVED_ONE);
+             if ( !v->domain->debugger_attached )
+             {
+@@ -4165,11 +4217,11 @@ out:
+ static void lbr_tsx_fixup(void)
+ {
+     struct vcpu *curr = current;
+-    unsigned int msr_count = curr->arch.hvm_vmx.msr_count;
++    unsigned int msr_count = curr->arch.hvm_vmx.msr_save_count;
+     struct vmx_msr_entry *msr_area = curr->arch.hvm_vmx.msr_area;
+     struct vmx_msr_entry *msr;
+ 
+-    if ( (msr = vmx_find_msr(lbr_from_start, VMX_GUEST_MSR)) != NULL )
++    if ( (msr = vmx_find_msr(curr, lbr_from_start, VMX_MSR_GUEST)) != NULL )
+     {
+         /*
+          * Sign extend into bits 61:62 while preserving bit 63
+@@ -4179,15 +4231,15 @@ static void lbr_tsx_fixup(void)
+             msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
+     }
+ 
+-    if ( (msr = vmx_find_msr(lbr_lastint_from, VMX_GUEST_MSR)) != NULL )
++    if ( (msr = vmx_find_msr(curr, lbr_lastint_from, VMX_MSR_GUEST)) != NULL )
+         msr->data |= ((LBR_FROM_SIGNEXT_2MSB & msr->data) << 2);
+ }
+ 
+-static void sign_extend_msr(u32 msr, int type)
++static void sign_extend_msr(struct vcpu *v, u32 msr, int type)
+ {
+     struct vmx_msr_entry *entry;
+ 
+-    if ( (entry = vmx_find_msr(msr, type)) != NULL )
++    if ( (entry = vmx_find_msr(v, msr, type)) != NULL )
+     {
+         if ( entry->data & VADDR_TOP_BIT )
+             entry->data |= CANONICAL_MASK;
+@@ -4198,6 +4250,8 @@ static void sign_extend_msr(u32 msr, int type)
+ 
+ static void bdw_erratum_bdf14_fixup(void)
+ {
++    struct vcpu *curr = current;
++
+     /*
+      * Occasionally, on certain Broadwell CPUs MSR_IA32_LASTINTTOIP has
+      * been observed to have the top three bits corrupted as though the
+@@ -4207,17 +4261,17 @@ static void bdw_erratum_bdf14_fixup(void)
+      * erratum BDF14. Fix up MSR_IA32_LASTINT{FROM,TO}IP by
+      * sign-extending into bits 48:63.
+      */
+-    sign_extend_msr(MSR_IA32_LASTINTFROMIP, VMX_GUEST_MSR);
+-    sign_extend_msr(MSR_IA32_LASTINTTOIP, VMX_GUEST_MSR);
++    sign_extend_msr(curr, MSR_IA32_LASTINTFROMIP, VMX_MSR_GUEST);
++    sign_extend_msr(curr, MSR_IA32_LASTINTTOIP, VMX_MSR_GUEST);
+ }
+ 
+ static void lbr_fixup(void)
+ {
+     struct vcpu *curr = current;
+ 
+-    if ( curr->arch.hvm_vmx.lbr_fixup_enabled & FIXUP_LBR_TSX )
++    if ( curr->arch.hvm_vmx.lbr_flags & LBR_FIXUP_TSX )
+         lbr_tsx_fixup();
+-    if ( curr->arch.hvm_vmx.lbr_fixup_enabled & FIXUP_BDW_ERRATUM_BDF14 )
++    if ( curr->arch.hvm_vmx.lbr_flags & LBR_FIXUP_BDF14 )
+         bdw_erratum_bdf14_fixup();
+ }
+ 
+@@ -4285,7 +4339,7 @@ bool vmx_vmenter_helper(const struct cpu_user_regs *regs)
+     }
+ 
+  out:
+-    if ( unlikely(curr->arch.hvm_vmx.lbr_fixup_enabled) )
++    if ( unlikely(curr->arch.hvm_vmx.lbr_flags & LBR_FIXUP_MASK) )
+         lbr_fixup();
+ 
+     HVMTRACE_ND(VMENTRY, 0, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index bcf46c0743..7d4871b791 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -613,6 +613,9 @@ static int alloc_segdesc_page(struct page_info *page)
+     return i == 512 ? 0 : -EINVAL;
+ }
+ 
++static int _get_page_type(struct page_info *page, unsigned long type,
++                          bool preemptible);
++
+ static int get_page_and_type_from_mfn(
+     mfn_t mfn, unsigned long type, struct domain *d,
+     int partial, int preemptible)
+@@ -624,9 +627,7 @@ static int get_page_and_type_from_mfn(
+          unlikely(!get_page_from_mfn(mfn, d)) )
+         return -EINVAL;
+ 
+-    rc = (preemptible ?
+-          get_page_type_preemptible(page, type) :
+-          (get_page_type(page, type) ? 0 : -EINVAL));
++    rc = _get_page_type(page, type, preemptible);
+ 
+     if ( unlikely(rc) && partial >= 0 &&
+          (!preemptible || page != current->arch.old_guest_table) )
+@@ -1115,7 +1116,7 @@ get_page_from_l2e(
+     int rc;
+ 
+     if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+-        return 1;
++        return pv_l1tf_check_l2e(d, l2e) ? -ERESTART : 1;
+ 
+     if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
+     {
+@@ -1146,7 +1147,7 @@ get_page_from_l3e(
+     int rc;
+ 
+     if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
+-        return 1;
++        return pv_l1tf_check_l3e(d, l3e) ? -ERESTART : 1;
+ 
+     if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
+     {
+@@ -1179,7 +1180,7 @@ get_page_from_l4e(
+     int rc;
+ 
+     if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
+-        return 1;
++        return pv_l1tf_check_l4e(d, l4e) ? -ERESTART : 1;
+ 
+     if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
+     {
+@@ -1389,6 +1390,13 @@ static int alloc_l1_table(struct page_info *page)
+ 
+     for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+     {
++        if ( !(l1e_get_flags(pl1e[i]) & _PAGE_PRESENT) )
++        {
++            ret = pv_l1tf_check_l1e(d, pl1e[i]) ? -ERESTART : 0;
++            if ( ret )
++                goto out;
++        }
++
+         switch ( ret = get_page_from_l1e(pl1e[i], d, d) )
+         {
+         default:
+@@ -1409,6 +1417,7 @@ static int alloc_l1_table(struct page_info *page)
+ 
+  fail:
+     gdprintk(XENLOG_WARNING, "Failure in alloc_l1_table: slot %#x\n", i);
++ out:
+     while ( i-- > 0 )
+         put_page_from_l1e(pl1e[i], d);
+ 
+@@ -1456,8 +1465,7 @@ static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
+     return 1;
+ }
+ 
+-static int alloc_l2_table(struct page_info *page, unsigned long type,
+-                          int preemptible)
++static int alloc_l2_table(struct page_info *page, unsigned long type)
+ {
+     struct domain *d = page_get_owner(page);
+     unsigned long  pfn = mfn_x(page_to_mfn(page));
+@@ -1469,8 +1477,7 @@ static int alloc_l2_table(struct page_info *page, unsigned long type,
+ 
+     for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ )
+     {
+-        if ( preemptible && i > page->nr_validated_ptes
+-             && hypercall_preempt_check() )
++        if ( i > page->nr_validated_ptes && hypercall_preempt_check() )
+         {
+             page->nr_validated_ptes = i;
+             rc = -ERESTART;
+@@ -1481,6 +1488,12 @@ static int alloc_l2_table(struct page_info *page, unsigned long type,
+              (rc = get_page_from_l2e(pl2e[i], pfn, d)) > 0 )
+             continue;
+ 
++        if ( unlikely(rc == -ERESTART) )
++        {
++            page->nr_validated_ptes = i;
++            break;
++        }
++
+         if ( rc < 0 )
+         {
+             gdprintk(XENLOG_WARNING, "Failure in alloc_l2_table: slot %#x\n", i);
+@@ -1763,7 +1776,7 @@ static void free_l1_table(struct page_info *page)
+ }
+ 
+ 
+-static int free_l2_table(struct page_info *page, int preemptible)
++static int free_l2_table(struct page_info *page)
+ {
+     struct domain *d = page_get_owner(page);
+     unsigned long pfn = mfn_x(page_to_mfn(page));
+@@ -1777,7 +1790,7 @@ static int free_l2_table(struct page_info *page, int preemptible)
+     do {
+         if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) &&
+              put_page_from_l2e(pl2e[i], pfn) == 0 &&
+-             preemptible && i && hypercall_preempt_check() )
++             i && hypercall_preempt_check() )
+         {
+            page->nr_validated_ptes = i;
+            err = -ERESTART;
+@@ -2055,6 +2068,8 @@ static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
+             rc = -EBUSY;
+         }
+     }
++    else if ( pv_l1tf_check_l1e(pt_dom, nl1e) )
++        return -ERESTART;
+     else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
+                                      preserve_ad)) )
+     {
+@@ -2118,6 +2133,8 @@ static int mod_l2_entry(l2_pgentry_t *pl2e,
+             rc = -EBUSY;
+         }
+     }
++    else if ( pv_l1tf_check_l2e(d, nl2e) )
++        return -ERESTART;
+     else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu,
+                                      preserve_ad)) )
+     {
+@@ -2179,6 +2196,8 @@ static int mod_l3_entry(l3_pgentry_t *pl3e,
+             rc = -EFAULT;
+         }
+     }
++    else if ( pv_l1tf_check_l3e(d, nl3e) )
++        return -ERESTART;
+     else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu,
+                                      preserve_ad)) )
+     {
+@@ -2244,6 +2263,8 @@ static int mod_l4_entry(l4_pgentry_t *pl4e,
+             rc = -EFAULT;
+         }
+     }
++    else if ( pv_l1tf_check_l4e(d, nl4e) )
++        return -ERESTART;
+     else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu,
+                                      preserve_ad)) )
+     {
+@@ -2373,7 +2394,8 @@ static int alloc_page_type(struct page_info *page, unsigned long type,
+         rc = alloc_l1_table(page);
+         break;
+     case PGT_l2_page_table:
+-        rc = alloc_l2_table(page, type, preemptible);
++        ASSERT(preemptible);
++        rc = alloc_l2_table(page, type);
+         break;
+     case PGT_l3_page_table:
+         ASSERT(preemptible);
+@@ -2463,7 +2485,8 @@ int free_page_type(struct page_info *page, unsigned long type,
+         rc = 0;
+         break;
+     case PGT_l2_page_table:
+-        rc = free_l2_table(page, preemptible);
++        ASSERT(preemptible);
++        rc = free_l2_table(page);
+         break;
+     case PGT_l3_page_table:
+         ASSERT(preemptible);
+@@ -3550,12 +3573,9 @@ long do_mmuext_op(
+     }
+ 
+     if ( rc == -ERESTART )
+-    {
+-        ASSERT(i < count);
+         rc = hypercall_create_continuation(
+             __HYPERVISOR_mmuext_op, "hihi",
+             uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
+-    }
+     else if ( curr->arch.old_guest_table )
+     {
+         XEN_GUEST_HANDLE_PARAM(void) null;
+@@ -3861,12 +3881,9 @@ long do_mmu_update(
+     }
+ 
+     if ( rc == -ERESTART )
+-    {
+-        ASSERT(i < count);
+         rc = hypercall_create_continuation(
+             __HYPERVISOR_mmu_update, "hihi",
+             ureqs, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
+-    }
+     else if ( curr->arch.old_guest_table )
+     {
+         XEN_GUEST_HANDLE_PARAM(void) null;
+@@ -4121,7 +4138,13 @@ static int __do_update_va_mapping(
+ long do_update_va_mapping(unsigned long va, u64 val64,
+                           unsigned long flags)
+ {
+-    return __do_update_va_mapping(va, val64, flags, current->domain);
++    int rc = __do_update_va_mapping(va, val64, flags, current->domain);
++
++    if ( rc == -ERESTART )
++        rc = hypercall_create_continuation(
++            __HYPERVISOR_update_va_mapping, "lll", va, val64, flags);
++
++    return rc;
+ }
+ 
+ long do_update_va_mapping_otherdomain(unsigned long va, u64 val64,
+@@ -4138,6 +4161,46 @@ long do_update_va_mapping_otherdomain(unsigned long va, u64 val64,
+ 
+     put_pg_owner(pg_owner);
+ 
++    if ( rc == -ERESTART )
++        rc = hypercall_create_continuation(
++            __HYPERVISOR_update_va_mapping_otherdomain,
++            "llli", va, val64, flags, domid);
++
++    return rc;
++}
++
++int compat_update_va_mapping(unsigned int va, uint32_t lo, uint32_t hi,
++                             unsigned int flags)
++{
++    int rc = __do_update_va_mapping(va, ((uint64_t)hi << 32) | lo,
++                                    flags, current->domain);
++
++    if ( rc == -ERESTART )
++        rc = hypercall_create_continuation(
++            __HYPERVISOR_update_va_mapping, "iiii", va, lo, hi, flags);
++
++    return rc;
++}
++
++int compat_update_va_mapping_otherdomain(unsigned int va,
++                                         uint32_t lo, uint32_t hi,
++                                         unsigned int flags, domid_t domid)
++{
++    struct domain *pg_owner;
++    int rc;
++
++    if ( (pg_owner = get_pg_owner(domid)) == NULL )
++        return -ESRCH;
++
++    rc = __do_update_va_mapping(va, ((uint64_t)hi << 32) | lo, flags, pg_owner);
++
++    put_pg_owner(pg_owner);
++
++    if ( rc == -ERESTART )
++        rc = hypercall_create_continuation(
++            __HYPERVISOR_update_va_mapping_otherdomain,
++            "iiiii", va, lo, hi, flags, domid);
++
+     return rc;
+ }
+ 
+diff --git a/xen/arch/x86/mm/paging.c b/xen/arch/x86/mm/paging.c
+index 2b0445ffe9..dcee496eb0 100644
+--- a/xen/arch/x86/mm/paging.c
++++ b/xen/arch/x86/mm/paging.c
+@@ -873,6 +873,8 @@ void paging_dump_domain_info(struct domain *d)
+         printk("    paging assistance: ");
+         if ( paging_mode_shadow(d) )
+             printk("shadow ");
++        if ( paging_mode_sh_forced(d) )
++            printk("forced ");
+         if ( paging_mode_hap(d) )
+             printk("hap ");
+         if ( paging_mode_refcounts(d) )
+diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c
+index dd61b50eb7..fd42d734e7 100644
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -3177,6 +3177,15 @@ static void sh_new_mode(struct domain *d, u32 new_mode)
+     ASSERT(paging_locked_by_me(d));
+     ASSERT(d != current->domain);
+ 
++    /*
++     * If PG_SH_forced has previously been activated because of writing an
++     * L1TF-vulnerable PTE, it must remain active for the remaining lifetime
++     * of the domain, even if the logdirty mode needs to be controlled for
++     * migration purposes.
++     */
++    if ( paging_mode_sh_forced(d) )
++        new_mode |= PG_SH_forced | PG_SH_enable;
++
+     d->arch.paging.mode = new_mode;
+     for_each_vcpu(d, v)
+         sh_update_paging_modes(v);
+@@ -4057,6 +4066,33 @@ void shadow_audit_tables(struct vcpu *v)
+ 
+ #endif /* Shadow audit */
+ 
++#ifdef CONFIG_PV
++
++void pv_l1tf_tasklet(unsigned long data)
++{
++    struct domain *d = (void *)data;
++
++    domain_pause(d);
++    paging_lock(d);
++
++    if ( !paging_mode_sh_forced(d) && !d->is_dying )
++    {
++        int ret = shadow_one_bit_enable(d, PG_SH_forced);
++
++        if ( ret )
++        {
++            printk(XENLOG_G_ERR "d%d Failed to enable PG_SH_forced: %d\n",
++                   d->domain_id, ret);
++            domain_crash(d);
++        }
++    }
++
++    paging_unlock(d);
++    domain_unpause(d);
++}
++
++#endif /* CONFIG_PV */
++
+ /*
+  * Local variables:
+  * mode: C
+diff --git a/xen/arch/x86/mpparse.c b/xen/arch/x86/mpparse.c
+index 49140e46f0..f3f6d48668 100644
+--- a/xen/arch/x86/mpparse.c
++++ b/xen/arch/x86/mpparse.c
+@@ -68,19 +68,26 @@ physid_mask_t phys_cpu_present_map;
+ 
+ void __init set_nr_cpu_ids(unsigned int max_cpus)
+ {
++	unsigned int tot_cpus = num_processors + disabled_cpus;
++
+ 	if (!max_cpus)
+-		max_cpus = num_processors + disabled_cpus;
++		max_cpus = tot_cpus;
+ 	if (max_cpus > NR_CPUS)
+ 		max_cpus = NR_CPUS;
+ 	else if (!max_cpus)
+ 		max_cpus = 1;
+ 	printk(XENLOG_INFO "SMP: Allowing %u CPUs (%d hotplug CPUs)\n",
+ 	       max_cpus, max_t(int, max_cpus - num_processors, 0));
+-	nr_cpu_ids = max_cpus;
++
++	if (!park_offline_cpus)
++		tot_cpus = max_cpus;
++	nr_cpu_ids = min(tot_cpus, NR_CPUS + 0u);
++	if (park_offline_cpus && nr_cpu_ids < num_processors)
++		printk(XENLOG_WARNING "SMP: Cannot bring up %u further CPUs\n",
++		       num_processors - nr_cpu_ids);
+ 
+ #ifndef nr_cpumask_bits
+-	nr_cpumask_bits = (max_cpus + (BITS_PER_LONG - 1)) &
+-			  ~(BITS_PER_LONG - 1);
++	nr_cpumask_bits = ROUNDUP(nr_cpu_ids, BITS_PER_LONG);
+ 	printk(XENLOG_DEBUG "NR_CPUS:%u nr_cpumask_bits:%u\n",
+ 	       NR_CPUS, nr_cpumask_bits);
+ #endif
+diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
+index 1e12ccb729..1a591dd2b5 100644
+--- a/xen/arch/x86/msr.c
++++ b/xen/arch/x86/msr.c
+@@ -150,6 +150,7 @@ int guest_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
+     case MSR_AMD_PATCHLOADER:
+     case MSR_IA32_UCODE_WRITE:
+     case MSR_PRED_CMD:
++    case MSR_FLUSH_CMD:
+         /* Write-only */
+         goto gp_fault;
+ 
+@@ -254,6 +255,17 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
+             wrmsrl(MSR_PRED_CMD, val);
+         break;
+ 
++    case MSR_FLUSH_CMD:
++        if ( !cp->feat.l1d_flush )
++            goto gp_fault; /* MSR available? */
++
++        if ( val & ~FLUSH_CMD_L1D )
++            goto gp_fault; /* Rsvd bit set? */
++
++        if ( v == curr )
++            wrmsrl(MSR_FLUSH_CMD, val);
++        break;
++
+     case MSR_INTEL_MISC_FEATURES_ENABLES:
+     {
+         bool old_cpuid_faulting = vp->misc_features_enables.cpuid_faulting;
+diff --git a/xen/arch/x86/oprofile/nmi_int.c b/xen/arch/x86/oprofile/nmi_int.c
+index d8f5230906..3dfb8fef93 100644
+--- a/xen/arch/x86/oprofile/nmi_int.c
++++ b/xen/arch/x86/oprofile/nmi_int.c
+@@ -182,7 +182,7 @@ int nmi_reserve_counters(void)
+ 	if (!allocate_msrs())
+ 		return -ENOMEM;
+ 
+-	/* We walk a thin line between law and rape here.
++	/*
+ 	 * We need to be careful to install our NMI handler
+ 	 * without actually triggering any NMIs as this will
+ 	 * break the core code horrifically.
+diff --git a/xen/arch/x86/percpu.c b/xen/arch/x86/percpu.c
+index c9997b7937..8be4ebddf4 100644
+--- a/xen/arch/x86/percpu.c
++++ b/xen/arch/x86/percpu.c
+@@ -28,7 +28,7 @@ static int init_percpu_area(unsigned int cpu)
+     char *p;
+ 
+     if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA )
+-        return -EBUSY;
++        return 0;
+ 
+     if ( (p = alloc_xenheap_pages(PERCPU_ORDER, 0)) == NULL )
+         return -ENOMEM;
+@@ -76,9 +76,12 @@ static int cpu_percpu_callback(
+         break;
+     case CPU_UP_CANCELED:
+     case CPU_DEAD:
+-        free_percpu_area(cpu);
++        if ( !park_offline_cpus )
++            free_percpu_area(cpu);
+         break;
+-    default:
++    case CPU_REMOVE:
++        if ( park_offline_cpus )
++            free_percpu_area(cpu);
+         break;
+     }
+ 
+diff --git a/xen/arch/x86/pv/domain.c b/xen/arch/x86/pv/domain.c
+index a4f0bd239d..3230ac6a22 100644
+--- a/xen/arch/x86/pv/domain.c
++++ b/xen/arch/x86/pv/domain.c
+@@ -13,6 +13,7 @@
+ #include <asm/invpcid.h>
+ #include <asm/spec_ctrl.h>
+ #include <asm/pv/domain.h>
++#include <asm/shadow.h>
+ 
+ static __read_mostly enum {
+     PCID_OFF,
+@@ -209,6 +210,8 @@ int pv_vcpu_initialise(struct vcpu *v)
+ 
+ void pv_domain_destroy(struct domain *d)
+ {
++    pv_l1tf_domain_destroy(d);
++
+     destroy_perdomain_mapping(d, GDT_LDT_VIRT_START,
+                               GDT_LDT_MBYTES << (20 - PAGE_SHIFT));
+ 
+@@ -229,6 +232,8 @@ int pv_domain_initialise(struct domain *d)
+     };
+     int rc = -ENOMEM;
+ 
++    pv_l1tf_domain_init(d);
++
+     d->arch.pv_domain.gdt_ldt_l1tab =
+         alloc_xenheap_pages(0, MEMF_node(domain_to_node(d)));
+     if ( !d->arch.pv_domain.gdt_ldt_l1tab )
+diff --git a/xen/arch/x86/pv/ro-page-fault.c b/xen/arch/x86/pv/ro-page-fault.c
+index aa8d5a7556..a3c0c2dd19 100644
+--- a/xen/arch/x86/pv/ro-page-fault.c
++++ b/xen/arch/x86/pv/ro-page-fault.c
+@@ -29,6 +29,7 @@
+ #include <asm/mm.h>
+ #include <asm/pci.h>
+ #include <asm/pv/mm.h>
++#include <asm/shadow.h>
+ 
+ #include "emulate.h"
+ #include "mm.h"
+@@ -129,6 +130,10 @@ static int ptwr_emulated_update(unsigned long addr, intpte_t *p_old,
+ 
+     /* Check the new PTE. */
+     nl1e = l1e_from_intpte(val);
++
++    if ( !(l1e_get_flags(nl1e) & _PAGE_PRESENT) && pv_l1tf_check_l1e(d, nl1e) )
++        return X86EMUL_RETRY;
++
+     switch ( ret = get_page_from_l1e(nl1e, d, d) )
+     {
+     default:
+diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
+index a3172ca92c..3cd3e81b30 100644
+--- a/xen/arch/x86/setup.c
++++ b/xen/arch/x86/setup.c
+@@ -62,6 +62,9 @@ boolean_param("nosmp", opt_nosmp);
+ static unsigned int __initdata max_cpus;
+ integer_param("maxcpus", max_cpus);
+ 
++int8_t __read_mostly opt_smt = -1;
++boolean_param("smt", opt_smt);
++
+ /* opt_invpcid: If false, don't use INVPCID instruction even if available. */
+ static bool __initdata opt_invpcid = true;
+ boolean_param("invpcid", opt_invpcid);
+@@ -665,7 +668,7 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+ {
+     char *memmap_type = NULL;
+     char *cmdline, *kextra, *loader;
+-    unsigned int initrdidx;
++    unsigned int initrdidx, num_parked = 0;
+     multiboot_info_t *mbi;
+     module_t *mod;
+     unsigned long nr_pages, raw_max_page, modules_headroom, *module_map;
+@@ -909,6 +912,18 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+     /* Sanitise the raw E820 map to produce a final clean version. */
+     max_page = raw_max_page = init_e820(memmap_type, &e820_raw);
+ 
++    if ( !efi_enabled(EFI_BOOT) )
++    {
++        /*
++         * Supplement the heuristics in l1tf_calculations() by assuming that
++         * anything referenced in the E820 may be cacheable.
++         */
++        l1tf_safe_maddr =
++            max(l1tf_safe_maddr,
++                ROUNDUP(e820_raw.map[e820_raw.nr_map - 1].addr +
++                        e820_raw.map[e820_raw.nr_map - 1].size, PAGE_SIZE));
++    }
++
+     /* Create a temporary copy of the E820 map. */
+     memcpy(&boot_e820, &e820, sizeof(e820));
+ 
+@@ -1494,7 +1509,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+     else
+     {
+         set_nr_cpu_ids(max_cpus);
+-        max_cpus = nr_cpu_ids;
++        if ( !max_cpus )
++            max_cpus = nr_cpu_ids;
+     }
+ 
+     if ( xen_guest )
+@@ -1617,16 +1633,30 @@ void __init noreturn __start_xen(unsigned long mbi_p)
+             /* Set up node_to_cpumask based on cpu_to_node[]. */
+             numa_add_cpu(i);
+ 
+-            if ( (num_online_cpus() < max_cpus) && !cpu_online(i) )
++            if ( (park_offline_cpus || num_online_cpus() < max_cpus) &&
++                 !cpu_online(i) )
+             {
+                 int ret = cpu_up(i);
+                 if ( ret != 0 )
+                     printk("Failed to bring up CPU %u (error %d)\n", i, ret);
++                else if ( num_online_cpus() > max_cpus ||
++                          (!opt_smt &&
++                           cpu_data[i].compute_unit_id == INVALID_CUID &&
++                           cpumask_weight(per_cpu(cpu_sibling_mask, i)) > 1) )
++                {
++                    ret = cpu_down(i);
++                    if ( !ret )
++                        ++num_parked;
++                    else
++                        printk("Could not re-offline CPU%u (%d)\n", i, ret);
++                }
+             }
+         }
+     }
+ 
+     printk("Brought up %ld CPUs\n", (long)num_online_cpus());
++    if ( num_parked )
++        printk(XENLOG_INFO "Parked %u CPUs\n", num_parked);
+     smp_cpus_done();
+ 
+     do_initcalls();
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index d4478e6132..7e76cc3d68 100644
+--- a/xen/arch/x86/smpboot.c
++++ b/xen/arch/x86/smpboot.c
+@@ -63,6 +63,8 @@ static cpumask_t scratch_cpu0mask;
+ cpumask_t cpu_online_map __read_mostly;
+ EXPORT_SYMBOL(cpu_online_map);
+ 
++bool __read_mostly park_offline_cpus;
++
+ unsigned int __read_mostly nr_sockets;
+ cpumask_t **__read_mostly socket_cpumask;
+ static cpumask_t *secondary_socket_cpumask;
+@@ -234,33 +236,41 @@ static void link_thread_siblings(int cpu1, int cpu2)
+     cpumask_set_cpu(cpu2, per_cpu(cpu_core_mask, cpu1));
+ }
+ 
+-static void set_cpu_sibling_map(int cpu)
++static void set_cpu_sibling_map(unsigned int cpu)
+ {
+-    int i;
++    unsigned int i;
+     struct cpuinfo_x86 *c = cpu_data;
+ 
+     cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
+ 
+     cpumask_set_cpu(cpu, socket_cpumask[cpu_to_socket(cpu)]);
++    cpumask_set_cpu(cpu, per_cpu(cpu_core_mask, cpu));
++    cpumask_set_cpu(cpu, per_cpu(cpu_sibling_mask, cpu));
+ 
+     if ( c[cpu].x86_num_siblings > 1 )
+     {
+         for_each_cpu ( i, &cpu_sibling_setup_map )
+         {
+-            if ( cpu_has(c, X86_FEATURE_TOPOEXT) ) {
+-                if ( (c[cpu].phys_proc_id == c[i].phys_proc_id) &&
+-                     (c[cpu].compute_unit_id == c[i].compute_unit_id) )
++            if ( cpu == i || c[cpu].phys_proc_id != c[i].phys_proc_id )
++                continue;
++            if ( c[cpu].compute_unit_id != INVALID_CUID &&
++                 c[i].compute_unit_id != INVALID_CUID )
++            {
++                if ( c[cpu].compute_unit_id == c[i].compute_unit_id )
+                     link_thread_siblings(cpu, i);
+-            } else if ( (c[cpu].phys_proc_id == c[i].phys_proc_id) &&
+-                        (c[cpu].cpu_core_id == c[i].cpu_core_id) ) {
+-                link_thread_siblings(cpu, i);
+             }
++            else if ( c[cpu].cpu_core_id != XEN_INVALID_CORE_ID &&
++                      c[i].cpu_core_id != XEN_INVALID_CORE_ID )
++            {
++                if ( c[cpu].cpu_core_id == c[i].cpu_core_id )
++                    link_thread_siblings(cpu, i);
++            }
++            else
++                printk(XENLOG_WARNING
++                       "CPU%u: unclear relationship with CPU%u\n",
++                       cpu, i);
+         }
+     }
+-    else
+-    {
+-        cpumask_set_cpu(cpu, per_cpu(cpu_sibling_mask, cpu));
+-    }
+ 
+     if ( c[cpu].x86_max_cores == 1 )
+     {
+@@ -887,7 +897,14 @@ static void cleanup_cpu_root_pgt(unsigned int cpu)
+     }
+ }
+ 
+-static void cpu_smpboot_free(unsigned int cpu)
++/*
++ * The 'remove' boolean controls whether a CPU is just getting offlined (and
++ * parked), or outright removed / offlined without parking. Parked CPUs need
++ * things like their stack, GDT, IDT, TSS, and per-CPU data still available.
++ * A few other items, in particular CPU masks, are also retained, as it's
++ * difficult to prove that they're entirely unreferenced from parked CPUs.
++ */
++static void cpu_smpboot_free(unsigned int cpu, bool remove)
+ {
+     unsigned int order, socket = cpu_to_socket(cpu);
+     struct cpuinfo_x86 *c = cpu_data;
+@@ -898,15 +915,19 @@ static void cpu_smpboot_free(unsigned int cpu)
+         socket_cpumask[socket] = NULL;
+     }
+ 
+-    c[cpu].phys_proc_id = XEN_INVALID_SOCKET_ID;
+-    c[cpu].cpu_core_id = XEN_INVALID_CORE_ID;
+-    c[cpu].compute_unit_id = INVALID_CUID;
+     cpumask_clear_cpu(cpu, &cpu_sibling_setup_map);
+ 
+-    free_cpumask_var(per_cpu(cpu_sibling_mask, cpu));
+-    free_cpumask_var(per_cpu(cpu_core_mask, cpu));
+-    if ( per_cpu(scratch_cpumask, cpu) != &scratch_cpu0mask )
+-        free_cpumask_var(per_cpu(scratch_cpumask, cpu));
++    if ( remove )
++    {
++        c[cpu].phys_proc_id = XEN_INVALID_SOCKET_ID;
++        c[cpu].cpu_core_id = XEN_INVALID_CORE_ID;
++        c[cpu].compute_unit_id = INVALID_CUID;
++
++        FREE_CPUMASK_VAR(per_cpu(cpu_sibling_mask, cpu));
++        FREE_CPUMASK_VAR(per_cpu(cpu_core_mask, cpu));
++        if ( per_cpu(scratch_cpumask, cpu) != &scratch_cpu0mask )
++            FREE_CPUMASK_VAR(per_cpu(scratch_cpumask, cpu));
++    }
+ 
+     cleanup_cpu_root_pgt(cpu);
+ 
+@@ -928,19 +949,21 @@ static void cpu_smpboot_free(unsigned int cpu)
+     }
+ 
+     order = get_order_from_pages(NR_RESERVED_GDT_PAGES);
+-    free_xenheap_pages(per_cpu(gdt_table, cpu), order);
++    if ( remove )
++        FREE_XENHEAP_PAGES(per_cpu(gdt_table, cpu), order);
+ 
+     free_xenheap_pages(per_cpu(compat_gdt_table, cpu), order);
+ 
+-    order = get_order_from_bytes(IDT_ENTRIES * sizeof(idt_entry_t));
+-    free_xenheap_pages(idt_tables[cpu], order);
+-    idt_tables[cpu] = NULL;
+-
+-    if ( stack_base[cpu] != NULL )
++    if ( remove )
+     {
+-        memguard_unguard_stack(stack_base[cpu]);
+-        free_xenheap_pages(stack_base[cpu], STACK_ORDER);
+-        stack_base[cpu] = NULL;
++        order = get_order_from_bytes(IDT_ENTRIES * sizeof(idt_entry_t));
++        FREE_XENHEAP_PAGES(idt_tables[cpu], order);
++
++        if ( stack_base[cpu] )
++        {
++            memguard_unguard_stack(stack_base[cpu]);
++            FREE_XENHEAP_PAGES(stack_base[cpu], STACK_ORDER);
++        }
+     }
+ }
+ 
+@@ -955,15 +978,17 @@ static int cpu_smpboot_alloc(unsigned int cpu)
+     if ( node != NUMA_NO_NODE )
+         memflags = MEMF_node(node);
+ 
+-    stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER, memflags);
++    if ( stack_base[cpu] == NULL )
++        stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER, memflags);
+     if ( stack_base[cpu] == NULL )
+         goto out;
+     memguard_guard_stack(stack_base[cpu]);
+ 
+     order = get_order_from_pages(NR_RESERVED_GDT_PAGES);
+-    per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(order, memflags);
++    gdt = per_cpu(gdt_table, cpu) ?: alloc_xenheap_pages(order, memflags);
+     if ( gdt == NULL )
+         goto out;
++    per_cpu(gdt_table, cpu) = gdt;
+     memcpy(gdt, boot_cpu_gdt_table, NR_RESERVED_GDT_PAGES * PAGE_SIZE);
+     BUILD_BUG_ON(NR_CPUS > 0x10000);
+     gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
+@@ -975,7 +1000,8 @@ static int cpu_smpboot_alloc(unsigned int cpu)
+     gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
+ 
+     order = get_order_from_bytes(IDT_ENTRIES * sizeof(idt_entry_t));
+-    idt_tables[cpu] = alloc_xenheap_pages(order, memflags);
++    if ( idt_tables[cpu] == NULL )
++        idt_tables[cpu] = alloc_xenheap_pages(order, memflags);
+     if ( idt_tables[cpu] == NULL )
+         goto out;
+     memcpy(idt_tables[cpu], idt_table, IDT_ENTRIES * sizeof(idt_entry_t));
+@@ -1003,16 +1029,16 @@ static int cpu_smpboot_alloc(unsigned int cpu)
+          (secondary_socket_cpumask = xzalloc(cpumask_t)) == NULL )
+         goto out;
+ 
+-    if ( !(zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, cpu)) &&
+-           zalloc_cpumask_var(&per_cpu(cpu_core_mask, cpu)) &&
+-           alloc_cpumask_var(&per_cpu(scratch_cpumask, cpu))) )
++    if ( !(cond_zalloc_cpumask_var(&per_cpu(cpu_sibling_mask, cpu)) &&
++           cond_zalloc_cpumask_var(&per_cpu(cpu_core_mask, cpu)) &&
++           cond_alloc_cpumask_var(&per_cpu(scratch_cpumask, cpu))) )
+         goto out;
+ 
+     rc = 0;
+ 
+  out:
+     if ( rc )
+-        cpu_smpboot_free(cpu);
++        cpu_smpboot_free(cpu, true);
+ 
+     return rc;
+ }
+@@ -1030,9 +1056,10 @@ static int cpu_smpboot_callback(
+         break;
+     case CPU_UP_CANCELED:
+     case CPU_DEAD:
+-        cpu_smpboot_free(cpu);
++        cpu_smpboot_free(cpu, !park_offline_cpus);
+         break;
+-    default:
++    case CPU_REMOVE:
++        cpu_smpboot_free(cpu, true);
+         break;
+     }
+ 
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 08e6784c4c..f0c50d6703 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -19,10 +19,13 @@
+ #include <xen/errno.h>
+ #include <xen/init.h>
+ #include <xen/lib.h>
++#include <xen/warning.h>
+ 
+ #include <asm/microcode.h>
+ #include <asm/msr.h>
+ #include <asm/processor.h>
++#include <asm/pv/shim.h>
++#include <asm/setup.h>
+ #include <asm/spec_ctrl.h>
+ #include <asm/spec_ctrl_asm.h>
+ 
+@@ -45,11 +48,16 @@ static int8_t __initdata opt_ibrs = -1;
+ bool __read_mostly opt_ibpb = true;
+ bool __read_mostly opt_ssbd = false;
+ int8_t __read_mostly opt_eager_fpu = -1;
++int8_t __read_mostly opt_l1d_flush = -1;
+ 
+ bool __initdata bsp_delay_spec_ctrl;
+ uint8_t __read_mostly default_xen_spec_ctrl;
+ uint8_t __read_mostly default_spec_ctrl_flags;
+ 
++paddr_t __read_mostly l1tf_addr_mask, __read_mostly l1tf_safe_maddr;
++static bool __initdata cpu_has_bug_l1tf;
++static unsigned int __initdata l1d_maxphysaddr;
++
+ static int __init parse_bti(const char *s)
+ {
+     const char *ss;
+@@ -124,6 +132,17 @@ static int __init parse_spec_ctrl(const char *s)
+             opt_msr_sc_pv = false;
+             opt_msr_sc_hvm = false;
+ 
++            opt_eager_fpu = 0;
++
++            if ( opt_xpti < 0 )
++                opt_xpti = 0;
++
++            if ( opt_smt < 0 )
++                opt_smt = 1;
++
++            if ( opt_pv_l1tf < 0 )
++                opt_pv_l1tf = 0;
++
+         disable_common:
+             opt_rsb_pv = false;
+             opt_rsb_hvm = false;
+@@ -131,7 +150,8 @@ static int __init parse_spec_ctrl(const char *s)
+             opt_thunk = THUNK_JMP;
+             opt_ibrs = 0;
+             opt_ibpb = false;
+-            opt_eager_fpu = 0;
++            opt_ssbd = false;
++            opt_l1d_flush = 0;
+         }
+         else if ( val > 0 )
+             rc = -EINVAL;
+@@ -187,6 +207,8 @@ static int __init parse_spec_ctrl(const char *s)
+             opt_ssbd = val;
+         else if ( (val = parse_boolean("eager-fpu", s, ss)) >= 0 )
+             opt_eager_fpu = val;
++        else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 )
++            opt_l1d_flush = val;
+         else
+             rc = -EINVAL;
+ 
+@@ -197,6 +219,55 @@ static int __init parse_spec_ctrl(const char *s)
+ }
+ custom_param("spec-ctrl", parse_spec_ctrl);
+ 
++int8_t __read_mostly opt_pv_l1tf = -1;
++
++static __init int parse_pv_l1tf(const char *s)
++{
++    const char *ss;
++    int val, rc = 0;
++
++    /* Inhibit the defaults as an explicit choice has been given. */
++    if ( opt_pv_l1tf == -1 )
++        opt_pv_l1tf = 0;
++
++    /* Interpret 'pv-l1tf' alone in its positive boolean form. */
++    if ( *s == '\0' )
++        opt_pv_l1tf = OPT_PV_L1TF_DOM0 | OPT_PV_L1TF_DOMU;
++
++    do {
++        ss = strchr(s, ',');
++        if ( !ss )
++            ss = strchr(s, '\0');
++
++        switch ( parse_bool(s, ss) )
++        {
++        case 0:
++            opt_pv_l1tf = 0;
++            break;
++
++        case 1:
++            opt_pv_l1tf = OPT_PV_L1TF_DOM0 | OPT_PV_L1TF_DOMU;
++            break;
++
++        default:
++            if ( (val = parse_boolean("dom0", s, ss)) >= 0 )
++                opt_pv_l1tf = ((opt_pv_l1tf & ~OPT_PV_L1TF_DOM0) |
++                               (val ? OPT_PV_L1TF_DOM0 : 0));
++            else if ( (val = parse_boolean("domu", s, ss)) >= 0 )
++                opt_pv_l1tf = ((opt_pv_l1tf & ~OPT_PV_L1TF_DOMU) |
++                               (val ? OPT_PV_L1TF_DOMU : 0));
++            else
++                rc = -EINVAL;
++            break;
++        }
++
++        s = ss + 1;
++    } while ( *ss );
++
++    return rc;
++}
++custom_param("pv-l1tf", parse_pv_l1tf);
++
+ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
+ {
+     unsigned int _7d0 = 0, e8b = 0, tmp;
+@@ -210,22 +281,31 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
+     printk("Speculative mitigation facilities:\n");
+ 
+     /* Hardware features which pertain to speculative mitigations. */
+-    printk("  Hardware features:%s%s%s%s%s%s%s%s\n",
++    printk("  Hardware features:%s%s%s%s%s%s%s%s%s%s\n",
+            (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "",
+            (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP"     : "",
++           (_7d0 & cpufeat_mask(X86_FEATURE_L1D_FLUSH)) ? " L1D_FLUSH" : "",
+            (_7d0 & cpufeat_mask(X86_FEATURE_SSBD))  ? " SSBD"      : "",
+            (e8b  & cpufeat_mask(X86_FEATURE_IBPB))  ? " IBPB"      : "",
+            (caps & ARCH_CAPABILITIES_IBRS_ALL)      ? " IBRS_ALL"  : "",
+            (caps & ARCH_CAPABILITIES_RDCL_NO)       ? " RDCL_NO"   : "",
+            (caps & ARCH_CAPS_RSBA)                  ? " RSBA"      : "",
++           (caps & ARCH_CAPS_SKIP_L1DFL)            ? " SKIP_L1DFL": "",
+            (caps & ARCH_CAPS_SSB_NO)                ? " SSB_NO"    : "");
+ 
+-    /* Compiled-in support which pertains to BTI mitigations. */
+-    if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) )
+-        printk("  Compiled-in support: INDIRECT_THUNK\n");
++    /* Compiled-in support which pertains to mitigations. */
++    if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) )
++        printk("  Compiled-in support:"
++#ifdef CONFIG_INDIRECT_THUNK
++               " INDIRECT_THUNK"
++#endif
++#ifdef CONFIG_SHADOW_PAGING
++               " SHADOW_PAGING"
++#endif
++               "\n");
+ 
+     /* Settings for Xen's protection, irrespective of guests. */
+-    printk("  Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s, Other:%s\n",
++    printk("  Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s, Other:%s%s\n",
+            thunk == THUNK_NONE      ? "N/A" :
+            thunk == THUNK_RETPOLINE ? "RETPOLINE" :
+            thunk == THUNK_LFENCE    ? "LFENCE" :
+@@ -234,7 +314,15 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
+            (default_xen_spec_ctrl & SPEC_CTRL_IBRS)  ? "IBRS+" :  "IBRS-",
+            !boot_cpu_has(X86_FEATURE_SSBD)           ? "" :
+            (default_xen_spec_ctrl & SPEC_CTRL_SSBD)  ? " SSBD+" : " SSBD-",
+-           opt_ibpb                                  ? " IBPB"  : "");
++           opt_ibpb                                  ? " IBPB"  : "",
++           opt_l1d_flush                             ? " L1D_FLUSH" : "");
++
++    /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */
++    if ( cpu_has_bug_l1tf || opt_pv_l1tf )
++        printk("  L1TF: believed%s vulnerable, maxphysaddr L1D %u, CPUID %u"
++               ", Safe address %"PRIx64"\n",
++               cpu_has_bug_l1tf ? "" : " not",
++               l1d_maxphysaddr, paddr_bits, l1tf_safe_maddr);
+ 
+     /*
+      * Alternatives blocks for protecting against and/or virtualising
+@@ -257,6 +345,10 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps)
+     printk("  XPTI (64-bit PV only): Dom0 %s, DomU %s\n",
+            opt_xpti & OPT_XPTI_DOM0 ? "enabled" : "disabled",
+            opt_xpti & OPT_XPTI_DOMU ? "enabled" : "disabled");
++
++    printk("  PV L1TF shadowing: Dom0 %s, DomU %s\n",
++           opt_pv_l1tf & OPT_PV_L1TF_DOM0  ? "enabled"  : "disabled",
++           opt_pv_l1tf & OPT_PV_L1TF_DOMU  ? "enabled"  : "disabled");
+ }
+ 
+ /* Calculate whether Retpoline is known-safe on this CPU. */
+@@ -418,20 +510,159 @@ static bool __init should_use_eager_fpu(void)
+     }
+ }
+ 
+-#define OPT_XPTI_DEFAULT  0xff
+-uint8_t __read_mostly opt_xpti = OPT_XPTI_DEFAULT;
+-
+-static __init void xpti_init_default(bool force)
++/* Calculate whether this CPU is vulnerable to L1TF. */
++static __init void l1tf_calculations(uint64_t caps)
+ {
+-    uint64_t caps = 0;
++    bool hit_default = false;
++
++    l1d_maxphysaddr = paddr_bits;
++
++    /* L1TF is only known to affect Intel Family 6 processors at this time. */
++    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
++         boot_cpu_data.x86 == 6 )
++    {
++        switch ( boot_cpu_data.x86_model )
++        {
++            /*
++             * Core processors since at least Penryn are vulnerable.
++             */
++        case 0x17: /* Penryn */
++        case 0x1d: /* Dunnington */
++            cpu_has_bug_l1tf = true;
++            break;
++
++        case 0x1f: /* Auburndale / Havendale */
++        case 0x1e: /* Nehalem */
++        case 0x1a: /* Nehalem EP */
++        case 0x2e: /* Nehalem EX */
++        case 0x25: /* Westmere */
++        case 0x2c: /* Westmere EP */
++        case 0x2f: /* Westmere EX */
++            cpu_has_bug_l1tf = true;
++            l1d_maxphysaddr = 44;
++            break;
++
++        case 0x2a: /* SandyBridge */
++        case 0x2d: /* SandyBridge EP/EX */
++        case 0x3a: /* IvyBridge */
++        case 0x3e: /* IvyBridge EP/EX */
++        case 0x3c: /* Haswell */
++        case 0x3f: /* Haswell EX/EP */
++        case 0x45: /* Haswell D */
++        case 0x46: /* Haswell H */
++        case 0x3d: /* Broadwell */
++        case 0x47: /* Broadwell H */
++        case 0x4f: /* Broadwell EP/EX */
++        case 0x56: /* Broadwell D */
++        case 0x4e: /* Skylake M */
++        case 0x55: /* Skylake X */
++        case 0x5e: /* Skylake D */
++        case 0x66: /* Cannonlake */
++        case 0x67: /* Cannonlake? */
++        case 0x8e: /* Kabylake M */
++        case 0x9e: /* Kabylake D */
++            cpu_has_bug_l1tf = true;
++            l1d_maxphysaddr = 46;
++            break;
++
++            /*
++             * Atom processors are not vulnerable.
++             */
++        case 0x1c: /* Pineview */
++        case 0x26: /* Lincroft */
++        case 0x27: /* Penwell */
++        case 0x35: /* Cloverview */
++        case 0x36: /* Cedarview */
++        case 0x37: /* Baytrail / Valleyview (Silvermont) */
++        case 0x4d: /* Avoton / Rangely (Silvermont) */
++        case 0x4c: /* Cherrytrail / Braswell */
++        case 0x4a: /* Merrifield */
++        case 0x5a: /* Moorefield */
++        case 0x5c: /* Goldmont */
++        case 0x5f: /* Denverton */
++        case 0x7a: /* Gemini Lake */
++            break;
++
++            /*
++             * Knights processors are not vulnerable.
++             */
++        case 0x57: /* Knights Landing */
++        case 0x85: /* Knights Mill */
++            break;
++
++        default:
++            /* Defer printk() until we've accounted for RDCL_NO. */
++            hit_default = true;
++            cpu_has_bug_l1tf = true;
++            break;
++        }
++    }
++
++    /* Any processor advertising RDCL_NO should not be vulnerable to L1TF. */
++    if ( caps & ARCH_CAPABILITIES_RDCL_NO )
++        cpu_has_bug_l1tf = false;
++
++    if ( cpu_has_bug_l1tf && hit_default )
++        printk("Unrecognised CPU model %#x - assuming vulnerable to L1TF\n",
++               boot_cpu_data.x86_model);
++
++    /*
++     * L1TF safe address heuristics.  These apply to the real hardware we are
++     * running on, and are best-effort-only if Xen is virtualised.
++     *
++     * The address mask which the L1D cache uses, which might be wider than
++     * the CPUID-reported maxphysaddr.
++     */
++    l1tf_addr_mask = ((1ul << l1d_maxphysaddr) - 1) & PAGE_MASK;
++
++    /*
++     * To be safe, l1tf_safe_maddr must be above the highest cacheable entity
++     * in system physical address space.  However, to preserve space for
++     * paged-out metadata, it should be as low as possible above the highest
++     * cacheable address, so as to require fewer high-order bits being set.
++     *
++     * These heuristics are based on some guesswork to improve the likelihood
++     * of safety in the common case, including Linux's L1TF mitigation of
++     * inverting all address bits in a non-present PTE.
++     *
++     * - If L1D is wider than CPUID (Nehalem and later mobile/desktop/low end
++     *   server), setting any address bit beyond CPUID maxphysaddr guarantees
++     *   to make the PTE safe.  This case doesn't require all the high-order
++     *   bits being set, and doesn't require any other source of information
++     *   for safety.
++     *
++     * - If L1D is the same as CPUID (Pre-Nehalem, or high end server), we
++     *   must sacrifice high order bits from the real address space for
++     *   safety.  Therefore, make a blind guess that there is nothing
++     *   cacheable in the top quarter of physical address space.
++     *
++     *   It is exceedingly unlikely for machines to be populated with this
++     *   much RAM (likely 512G on pre-Nehalem, 16T on Nehalem/Westmere, 64T on
++     *   Sandybridge and later) due to the sheer volume of DIMMs this would
++     *   actually take.
++     *
++     *   However, it is possible to find machines this large, so the "top
++     *   quarter" guess is supplemented to push the limit higher if references
++     *   to cacheable mappings (E820/SRAT/EFI/etc) are found above the top
++     *   quarter boundary.
++     *
++     *   Finally, this top quarter guess gives us a good chance of being safe
++     *   when running virtualised (and the CPUID maxphysaddr hasn't been
++     *   levelled for heterogeneous migration safety), where the safety
++     *   consideration is still in terms of host details, but all E820/etc
++     *   information is in terms of guest physical layout.
++     */
++    l1tf_safe_maddr = max(l1tf_safe_maddr, ((l1d_maxphysaddr > paddr_bits)
++                                            ? (1ul << paddr_bits)
++                                            : (3ul << (paddr_bits - 2))));
++}
+ 
+-    if ( !force && (opt_xpti != OPT_XPTI_DEFAULT) )
+-        return;
++int8_t __read_mostly opt_xpti = -1;
+ 
++static __init void xpti_init_default(uint64_t caps)
++{
+     if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD )
+         caps = ARCH_CAPABILITIES_RDCL_NO;
+-    else if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) )
+-        rdmsrl(MSR_ARCH_CAPABILITIES, caps);
+ 
+     if ( caps & ARCH_CAPABILITIES_RDCL_NO )
+         opt_xpti = 0;
+@@ -444,7 +675,13 @@ static __init int parse_xpti(const char *s)
+     const char *ss;
+     int val, rc = 0;
+ 
+-    xpti_init_default(false);
++    /* Inhibit the defaults as an explicit choice has been given. */
++    if ( opt_xpti == -1 )
++        opt_xpti = 0;
++
++    /* Interpret 'xpti' alone in its positive boolean form. */
++    if ( *s == '\0' )
++        opt_xpti = OPT_XPTI_DOM0 | OPT_XPTI_DOMU;
+ 
+     do {
+         ss = strchr(s, ',');
+@@ -463,7 +700,7 @@ static __init int parse_xpti(const char *s)
+ 
+         default:
+             if ( !strcmp(s, "default") )
+-                xpti_init_default(true);
++                opt_xpti = -1;
+             else if ( (val = parse_boolean("dom0", s, ss)) >= 0 )
+                 opt_xpti = (opt_xpti & ~OPT_XPTI_DOM0) |
+                            (val ? OPT_XPTI_DOM0 : 0);
+@@ -625,12 +862,58 @@ void __init init_speculation_mitigations(void)
+     if ( default_xen_spec_ctrl )
+         setup_force_cpu_cap(X86_FEATURE_SC_MSR_IDLE);
+ 
+-    xpti_init_default(false);
++    if ( opt_xpti == -1 )
++        xpti_init_default(caps);
++
+     if ( opt_xpti == 0 )
+         setup_force_cpu_cap(X86_FEATURE_NO_XPTI);
+     else
+         setup_clear_cpu_cap(X86_FEATURE_NO_XPTI);
+ 
++    l1tf_calculations(caps);
++
++    /*
++     * By default, enable PV domU L1TF mitigations on all L1TF-vulnerable
++     * hardware, except when running in shim mode.
++     *
++     * In shim mode, SHADOW is expected to be compiled out, and a malicious
++     * guest kernel can only attack the shim Xen, not the host Xen.
++     */
++    if ( opt_pv_l1tf == -1 )
++    {
++        if ( pv_shim || !cpu_has_bug_l1tf )
++            opt_pv_l1tf = 0;
++        else
++            opt_pv_l1tf = OPT_PV_L1TF_DOMU;
++    }
++
++    /*
++     * By default, enable L1D_FLUSH on L1TF-vulnerable hardware, unless
++     * instructed to skip the flush on vmentry by our outer hypervisor.
++     */
++    if ( !boot_cpu_has(X86_FEATURE_L1D_FLUSH) )
++        opt_l1d_flush = 0;
++    else if ( opt_l1d_flush == -1 )
++        opt_l1d_flush = cpu_has_bug_l1tf && !(caps & ARCH_CAPS_SKIP_L1DFL);
++
++    /*
++     * We do not disable HT by default on affected hardware.
++     *
++     * Firstly, if the user intends to use exclusively PV, or HVM shadow
++     * guests, HT isn't a concern and should remain fully enabled.  Secondly,
++     * safety for HVM HAP guests can be arranged by the toolstack with core
++     * parking, pinning or cpupool configurations, including mixed setups.
++     *
++     * However, if we are on affected hardware, with HT enabled, and the user
++     * hasn't explicitly chosen whether to use HT or not, nag them to do so.
++     */
++    if ( opt_smt == -1 && cpu_has_bug_l1tf && !pv_shim &&
++         boot_cpu_data.x86_num_siblings > 1 )
++        warning_add(
++            "Booted on L1TF-vulnerable hardware with SMT/Hyperthreading\n"
++            "enabled.  Please assess your configuration and choose an\n"
++            "explicit 'smt=<bool>' setting.  See XSA-273.\n");
++
+     print_details(thunk, caps);
+ 
+     /*
+diff --git a/xen/arch/x86/srat.c b/xen/arch/x86/srat.c
+index 166eb44fe2..2d70b45909 100644
+--- a/xen/arch/x86/srat.c
++++ b/xen/arch/x86/srat.c
+@@ -20,6 +20,7 @@
+ #include <xen/pfn.h>
+ #include <asm/e820.h>
+ #include <asm/page.h>
++#include <asm/spec_ctrl.h>
+ 
+ static struct acpi_table_slit *__read_mostly acpi_slit;
+ 
+@@ -284,6 +285,11 @@ acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
+ 	if (!(ma->flags & ACPI_SRAT_MEM_ENABLED))
+ 		return;
+ 
++	start = ma->base_address;
++	end = start + ma->length;
++	/* Supplement the heuristics in l1tf_calculations(). */
++	l1tf_safe_maddr = max(l1tf_safe_maddr, ROUNDUP(end, PAGE_SIZE));
++
+ 	if (num_node_memblks >= NR_NODE_MEMBLKS)
+ 	{
+ 		dprintk(XENLOG_WARNING,
+@@ -292,8 +298,6 @@ acpi_numa_memory_affinity_init(const struct acpi_srat_mem_affinity *ma)
+ 		return;
+ 	}
+ 
+-	start = ma->base_address;
+-	end = start + ma->length;
+ 	pxm = ma->proximity_domain;
+ 	if (srat_rev < 2)
+ 		pxm &= 0xff;
+diff --git a/xen/arch/x86/sysctl.c b/xen/arch/x86/sysctl.c
+index 4d372db12b..e704ed7f1c 100644
+--- a/xen/arch/x86/sysctl.c
++++ b/xen/arch/x86/sysctl.c
+@@ -23,6 +23,7 @@
+ #include <asm/hvm/hvm.h>
+ #include <asm/hvm/support.h>
+ #include <asm/processor.h>
++#include <asm/setup.h>
+ #include <asm/smp.h>
+ #include <asm/numa.h>
+ #include <xen/nodemask.h>
+@@ -48,14 +49,27 @@ static void l3_cache_get(void *arg)
+ 
+ long cpu_up_helper(void *data)
+ {
+-    int cpu = (unsigned long)data;
++    unsigned int cpu = (unsigned long)data;
+     int ret = cpu_up(cpu);
++
+     if ( ret == -EBUSY )
+     {
+         /* On EBUSY, flush RCU work and have one more go. */
+         rcu_barrier();
+         ret = cpu_up(cpu);
+     }
++
++    if ( !ret && !opt_smt &&
++         cpu_data[cpu].compute_unit_id == INVALID_CUID &&
++         cpumask_weight(per_cpu(cpu_sibling_mask, cpu)) > 1 )
++    {
++        ret = cpu_down_helper(data);
++        if ( ret )
++            printk("Could not re-offline CPU%u (%d)\n", cpu, ret);
++        else
++            ret = -EPERM;
++    }
++
+     return ret;
+ }
+ 
+diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c
+index 9f045a2045..789d7ff8cd 100644
+--- a/xen/arch/x86/traps.c
++++ b/xen/arch/x86/traps.c
+@@ -96,8 +96,6 @@ string_param("nmi", opt_nmi);
+ DEFINE_PER_CPU(uint64_t, efer);
+ static DEFINE_PER_CPU(unsigned long, last_extable_addr);
+ 
+-DEFINE_PER_CPU_READ_MOSTLY(u32, ler_msr);
+-
+ DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, gdt_table);
+ DEFINE_PER_CPU_READ_MOSTLY(struct desc_struct *, compat_gdt_table);
+ 
+@@ -117,6 +115,9 @@ integer_param("debug_stack_lines", debug_stack_lines);
+ static bool opt_ler;
+ boolean_param("ler", opt_ler);
+ 
++/* LastExceptionFromIP on this hardware.  Zero if LER is not in use. */
++unsigned int __read_mostly ler_msr;
++
+ #define stack_words_per_line 4
+ #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)regs->rsp)
+ 
+@@ -1778,17 +1779,6 @@ void do_device_not_available(struct cpu_user_regs *regs)
+     return;
+ }
+ 
+-static void ler_enable(void)
+-{
+-    u64 debugctl;
+-
+-    if ( !this_cpu(ler_msr) )
+-        return;
+-
+-    rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+-    wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl | IA32_DEBUGCTLMSR_LBR);
+-}
+-
+ void do_debug(struct cpu_user_regs *regs)
+ {
+     unsigned long dr6;
+@@ -1821,6 +1811,10 @@ void do_debug(struct cpu_user_regs *regs)
+      */
+     write_debugreg(6, X86_DR6_DEFAULT);
+ 
++    /* #DB automatically disabled LBR.  Reinstate it if debugging Xen. */
++    if ( cpu_has_xen_lbr )
++        wrmsrl(MSR_IA32_DEBUGCTLMSR, IA32_DEBUGCTLMSR_LBR);
++
+     if ( !guest_mode(regs) )
+     {
+         /*
+@@ -1838,7 +1832,7 @@ void do_debug(struct cpu_user_regs *regs)
+             {
+                 if ( regs->rip == (unsigned long)sysenter_eflags_saved )
+                     regs->eflags &= ~X86_EFLAGS_TF;
+-                goto out;
++                return;
+             }
+             if ( !debugger_trap_fatal(TRAP_debug, regs) )
+             {
+@@ -1895,20 +1889,14 @@ void do_debug(struct cpu_user_regs *regs)
+                 regs->cs, _p(regs->rip), _p(regs->rip),
+                 regs->ss, _p(regs->rsp), dr6);
+ 
+-        goto out;
++        return;
+     }
+ 
+     /* Save debug status register where guest OS can peek at it */
+     v->arch.debugreg[6] |= (dr6 & ~X86_DR6_DEFAULT);
+     v->arch.debugreg[6] &= (dr6 | ~X86_DR6_DEFAULT);
+ 
+-    ler_enable();
+     pv_inject_hw_exception(TRAP_debug, X86_EVENT_NO_EC);
+-    return;
+-
+- out:
+-    ler_enable();
+-    return;
+ }
+ 
+ static void __init noinline __set_intr_gate(unsigned int n,
+@@ -1952,38 +1940,46 @@ void load_TR(void)
+         : "=m" (old_gdt) : "rm" (TSS_ENTRY << 3), "m" (tss_gdt) : "memory" );
+ }
+ 
+-void percpu_traps_init(void)
++static unsigned int calc_ler_msr(void)
+ {
+-    subarch_percpu_traps_init();
+-
+-    if ( !opt_ler )
+-        return;
+-
+     switch ( boot_cpu_data.x86_vendor )
+     {
+     case X86_VENDOR_INTEL:
+         switch ( boot_cpu_data.x86 )
+         {
+         case 6:
+-            this_cpu(ler_msr) = MSR_IA32_LASTINTFROMIP;
+-            break;
++            return MSR_IA32_LASTINTFROMIP;
++
+         case 15:
+-            this_cpu(ler_msr) = MSR_P4_LER_FROM_LIP;
+-            break;
++            return MSR_P4_LER_FROM_LIP;
+         }
+         break;
++
+     case X86_VENDOR_AMD:
+         switch ( boot_cpu_data.x86 )
+         {
+         case 6:
+         case 0xf ... 0x17:
+-            this_cpu(ler_msr) = MSR_IA32_LASTINTFROMIP;
+-            break;
++            return MSR_IA32_LASTINTFROMIP;
+         }
+         break;
+     }
+ 
+-    ler_enable();
++    return 0;
++}
++
++void percpu_traps_init(void)
++{
++    subarch_percpu_traps_init();
++
++    if ( !opt_ler )
++        return;
++
++    if ( !ler_msr && (ler_msr = calc_ler_msr()) )
++        setup_force_cpu_cap(X86_FEATURE_XEN_LBR);
++
++    if ( cpu_has_xen_lbr )
++        wrmsrl(MSR_IA32_DEBUGCTLMSR, IA32_DEBUGCTLMSR_LBR);
+ }
+ 
+ void __init init_idt_traps(void)
+diff --git a/xen/arch/x86/x86_64/compat/mm.c b/xen/arch/x86/x86_64/compat/mm.c
+index c2aa6f2fdb..02bc75b91e 100644
+--- a/xen/arch/x86/x86_64/compat/mm.c
++++ b/xen/arch/x86/x86_64/compat/mm.c
+@@ -163,19 +163,6 @@ int compat_arch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
+     return rc;
+ }
+ 
+-int compat_update_va_mapping(unsigned int va, u32 lo, u32 hi,
+-                             unsigned int flags)
+-{
+-    return do_update_va_mapping(va, lo | ((u64)hi << 32), flags);
+-}
+-
+-int compat_update_va_mapping_otherdomain(unsigned long va, u32 lo, u32 hi,
+-                                         unsigned long flags,
+-                                         domid_t domid)
+-{
+-    return do_update_va_mapping_otherdomain(va, lo | ((u64)hi << 32), flags, domid);
+-}
+-
+ DEFINE_XEN_GUEST_HANDLE(mmuext_op_compat_t);
+ 
+ int compat_mmuext_op(XEN_GUEST_HANDLE_PARAM(void) arg,
+diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c
+index f7f6928d70..b0401850ef 100644
+--- a/xen/arch/x86/x86_64/traps.c
++++ b/xen/arch/x86/x86_64/traps.c
+@@ -144,11 +144,12 @@ void show_registers(const struct cpu_user_regs *regs)
+     printk("CPU:    %d\n", smp_processor_id());
+     _show_registers(&fault_regs, fault_crs, context, v);
+ 
+-    if ( this_cpu(ler_msr) && !guest_mode(regs) )
++    if ( ler_msr && !guest_mode(regs) )
+     {
+         u64 from, to;
+-        rdmsrl(this_cpu(ler_msr), from);
+-        rdmsrl(this_cpu(ler_msr) + 1, to);
++
++        rdmsrl(ler_msr, from);
++        rdmsrl(ler_msr + 1, to);
+         printk("ler: %016lx -> %016lx\n", from, to);
+     }
+ }
+diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c
+index b4aea4b50a..15edd5df96 100644
+--- a/xen/arch/x86/xstate.c
++++ b/xen/arch/x86/xstate.c
+@@ -670,12 +670,17 @@ static bool valid_xcr0(u64 xcr0)
+     return !(xcr0 & X86_XCR0_BNDREGS) == !(xcr0 & X86_XCR0_BNDCSR);
+ }
+ 
+-int validate_xstate(u64 xcr0, u64 xcr0_accum, const struct xsave_hdr *hdr)
++int validate_xstate(const struct domain *d, uint64_t xcr0, uint64_t xcr0_accum,
++                    const struct xsave_hdr *hdr)
+ {
++    const struct cpuid_policy *cp = d->arch.cpuid;
++    uint64_t xcr0_max =
++        ((uint64_t)cp->xstate.xcr0_high << 32) | cp->xstate.xcr0_low;
+     unsigned int i;
+ 
+     if ( (hdr->xstate_bv & ~xcr0_accum) ||
+          (xcr0 & ~xcr0_accum) ||
++         (xcr0_accum & ~xcr0_max) ||
+          !valid_xcr0(xcr0) ||
+          !valid_xcr0(xcr0_accum) )
+         return -EINVAL;
+@@ -694,20 +699,40 @@ int validate_xstate(u64 xcr0, u64 xcr0_accum, const struct xsave_hdr *hdr)
+ int handle_xsetbv(u32 index, u64 new_bv)
+ {
+     struct vcpu *curr = current;
++    const struct cpuid_policy *cp = curr->domain->arch.cpuid;
++    uint64_t xcr0_max =
++        ((uint64_t)cp->xstate.xcr0_high << 32) | cp->xstate.xcr0_low;
+     u64 mask;
+ 
+     if ( index != XCR_XFEATURE_ENABLED_MASK )
+         return -EOPNOTSUPP;
+ 
+-    if ( (new_bv & ~xfeature_mask) || !valid_xcr0(new_bv) )
++    /*
++     * The CPUID logic shouldn't be able to hand out an XCR0 exceeding Xen's
++     * maximum features, but keep the check for robustness.
++     */
++    if ( unlikely(xcr0_max & ~xfeature_mask) )
++    {
++        gprintk(XENLOG_ERR,
++                "xcr0_max %016" PRIx64 " exceeds hardware max %016" PRIx64 "\n",
++                xcr0_max, xfeature_mask);
++        domain_crash(curr->domain);
++
++        return -EINVAL;
++    }
++
++    if ( (new_bv & ~xcr0_max) || !valid_xcr0(new_bv) )
+         return -EINVAL;
+ 
+-    /* XCR0.PKRU is disabled on PV mode. */
+-    if ( is_pv_vcpu(curr) && (new_bv & X86_XCR0_PKRU) )
+-        return -EOPNOTSUPP;
++    /* By this point, new_bv really should be accepted by hardware. */
++    if ( unlikely(!set_xcr0(new_bv)) )
++    {
++        gprintk(XENLOG_ERR, "new_bv %016" PRIx64 " rejected by hardware\n",
++                new_bv);
++        domain_crash(curr->domain);
+ 
+-    if ( !set_xcr0(new_bv) )
+         return -EFAULT;
++    }
+ 
+     mask = new_bv & ~curr->arch.xcr0_accum;
+     curr->arch.xcr0 = new_bv;
+diff --git a/xen/common/cpu.c b/xen/common/cpu.c
+index 6350f150bd..653a56b840 100644
+--- a/xen/common/cpu.c
++++ b/xen/common/cpu.c
+@@ -67,12 +67,17 @@ void __init register_cpu_notifier(struct notifier_block *nb)
+     spin_unlock(&cpu_add_remove_lock);
+ }
+ 
+-static int take_cpu_down(void *unused)
++static void _take_cpu_down(void *unused)
+ {
+     void *hcpu = (void *)(long)smp_processor_id();
+     int notifier_rc = notifier_call_chain(&cpu_chain, CPU_DYING, hcpu, NULL);
+     BUG_ON(notifier_rc != NOTIFY_DONE);
+     __cpu_disable();
++}
++
++static int take_cpu_down(void *arg)
++{
++    _take_cpu_down(arg);
+     return 0;
+ }
+ 
+@@ -98,7 +103,9 @@ int cpu_down(unsigned int cpu)
+         goto fail;
+     }
+ 
+-    if ( (err = stop_machine_run(take_cpu_down, NULL, cpu)) < 0 )
++    if ( unlikely(system_state < SYS_STATE_active) )
++        on_selected_cpus(cpumask_of(cpu), _take_cpu_down, NULL, true);
++    else if ( (err = stop_machine_run(take_cpu_down, NULL, cpu)) < 0 )
+         goto fail;
+ 
+     __cpu_die(cpu);
+diff --git a/xen/common/cpupool.c b/xen/common/cpupool.c
+index 999839444e..1e8edcbd57 100644
+--- a/xen/common/cpupool.c
++++ b/xen/common/cpupool.c
+@@ -490,7 +490,7 @@ static int cpupool_cpu_add(unsigned int cpu)
+     cpumask_clear_cpu(cpu, &cpupool_locked_cpus);
+     cpumask_set_cpu(cpu, &cpupool_free_cpus);
+ 
+-    if ( system_state == SYS_STATE_resume )
++    if ( system_state == SYS_STATE_suspend || system_state == SYS_STATE_resume )
+     {
+         struct cpupool **c;
+ 
+@@ -522,6 +522,7 @@ static int cpupool_cpu_add(unsigned int cpu)
+          * (or unplugging would have failed) and that is the default behavior
+          * anyway.
+          */
++        per_cpu(cpupool, cpu) = NULL;
+         ret = cpupool_assign_cpu_locked(cpupool0, cpu);
+     }
+  out:
+diff --git a/xen/common/efi/boot.c b/xen/common/efi/boot.c
+index 64d12685d3..6be0b3986f 100644
+--- a/xen/common/efi/boot.c
++++ b/xen/common/efi/boot.c
+@@ -1304,6 +1304,8 @@ efi_start(EFI_HANDLE ImageHandle, EFI_SYSTEM_TABLE *SystemTable)
+ 
+ #ifndef CONFIG_ARM /* TODO - runtime service support */
+ 
++#include <asm/spec_ctrl.h>
++
+ static bool __initdata efi_map_uc;
+ 
+ static int __init parse_efi_param(const char *s)
+@@ -1419,6 +1421,16 @@ void __init efi_init_memory(void)
+                desc->PhysicalStart, desc->PhysicalStart + len - 1,
+                desc->Type, desc->Attribute);
+ 
++        if ( (desc->Attribute & (EFI_MEMORY_WB | EFI_MEMORY_WT)) ||
++             (efi_bs_revision >= EFI_REVISION(2, 5) &&
++              (desc->Attribute & EFI_MEMORY_WP)) )
++        {
++            /* Supplement the heuristics in l1tf_calculations(). */
++            l1tf_safe_maddr =
++                max(l1tf_safe_maddr,
++                    ROUNDUP(desc->PhysicalStart + len, PAGE_SIZE));
++        }
++
+         if ( !efi_enabled(EFI_RS) ||
+              (!(desc->Attribute & EFI_MEMORY_RUNTIME) &&
+               (!map_bs ||
+diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
+index c757b7f6f5..231ecf509a 100644
+--- a/xen/common/grant_table.c
++++ b/xen/common/grant_table.c
+@@ -97,7 +97,11 @@ static unsigned int __read_mostly max_maptrack_frames =
+                                                DEFAULT_MAX_MAPTRACK_FRAMES;
+ integer_runtime_param("gnttab_max_maptrack_frames", max_maptrack_frames);
+ 
+-static unsigned int __read_mostly opt_gnttab_max_version = 2;
++#ifndef GNTTAB_MAX_VERSION
++#define GNTTAB_MAX_VERSION 2
++#endif
++
++static unsigned int __read_mostly opt_gnttab_max_version = GNTTAB_MAX_VERSION;
+ static bool __read_mostly opt_transitive_grants = true;
+ 
+ static int __init parse_gnttab(const char *s)
+diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
+index 20ee1e4897..02aeed7c47 100644
+--- a/xen/common/page_alloc.c
++++ b/xen/common/page_alloc.c
+@@ -1426,7 +1426,7 @@ static void free_heap_pages(
+ 
+             page_list_del(predecessor, &heap(node, zone, order));
+ 
+-            /* Keep predecessor's first_dirty if it is already set. */
++            /* Update predecessor's first_dirty if necessary. */
+             if ( predecessor->u.free.first_dirty == INVALID_DIRTY_IDX &&
+                  pg->u.free.first_dirty != INVALID_DIRTY_IDX )
+                 predecessor->u.free.first_dirty = (1U << order) +
+@@ -1447,6 +1447,12 @@ static void free_heap_pages(
+ 
+             check_and_stop_scrub(successor);
+ 
++            /* Update pg's first_dirty if necessary. */
++            if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX &&
++                 successor->u.free.first_dirty != INVALID_DIRTY_IDX )
++                pg->u.free.first_dirty = (1U << order) +
++                                         successor->u.free.first_dirty;
++
+             page_list_del(successor, &heap(node, zone, order));
+         }
+ 
+diff --git a/xen/common/tasklet.c b/xen/common/tasklet.c
+index 0f0a6f8365..d4fea3151c 100644
+--- a/xen/common/tasklet.c
++++ b/xen/common/tasklet.c
+@@ -156,6 +156,10 @@ void tasklet_kill(struct tasklet *t)
+ 
+     spin_lock_irqsave(&tasklet_lock, flags);
+ 
++    /* Cope with uninitialised tasklets. */
++    if ( list_head_is_null(&t->list) )
++        goto unlock;
++
+     if ( !list_empty(&t->list) )
+     {
+         BUG_ON(t->is_dead || t->is_running || (t->scheduled_on < 0));
+@@ -172,6 +176,7 @@ void tasklet_kill(struct tasklet *t)
+         spin_lock_irqsave(&tasklet_lock, flags);
+     }
+ 
++ unlock:
+     spin_unlock_irqrestore(&tasklet_lock, flags);
+ }
+ 
+diff --git a/xen/include/asm-arm/arm32/system.h b/xen/include/asm-arm/arm32/system.h
+index c617b40438..ab57abfbc5 100644
+--- a/xen/include/asm-arm/arm32/system.h
++++ b/xen/include/asm-arm/arm32/system.h
+@@ -48,6 +48,24 @@ static inline int local_fiq_is_enabled(void)
+     return !(flags & PSR_FIQ_MASK);
+ }
+ 
++#define CSDB    ".inst  0xe320f014"
++
++static inline unsigned long array_index_mask_nospec(unsigned long idx,
++                                                    unsigned long sz)
++{
++    unsigned long mask;
++
++    asm volatile( "cmp    %1, %2\n"
++                  "sbc    %0, %1, %1\n"
++                  CSDB
++                  : "=r" (mask)
++                  : "r" (idx), "Ir" (sz)
++                  : "cc" );
++
++    return mask;
++}
++#define array_index_mask_nospec array_index_mask_nospec
++
+ #endif
+ /*
+  * Local variables:
+diff --git a/xen/include/asm-arm/arm64/system.h b/xen/include/asm-arm/arm64/system.h
+index 2e2ee212a1..2e36573ac6 100644
+--- a/xen/include/asm-arm/arm64/system.h
++++ b/xen/include/asm-arm/arm64/system.h
+@@ -58,6 +58,28 @@ static inline int local_fiq_is_enabled(void)
+     return !(flags & PSR_FIQ_MASK);
+ }
+ 
++#define csdb()  asm volatile ( "hint #20" : : : "memory" )
++
++/*
++ * Generate a mask for array_index_nospec() that is ~0UL when 0 <= idx < sz
++ * and 0 otherwise.
++ */
++static inline unsigned long array_index_mask_nospec(unsigned long idx,
++                                                    unsigned long sz)
++{
++    unsigned long mask;
++
++    asm volatile ( "cmp     %1, %2\n"
++                   "sbc     %0, xzr, xzr\n"
++                   : "=r" (mask)
++                   : "r" (idx), "Ir" (sz)
++                   : "cc" );
++    csdb();
++
++    return mask;
++}
++#define array_index_mask_nospec array_index_mask_nospec
++
+ #endif
+ /*
+  * Local variables:
+diff --git a/xen/include/asm-arm/grant_table.h b/xen/include/asm-arm/grant_table.h
+index e52936c79f..24958e4670 100644
+--- a/xen/include/asm-arm/grant_table.h
++++ b/xen/include/asm-arm/grant_table.h
+@@ -7,6 +7,7 @@
+ #include <xen/sched.h>
+ 
+ #define INITIAL_NR_GRANT_FRAMES 1U
++#define GNTTAB_MAX_VERSION 1
+ 
+ struct grant_table_arch {
+     gfn_t *shared_gfn;
+diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h
+index 2cf8f7ea2a..b237da165c 100644
+--- a/xen/include/asm-x86/cpufeature.h
++++ b/xen/include/asm-x86/cpufeature.h
+@@ -113,6 +113,7 @@
+ #define cpu_has_aperfmperf      boot_cpu_has(X86_FEATURE_APERFMPERF)
+ #define cpu_has_lfence_dispatch boot_cpu_has(X86_FEATURE_LFENCE_DISPATCH)
+ #define cpu_has_no_xpti         boot_cpu_has(X86_FEATURE_NO_XPTI)
++#define cpu_has_xen_lbr         boot_cpu_has(X86_FEATURE_XEN_LBR)
+ 
+ enum _cache_type {
+     CACHE_TYPE_NULL = 0,
+diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h
+index b90aa2d046..8e5cc53dde 100644
+--- a/xen/include/asm-x86/cpufeatures.h
++++ b/xen/include/asm-x86/cpufeatures.h
+@@ -32,3 +32,4 @@ XEN_CPUFEATURE(SC_RSB_PV,       (FSCAPINTS+0)*32+18) /* RSB overwrite needed for
+ XEN_CPUFEATURE(SC_RSB_HVM,      (FSCAPINTS+0)*32+19) /* RSB overwrite needed for HVM */
+ XEN_CPUFEATURE(NO_XPTI,         (FSCAPINTS+0)*32+20) /* XPTI mitigation not in use */
+ XEN_CPUFEATURE(SC_MSR_IDLE,     (FSCAPINTS+0)*32+21) /* (SC_MSR_PV || SC_MSR_HVM) && default_xen_spec_ctrl */
++XEN_CPUFEATURE(XEN_LBR,         (FSCAPINTS+0)*32+22) /* Xen uses MSR_DEBUGCTL.LBR */
+diff --git a/xen/include/asm-x86/domain.h b/xen/include/asm-x86/domain.h
+index e0d413c7de..61e6900465 100644
+--- a/xen/include/asm-x86/domain.h
++++ b/xen/include/asm-x86/domain.h
+@@ -121,6 +121,11 @@ struct shadow_domain {
+ 
+     /* Has this domain ever used HVMOP_pagetable_dying? */
+     bool_t pagetable_dying_op;
++
++#ifdef CONFIG_PV
++    /* PV L1 Terminal Fault mitigation. */
++    struct tasklet pv_l1tf_tasklet;
++#endif /* CONFIG_PV */
+ #endif
+ };
+ 
+@@ -257,6 +262,8 @@ struct pv_domain
+     bool xpti;
+     /* Use PCID feature? */
+     bool pcid;
++    /* Mitigate L1TF with shadow/crashing? */
++    bool check_l1tf;
+ 
+     /* map_domain_page() mapping cache. */
+     struct mapcache_domain mapcache;
+diff --git a/xen/include/asm-x86/hvm/vmx/vmcs.h b/xen/include/asm-x86/hvm/vmx/vmcs.h
+index 06c3179cec..57e5098b99 100644
+--- a/xen/include/asm-x86/hvm/vmx/vmcs.h
++++ b/xen/include/asm-x86/hvm/vmx/vmcs.h
+@@ -130,10 +130,18 @@ struct arch_vmx_struct {
+     uint64_t             sfmask;
+ 
+     struct vmx_msr_bitmap *msr_bitmap;
+-    unsigned int         msr_count;
++
++    /*
++     * Most accesses to the MSR host/guest load/save lists are in current
++     * context.  However, the data can be modified by toolstack/migration
++     * actions.  Remote access is only permitted for paused vcpus, and is
++     * protected under the domctl lock.
++     */
+     struct vmx_msr_entry *msr_area;
+-    unsigned int         host_msr_count;
+     struct vmx_msr_entry *host_msr_area;
++    unsigned int         msr_load_count;
++    unsigned int         msr_save_count;
++    unsigned int         host_msr_count;
+ 
+     unsigned long        eoi_exitmap_changed;
+     DECLARE_BITMAP(eoi_exit_bitmap, NR_VECTORS);
+@@ -149,7 +157,7 @@ struct arch_vmx_struct {
+     /* Are we emulating rather than VMENTERing? */
+     uint8_t              vmx_emulate;
+ 
+-    uint8_t              lbr_fixup_enabled;
++    uint8_t              lbr_flags;
+ 
+     /* Bitmask of segments that we can't safely use in virtual 8086 mode */
+     uint16_t             vm86_segment_mask;
+@@ -514,9 +522,6 @@ enum vmcs_field {
+ 
+ #define VMCS_VPID_WIDTH 16
+ 
+-#define VMX_GUEST_MSR 0
+-#define VMX_HOST_MSR  1
+-
+ /* VM Instruction error numbers */
+ enum vmx_insn_errno
+ {
+@@ -534,6 +539,67 @@ enum vmx_insn_errno
+     VMX_INSN_FAIL_INVALID                  = ~0,
+ };
+ 
++/* MSR load/save list infrastructure. */
++enum vmx_msr_list_type {
++    VMX_MSR_HOST,           /* MSRs loaded on VMExit.                   */
++    VMX_MSR_GUEST,          /* MSRs saved on VMExit, loaded on VMEntry. */
++    VMX_MSR_GUEST_LOADONLY, /* MSRs loaded on VMEntry only.             */
++};
++
++/**
++ * Add an MSR to an MSR list (inserting space for the entry if necessary), and
++ * set the MSR's value.
++ *
++ * It is undefined behaviour to try to insert the same MSR into both the
++ * GUEST and GUEST_LOADONLY lists.
++ *
++ * May fail if unable to allocate memory for the list, or the total number of
++ * entries exceeds the memory allocated.
++ */
++int vmx_add_msr(struct vcpu *v, uint32_t msr, uint64_t val,
++                enum vmx_msr_list_type type);
++
++static inline int vmx_add_guest_msr(struct vcpu *v, uint32_t msr, uint64_t val)
++{
++    return vmx_add_msr(v, msr, val, VMX_MSR_GUEST);
++}
++static inline int vmx_add_host_load_msr(struct vcpu *v, uint32_t msr,
++                                        uint64_t val)
++{
++    return vmx_add_msr(v, msr, val, VMX_MSR_HOST);
++}
++
++struct vmx_msr_entry *vmx_find_msr(const struct vcpu *v, uint32_t msr,
++                                   enum vmx_msr_list_type type);
++
++static inline int vmx_read_guest_msr(const struct vcpu *v, uint32_t msr,
++                                     uint64_t *val)
++{
++    const struct vmx_msr_entry *ent = vmx_find_msr(v, msr, VMX_MSR_GUEST);
++
++    if ( !ent )
++        return -ESRCH;
++
++    *val = ent->data;
++
++    return 0;
++}
++
++static inline int vmx_write_guest_msr(struct vcpu *v, uint32_t msr,
++                                      uint64_t val)
++{
++    struct vmx_msr_entry *ent = vmx_find_msr(v, msr, VMX_MSR_GUEST);
++
++    if ( !ent )
++        return -ESRCH;
++
++    ent->data = val;
++
++    return 0;
++}
++
++
++/* MSR intercept bitmap infrastructure. */
+ enum vmx_msr_intercept_type {
+     VMX_MSR_R  = 1,
+     VMX_MSR_W  = 2,
+@@ -544,10 +610,6 @@ void vmx_clear_msr_intercept(struct vcpu *v, unsigned int msr,
+                              enum vmx_msr_intercept_type type);
+ void vmx_set_msr_intercept(struct vcpu *v, unsigned int msr,
+                            enum vmx_msr_intercept_type type);
+-int vmx_read_guest_msr(u32 msr, u64 *val);
+-int vmx_write_guest_msr(u32 msr, u64 val);
+-struct vmx_msr_entry *vmx_find_msr(u32 msr, int type);
+-int vmx_add_msr(u32 msr, int type);
+ void vmx_vmcs_switch(paddr_t from, paddr_t to);
+ void vmx_set_eoi_exit_bitmap(struct vcpu *v, u8 vector);
+ void vmx_clear_eoi_exit_bitmap(struct vcpu *v, u8 vector);
+@@ -562,15 +624,6 @@ void virtual_vmcs_vmwrite(const struct vcpu *, u32 encoding, u64 val);
+ enum vmx_insn_errno virtual_vmcs_vmwrite_safe(const struct vcpu *v,
+                                               u32 vmcs_encoding, u64 val);
+ 
+-static inline int vmx_add_guest_msr(u32 msr)
+-{
+-    return vmx_add_msr(msr, VMX_GUEST_MSR);
+-}
+-static inline int vmx_add_host_load_msr(u32 msr)
+-{
+-    return vmx_add_msr(msr, VMX_HOST_MSR);
+-}
+-
+ DECLARE_PER_CPU(bool_t, vmxon);
+ 
+ bool_t vmx_vcpu_pml_enabled(const struct vcpu *v);
+diff --git a/xen/include/asm-x86/hypercall.h b/xen/include/asm-x86/hypercall.h
+index 1cc2e37d5c..da38b7991c 100644
+--- a/xen/include/asm-x86/hypercall.h
++++ b/xen/include/asm-x86/hypercall.h
+@@ -165,7 +165,7 @@ extern int compat_update_va_mapping(
+     unsigned int va, u32 lo, u32 hi, unsigned int flags);
+ 
+ extern int compat_update_va_mapping_otherdomain(
+-    unsigned long va, u32 lo, u32 hi, unsigned long flags, domid_t domid);
++    unsigned int va, u32 lo, u32 hi, unsigned int flags, domid_t domid);
+ 
+ DEFINE_XEN_GUEST_HANDLE(trap_info_compat_t);
+ extern int compat_set_trap_table(XEN_GUEST_HANDLE(trap_info_compat_t) traps);
+diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
+index 8fbccc88a7..7235623c86 100644
+--- a/xen/include/asm-x86/msr-index.h
++++ b/xen/include/asm-x86/msr-index.h
+@@ -47,8 +47,12 @@
+ #define ARCH_CAPABILITIES_RDCL_NO	(_AC(1, ULL) << 0)
+ #define ARCH_CAPABILITIES_IBRS_ALL	(_AC(1, ULL) << 1)
+ #define ARCH_CAPS_RSBA			(_AC(1, ULL) << 2)
++#define ARCH_CAPS_SKIP_L1DFL		(_AC(1, ULL) << 3)
+ #define ARCH_CAPS_SSB_NO		(_AC(1, ULL) << 4)
+ 
++#define MSR_FLUSH_CMD			0x0000010b
++#define FLUSH_CMD_L1D			(_AC(1, ULL) << 0)
++
+ /* Intel MSRs. Some also available on other CPUs */
+ #define MSR_IA32_PERFCTR0		0x000000c1
+ #define MSR_IA32_A_PERFCTR0		0x000004c1
+diff --git a/xen/include/asm-x86/msr.h b/xen/include/asm-x86/msr.h
+index f14f265aa5..afbeb7f155 100644
+--- a/xen/include/asm-x86/msr.h
++++ b/xen/include/asm-x86/msr.h
+@@ -241,7 +241,7 @@ static inline void write_efer(uint64_t val)
+     wrmsrl(MSR_EFER, val);
+ }
+ 
+-DECLARE_PER_CPU(u32, ler_msr);
++extern unsigned int ler_msr;
+ 
+ DECLARE_PER_CPU(uint32_t, tsc_aux);
+ 
+diff --git a/xen/include/asm-x86/paging.h b/xen/include/asm-x86/paging.h
+index f0085511c7..f440e3e53c 100644
+--- a/xen/include/asm-x86/paging.h
++++ b/xen/include/asm-x86/paging.h
+@@ -37,11 +37,14 @@
+ 
+ #define PG_SH_shift    20
+ #define PG_HAP_shift   21
++#define PG_SHF_shift   22
+ /* We're in one of the shadow modes */
+ #ifdef CONFIG_SHADOW_PAGING
+ #define PG_SH_enable   (1U << PG_SH_shift)
++#define PG_SH_forced   (1U << PG_SHF_shift)
+ #else
+ #define PG_SH_enable   0
++#define PG_SH_forced   0
+ #endif
+ #define PG_HAP_enable  (1U << PG_HAP_shift)
+ 
+@@ -62,6 +65,7 @@
+ 
+ #define paging_mode_enabled(_d)   (!!(_d)->arch.paging.mode)
+ #define paging_mode_shadow(_d)    (!!((_d)->arch.paging.mode & PG_SH_enable))
++#define paging_mode_sh_forced(_d) (!!((_d)->arch.paging.mode & PG_SH_forced))
+ #define paging_mode_hap(_d)       (!!((_d)->arch.paging.mode & PG_HAP_enable))
+ 
+ #define paging_mode_refcounts(_d) (!!((_d)->arch.paging.mode & PG_refcounts))
+diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
+index 9924cdf1f3..2bd9e69684 100644
+--- a/xen/include/asm-x86/processor.h
++++ b/xen/include/asm-x86/processor.h
+@@ -337,12 +337,6 @@ static always_inline void set_in_cr4 (unsigned long mask)
+     write_cr4(read_cr4() | mask);
+ }
+ 
+-static always_inline void clear_in_cr4 (unsigned long mask)
+-{
+-    mmu_cr4_features &= ~mask;
+-    write_cr4(read_cr4() & ~mask);
+-}
+-
+ static inline unsigned int read_pkru(void)
+ {
+     unsigned int pkru;
+diff --git a/xen/include/asm-x86/setup.h b/xen/include/asm-x86/setup.h
+index 19232afa01..c09a5ff381 100644
+--- a/xen/include/asm-x86/setup.h
++++ b/xen/include/asm-x86/setup.h
+@@ -66,6 +66,8 @@ extern uint8_t kbd_shift_flags;
+ extern unsigned long highmem_start;
+ #endif
+ 
++extern int8_t opt_smt;
++
+ #ifdef CONFIG_SHADOW_PAGING
+ extern bool opt_dom0_shadow;
+ #else
+diff --git a/xen/include/asm-x86/shadow.h b/xen/include/asm-x86/shadow.h
+index 94a34fd16a..f40f411871 100644
+--- a/xen/include/asm-x86/shadow.h
++++ b/xen/include/asm-x86/shadow.h
+@@ -29,6 +29,7 @@
+ #include <asm/flushtlb.h>
+ #include <asm/paging.h>
+ #include <asm/p2m.h>
++#include <asm/spec_ctrl.h>
+ 
+ /*****************************************************************************
+  * Macros to tell which shadow paging mode a domain is in*/
+@@ -115,6 +116,131 @@ static inline int shadow_domctl(struct domain *d,
+ 
+ #endif /* CONFIG_SHADOW_PAGING */
+ 
++/*
++ * Mitigations for L1TF / CVE-2018-3620 for PV guests.
++ *
++ * We cannot alter an architecturally-legitimate PTE which a PV guest has
++ * chosen to write, as traditional paged-out metadata is L1TF-vulnerable.
++ * What we can do is force a PV guest which writes a vulnerable PTE into
++ * shadow mode, so Xen controls the pagetables which are reachable by the CPU
++ * pagewalk.
++ *
++ * The core of the L1TF vulnerability is that the address bits of the PTE
++ * (accounting for PSE and factoring in the level-relevant part of the linear
++ * access) are sent for an L1D lookup (to retrieve the next-level PTE, or
++ * eventual memory address) before the Present or reserved bits (which would
++ * cause a terminal fault) are accounted for.  If an L1D hit occurs, the
++ * resulting data is available for potentially dependent instructions.
++ *
++ * For Present PTEs, the PV type-count safety logic ensures that the address
++ * bits always point at a guest-accessible frame, which is safe WRT L1TF from
++ * Xen's point of view.  In practice, a PV guest should be unable to set any
++ * reserved bits, so should be unable to create any present L1TF-vulnerable
++ * PTEs at all.
++ *
++ * Therefore, these safety checks apply to Not-Present PTEs only, where
++ * traditionally, Xen would have let the guest write any value it chose.
++ *
++ * The all-zero PTE potentially leaks mfn 0.  All software on the system is
++ * expected to cooperate and not put any secrets there.  In a Xen system,
++ * neither Xen nor dom0 are expected to touch mfn 0, as it typically contains
++ * the real mode IVT and BIOS Data Area.  Therefore, mfn 0 is considered safe.
++ *
++ * Any PTE whose address is higher than the maximum cacheable address is safe,
++ * as it won't get an L1D hit.
++ *
++ * Speculative superpages also need accounting for, as PSE is considered
++ * irrespective of Present.  We disallow PSE being set, as it allows an
++ * attacker to leak 2M or 1G of data starting from mfn 0.  Also, because of
++ * recursive/linear pagetables, we must consider PSE even at L4, as hardware
++ * will interpret an L4e as an L3e during a recursive walk.
++ */
++
++static inline bool is_l1tf_safe_maddr(intpte_t pte)
++{
++    paddr_t maddr = pte & l1tf_addr_mask;
++
++    return maddr == 0 || maddr >= l1tf_safe_maddr;
++}
++
++static inline bool pv_l1tf_check_pte(struct domain *d, unsigned int level,
++                                     intpte_t pte)
++{
++    ASSERT(is_pv_domain(d));
++    ASSERT(!(pte & _PAGE_PRESENT));
++
++    if ( d->arch.pv_domain.check_l1tf && !paging_mode_sh_forced(d) &&
++         (((level > 1) && (pte & _PAGE_PSE)) || !is_l1tf_safe_maddr(pte)) )
++    {
++#ifdef CONFIG_SHADOW_PAGING
++        struct tasklet *t = &d->arch.paging.shadow.pv_l1tf_tasklet;
++
++        printk(XENLOG_G_WARNING
++               "d%d L1TF-vulnerable L%ue %016"PRIx64" - Shadowing\n",
++               d->domain_id, level, pte);
++        /*
++         * Safety consideration for accessing tasklet.scheduled_on without the
++         * tasklet lock.  This is a singleshot tasklet with the side effect of
++         * setting PG_SH_forced (checked just above).  Multiple vcpus can race
++         * to schedule the tasklet, but if we observe it scheduled anywhere,
++         * that is good enough.
++         */
++        smp_rmb();
++        if ( !tasklet_is_scheduled(t) )
++            tasklet_schedule(t);
++#else
++        printk(XENLOG_G_ERR
++               "d%d L1TF-vulnerable L%ue %016"PRIx64" - Crashing\n",
++               d->domain_id, level, pte);
++        domain_crash(d);
++#endif
++        return true;
++    }
++
++    return false;
++}
++
++static inline bool pv_l1tf_check_l1e(struct domain *d, l1_pgentry_t l1e)
++{
++    return pv_l1tf_check_pte(d, 1, l1e.l1);
++}
++
++static inline bool pv_l1tf_check_l2e(struct domain *d, l2_pgentry_t l2e)
++{
++    return pv_l1tf_check_pte(d, 2, l2e.l2);
++}
++
++static inline bool pv_l1tf_check_l3e(struct domain *d, l3_pgentry_t l3e)
++{
++    return pv_l1tf_check_pte(d, 3, l3e.l3);
++}
++
++static inline bool pv_l1tf_check_l4e(struct domain *d, l4_pgentry_t l4e)
++{
++    return pv_l1tf_check_pte(d, 4, l4e.l4);
++}
++
++void pv_l1tf_tasklet(unsigned long data);
++
++static inline void pv_l1tf_domain_init(struct domain *d)
++{
++    d->arch.pv_domain.check_l1tf =
++        opt_pv_l1tf & (is_hardware_domain(d)
++                       ? OPT_PV_L1TF_DOM0 : OPT_PV_L1TF_DOMU);
++
++#if defined(CONFIG_SHADOW_PAGING) && defined(CONFIG_PV)
++    tasklet_init(&d->arch.paging.shadow.pv_l1tf_tasklet,
++                 pv_l1tf_tasklet, (unsigned long)d);
++#endif
++}
++
++static inline void pv_l1tf_domain_destroy(struct domain *d)
++{
++#if defined(CONFIG_SHADOW_PAGING) && defined(CONFIG_PV)
++    tasklet_kill(&d->arch.paging.shadow.pv_l1tf_tasklet);
++#endif
++}
++
+ /* Remove all shadows of the guest mfn. */
+ static inline void shadow_remove_all_shadows(struct domain *d, mfn_t gmfn)
+ {
+diff --git a/xen/include/asm-x86/smp.h b/xen/include/asm-x86/smp.h
+index 4e5f673fec..09c55458df 100644
+--- a/xen/include/asm-x86/smp.h
++++ b/xen/include/asm-x86/smp.h
+@@ -26,6 +26,8 @@ DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_mask);
+ DECLARE_PER_CPU(cpumask_var_t, cpu_core_mask);
+ DECLARE_PER_CPU(cpumask_var_t, scratch_cpumask);
+ 
++extern bool park_offline_cpus;
++
+ void smp_send_nmi_allbutself(void);
+ 
+ void send_IPI_mask(const cpumask_t *, int vector);
+diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h
+index 5b40afbab0..8f8aad40bb 100644
+--- a/xen/include/asm-x86/spec_ctrl.h
++++ b/xen/include/asm-x86/spec_ctrl.h
+@@ -29,15 +29,27 @@ void init_speculation_mitigations(void);
+ extern bool opt_ibpb;
+ extern bool opt_ssbd;
+ extern int8_t opt_eager_fpu;
++extern int8_t opt_l1d_flush;
+ 
+ extern bool bsp_delay_spec_ctrl;
+ extern uint8_t default_xen_spec_ctrl;
+ extern uint8_t default_spec_ctrl_flags;
+ 
+-extern uint8_t opt_xpti;
++extern int8_t opt_xpti;
+ #define OPT_XPTI_DOM0  0x01
+ #define OPT_XPTI_DOMU  0x02
+ 
++extern int8_t opt_pv_l1tf;
++#define OPT_PV_L1TF_DOM0  0x01
++#define OPT_PV_L1TF_DOMU  0x02
++
++/*
++ * The L1D address mask, which might be wider than reported in CPUID, and the
++ * system physical address above which there are believed to be no cacheable
++ * memory regions, thus unable to leak data via the L1TF vulnerability.
++ */
++extern paddr_t l1tf_addr_mask, l1tf_safe_maddr;
++
+ static inline void init_shadow_spec_ctrl_state(void)
+ {
+     struct cpu_info *info = get_cpu_info();
+diff --git a/xen/include/asm-x86/system.h b/xen/include/asm-x86/system.h
+index 43fb6fe489..483cd20afd 100644
+--- a/xen/include/asm-x86/system.h
++++ b/xen/include/asm-x86/system.h
+@@ -221,6 +221,30 @@ static always_inline unsigned long __xadd(
+ #define set_mb(var, value) do { xchg(&var, value); } while (0)
+ #define set_wmb(var, value) do { var = value; smp_wmb(); } while (0)
+ 
++/**
++ * array_index_mask_nospec() - generate a mask that is ~0UL when the
++ *      bounds check succeeds and 0 otherwise
++ * @index: array element index
++ * @size: number of elements in array
++ *
++ * Returns:
++ *     0 - (index < size)
++ */
++static inline unsigned long array_index_mask_nospec(unsigned long index,
++                                                    unsigned long size)
++{
++    unsigned long mask;
++
++    asm volatile ( "cmp %[size], %[index]; sbb %[mask], %[mask];"
++                   : [mask] "=r" (mask)
++                   : [size] "g" (size), [index] "r" (index) );
++
++    return mask;
++}
++
++/* Override default implementation in nospec.h. */
++#define array_index_mask_nospec array_index_mask_nospec
++
+ #define local_irq_disable()     asm volatile ( "cli" : : : "memory" )
+ #define local_irq_enable()      asm volatile ( "sti" : : : "memory" )
+ 
+diff --git a/xen/include/asm-x86/xstate.h b/xen/include/asm-x86/xstate.h
+index 86a4a1f75c..47f602b855 100644
+--- a/xen/include/asm-x86/xstate.h
++++ b/xen/include/asm-x86/xstate.h
+@@ -97,8 +97,9 @@ void xsave(struct vcpu *v, uint64_t mask);
+ void xrstor(struct vcpu *v, uint64_t mask);
+ void xstate_set_init(uint64_t mask);
+ bool xsave_enabled(const struct vcpu *v);
+-int __must_check validate_xstate(u64 xcr0, u64 xcr0_accum,
+-                                 const struct xsave_hdr *);
++int __must_check validate_xstate(const struct domain *d,
++                                 uint64_t xcr0, uint64_t xcr0_accum,
++                                 const struct xsave_hdr *hdr);
+ int __must_check handle_xsetbv(u32 index, u64 new_bv);
+ void expand_xsave_states(struct vcpu *v, void *dest, unsigned int size);
+ void compress_xsave_states(struct vcpu *v, const void *src, unsigned int size);
+diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h
+index f1a5ed93e0..6c82816fd3 100644
+--- a/xen/include/public/arch-x86/cpufeatureset.h
++++ b/xen/include/public/arch-x86/cpufeatureset.h
+@@ -244,6 +244,7 @@ XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A  AVX512 Neural Network Instructions *
+ XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A  AVX512 Multiply Accumulation Single Precision */
+ XEN_CPUFEATURE(IBRSB,         9*32+26) /*A  IBRS and IBPB support (used by Intel) */
+ XEN_CPUFEATURE(STIBP,         9*32+27) /*A  STIBP */
++XEN_CPUFEATURE(L1D_FLUSH,     9*32+28) /*S  MSR_FLUSH_CMD and L1D flush. */
+ XEN_CPUFEATURE(ARCH_CAPS,     9*32+29) /*   IA32_ARCH_CAPABILITIES MSR */
+ XEN_CPUFEATURE(SSBD,          9*32+31) /*A  MSR_SPEC_CTRL.SSBD available */
+ 
+diff --git a/xen/include/xen/compiler.h b/xen/include/xen/compiler.h
+index 533a8ea0f3..a7e05681c9 100644
+--- a/xen/include/xen/compiler.h
++++ b/xen/include/xen/compiler.h
+@@ -81,6 +81,9 @@
+ #pragma GCC visibility push(hidden)
+ #endif
+ 
++/* Make the optimizer believe the variable can be manipulated arbitrarily. */
++#define OPTIMIZER_HIDE_VAR(var) __asm__ ( "" : "+g" (var) )
++
+ /* This macro obfuscates arithmetic on a variable address so that gcc
+    shouldn't recognize the original var, and make assumptions about it */
+ /*
+diff --git a/xen/include/xen/cpu.h b/xen/include/xen/cpu.h
+index ffefc09f8e..2fe3ec05d8 100644
+--- a/xen/include/xen/cpu.h
++++ b/xen/include/xen/cpu.h
+@@ -47,6 +47,8 @@ void register_cpu_notifier(struct notifier_block *nb);
+ #define CPU_DYING        (0x0007 | NOTIFY_REVERSE)
+ /* CPU_DEAD: CPU is dead. */
+ #define CPU_DEAD         (0x0008 | NOTIFY_REVERSE)
++/* CPU_REMOVE: CPU was removed. */
++#define CPU_REMOVE       (0x0009 | NOTIFY_REVERSE)
+ 
+ /* Perform CPU hotplug. May return -EAGAIN. */
+ int cpu_down(unsigned int cpu);
+diff --git a/xen/include/xen/cpumask.h b/xen/include/xen/cpumask.h
+index 42340a098e..4a11bcc3f3 100644
+--- a/xen/include/xen/cpumask.h
++++ b/xen/include/xen/cpumask.h
+@@ -351,16 +351,35 @@ static inline bool_t alloc_cpumask_var(cpumask_var_t *mask)
+ 	return *mask != NULL;
+ }
+ 
++static inline bool cond_alloc_cpumask_var(cpumask_var_t *mask)
++{
++	if (*mask == NULL)
++		*mask = _xmalloc(nr_cpumask_bits / 8, sizeof(long));
++	return *mask != NULL;
++}
++
+ static inline bool_t zalloc_cpumask_var(cpumask_var_t *mask)
+ {
+ 	*(void **)mask = _xzalloc(nr_cpumask_bits / 8, sizeof(long));
+ 	return *mask != NULL;
+ }
+ 
++static inline bool cond_zalloc_cpumask_var(cpumask_var_t *mask)
++{
++	if (*mask == NULL)
++		*mask = _xzalloc(nr_cpumask_bits / 8, sizeof(long));
++	else
++		cpumask_clear(*mask);
++	return *mask != NULL;
++}
++
+ static inline void free_cpumask_var(cpumask_var_t mask)
+ {
+ 	xfree(mask);
+ }
++
++/* Free an allocated mask, and zero the pointer to it. */
++#define FREE_CPUMASK_VAR(m) XFREE(m)
+ #else
+ typedef cpumask_t cpumask_var_t[1];
+ 
+@@ -368,16 +387,20 @@ static inline bool_t alloc_cpumask_var(cpumask_var_t *mask)
+ {
+ 	return 1;
+ }
++#define cond_alloc_cpumask_var alloc_cpumask_var
+ 
+ static inline bool_t zalloc_cpumask_var(cpumask_var_t *mask)
+ {
+ 	cpumask_clear(*mask);
+ 	return 1;
+ }
++#define cond_zalloc_cpumask_var zalloc_cpumask_var
+ 
+ static inline void free_cpumask_var(cpumask_var_t mask)
+ {
+ }
++
++#define FREE_CPUMASK_VAR(m) free_cpumask_var(m)
+ #endif
+ 
+ #if NR_CPUS > 1
+diff --git a/xen/include/xen/list.h b/xen/include/xen/list.h
+index fa07d720ee..1387abb211 100644
+--- a/xen/include/xen/list.h
++++ b/xen/include/xen/list.h
+@@ -51,6 +51,11 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
+     list->prev = list;
+ }
+ 
++static inline bool list_head_is_null(const struct list_head *list)
++{
++    return !list->next && !list->prev;
++}
++
+ /*
+  * Insert a new entry between two known consecutive entries. 
+  *
+diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h
+index e928551c91..24654e8e22 100644
+--- a/xen/include/xen/mm.h
++++ b/xen/include/xen/mm.h
+@@ -162,6 +162,14 @@ void free_xenheap_pages(void *v, unsigned int order);
+ bool scrub_free_pages(void);
+ #define alloc_xenheap_page() (alloc_xenheap_pages(0,0))
+ #define free_xenheap_page(v) (free_xenheap_pages(v,0))
++
++/* Free an allocation, and zero the pointer to it. */
++#define FREE_XENHEAP_PAGES(p, o) do { \
++    free_xenheap_pages(p, o);         \
++    (p) = NULL;                       \
++} while ( false )
++#define FREE_XENHEAP_PAGE(p) FREE_XENHEAP_PAGES(p, 0)
++
+ /* Map machine page range in Xen virtual address space. */
+ int map_pages_to_xen(
+     unsigned long virt,
+diff --git a/xen/include/xen/nospec.h b/xen/include/xen/nospec.h
+new file mode 100644
+index 0000000000..48793996e8
+--- /dev/null
++++ b/xen/include/xen/nospec.h
+@@ -0,0 +1,70 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/* Copyright(c) 2018 Linus Torvalds. All rights reserved. */
++/* Copyright(c) 2018 Alexei Starovoitov. All rights reserved. */
++/* Copyright(c) 2018 Intel Corporation. All rights reserved. */
++/* Copyright(c) 2018 Citrix Systems R&D Ltd. All rights reserved. */
++
++#ifndef XEN_NOSPEC_H
++#define XEN_NOSPEC_H
++
++#include <asm/system.h>
++
++/**
++ * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
++ * @index: array element index
++ * @size: number of elements in array
++ *
++ * When @index is out of bounds (@index >= @size), the sign bit will be
++ * set.  Extend the sign bit to all bits and invert, giving a result of
++ * zero for an out of bounds index, or ~0 if within bounds [0, @size).
++ */
++#ifndef array_index_mask_nospec
++static inline unsigned long array_index_mask_nospec(unsigned long index,
++                                                    unsigned long size)
++{
++    /*
++     * Always calculate and emit the mask even if the compiler
++     * thinks the mask is not needed. The compiler does not take
++     * into account the value of @index under speculation.
++     */
++    OPTIMIZER_HIDE_VAR(index);
++    return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
++}
++#endif
++
++/*
++ * array_index_nospec - sanitize an array index after a bounds check
++ *
++ * For a code sequence like:
++ *
++ *     if (index < size) {
++ *         index = array_index_nospec(index, size);
++ *         val = array[index];
++ *     }
++ *
++ * ...if the CPU speculates past the bounds check then
++ * array_index_nospec() will clamp the index within the range of [0,
++ * size).
++ */
++#define array_index_nospec(index, size)                                 \
++({                                                                      \
++    typeof(index) _i = (index);                                         \
++    typeof(size) _s = (size);                                           \
++    unsigned long _mask = array_index_mask_nospec(_i, _s);              \
++                                                                        \
++    BUILD_BUG_ON(sizeof(_i) > sizeof(long));                            \
++    BUILD_BUG_ON(sizeof(_s) > sizeof(long));                            \
++                                                                        \
++    (typeof(_i)) (_i & _mask);                                          \
++})
++
++#endif /* XEN_NOSPEC_H */
++
++/*
++ * Local variables:
++ * mode: C
++ * c-file-style: "BSD"
++ * c-basic-offset: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
+index 99d2af2e1f..e79d5a36ca 100644
+--- a/xen/include/xen/sched.h
++++ b/xen/include/xen/sched.h
+@@ -788,7 +788,7 @@ static inline struct domain *next_domain_in_cpupool(
+ #define _VPF_parked          8
+ #define VPF_parked           (1UL<<_VPF_parked)
+ 
+-static inline int vcpu_runnable(struct vcpu *v)
++static inline bool vcpu_runnable(const struct vcpu *v)
+ {
+     return !(v->pause_flags |
+              atomic_read(&v->pause_count) |
+diff --git a/xen/include/xen/tasklet.h b/xen/include/xen/tasklet.h
+index 23d69c738e..bc9ddace6d 100644
+--- a/xen/include/xen/tasklet.h
++++ b/xen/include/xen/tasklet.h
+@@ -50,6 +50,11 @@ static inline bool tasklet_work_to_do(unsigned int cpu)
+                                                 TASKLET_scheduled);
+ }
+ 
++static inline bool tasklet_is_scheduled(const struct tasklet *t)
++{
++    return t->scheduled_on != -1;
++}
++
+ void tasklet_schedule_on_cpu(struct tasklet *t, unsigned int cpu);
+ void tasklet_schedule(struct tasklet *t);
+ void do_tasklet(void);
+diff --git a/xen/include/xen/xmalloc.h b/xen/include/xen/xmalloc.h
+index cc2673d8ae..9aa5edf593 100644
+--- a/xen/include/xen/xmalloc.h
++++ b/xen/include/xen/xmalloc.h
+@@ -26,6 +26,12 @@
+ /* Free any of the above. */
+ extern void xfree(void *);
+ 
++/* Free an allocation, and zero the pointer to it. */
++#define XFREE(p) do { \
++    xfree(p);         \
++    (p) = NULL;       \
++} while ( false )
++
+ /* Underlying functions */
+ extern void *_xmalloc(unsigned long size, unsigned long align);
+ extern void *_xzalloc(unsigned long size, unsigned long align);
author	Mario Preksavec <mario@slackware.hr>	2018-08-25 14:16:23 +0200
committer	Willy Sudiarto Raharjo <willysr@slackbuilds.org>	2018-09-01 07:32:30 +0700
commit	9be84725e758c71832b27d3b3918cd67cc65f182 (patch)
tree	7617b9cb8c97051797f9464a2b0e396a1b303d20
parent	78ff47b691fb8043946cb8bcc3b820b7369d9d7f (diff)
download	slackbuilds-9be84725e758c71832b27d3b3918cd67cc65f182.tar.gz