Merge tag 'powerpc-5.9-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux

Pull powerpc updates from Michael Ellerman:

 - Add support for (optionally) using queued spinlocks & rwlocks.

 - Support for a new faster system call ABI using the scv instruction on
   Power9 or later.

 - Drop support for the PROT_SAO mmap/mprotect flag as it will be
   unsupported on Power10 and future processors, leaving us with no way
   to implement the functionality it requests. This risks breaking
   userspace, though we believe it is unused in practice.

 - A bug fix for, and then the removal of, our custom stack expansion
   checking. We now allow stack expansion up to the rlimit, like other
   architectures.

 - Remove the remnants of our (previously disabled) topology update
   code, which tried to react to NUMA layout changes on virtualised
   systems, but was prone to crashes and other problems.

 - Add PMU support for Power10 CPUs.

 - A change to our signal trampoline so that we don't unbalance the link
   stack (branch return predictor) in the signal delivery path.

 - Lots of other cleanups, refactorings, smaller features and so on as
   usual.

Thanks to: Abhishek Goel, Alastair D'Silva, Alexander A. Klimov, Alexey
Kardashevskiy, Alistair Popple, Andrew Donnellan, Aneesh Kumar K.V, Anju
T Sudhakar, Anton Blanchard, Arnd Bergmann, Athira Rajeev, Balamuruhan
S, Bharata B Rao, Bill Wendling, Bin Meng, Cédric Le Goater, Chris
Packham, Christophe Leroy, Christoph Hellwig, Daniel Axtens, Dan
Williams, David Lamparter, Desnes A. Nunes do Rosario, Erhard F., Finn
Thain, Frederic Barrat, Ganesh Goudar, Gautham R. Shenoy, Geoff Levand,
Greg Kurz, Gustavo A. R. Silva, Hari Bathini, Harish, Imre Kaloz, Joel
Stanley, Joe Perches, John Crispin, Jordan Niethe, Kajol Jain, Kamalesh
Babulal, Kees Cook, Laurent Dufour, Leonardo Bras, Li RongQing, Madhavan
Srinivasan, Mahesh Salgaonkar, Mark Cave-Ayland, Michal Suchanek, Milton
Miller, Mimi Zohar, Murilo Opsfelder Araujo, Nathan Chancellor, Nathan
Lynch, Naveen N. Rao, Nayna Jain, Nicholas Piggin, Oliver O'Halloran,
Palmer Dabbelt, Pedro Miraglia Franco de Carvalho, Philippe Bergheaud,
Pingfan Liu, Pratik Rajesh Sampat, Qian Cai, Qinglang Miao, Randy
Dunlap, Ravi Bangoria, Sachin Sant, Sam Bobroff, Sandipan Das, Santosh
Sivaraj, Satheesh Rajendran, Shirisha Ganta, Sourabh Jain, Srikar
Dronamraju, Stan Johnson, Stephen Rothwell, Thadeu Lima de Souza
Cascardo, Thiago Jung Bauermann, Tom Lane, Vaibhav Jain, Vladis Dronov,
Wei Yongjun, Wen Xiong, YueHaibing.

* tag 'powerpc-5.9-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (337 commits)
  selftests/powerpc: Fix pkey syscall redefinitions
  powerpc: Fix circular dependency between percpu.h and mmu.h
  powerpc/powernv/sriov: Fix use of uninitialised variable
  selftests/powerpc: Skip vmx/vsx/tar/etc tests on older CPUs
  powerpc/40x: Fix assembler warning about r0
  powerpc/papr_scm: Add support for fetching nvdimm 'fuel-gauge' metric
  powerpc/papr_scm: Fetch nvdimm performance stats from PHYP
  cpuidle: pseries: Fixup exit latency for CEDE(0)
  cpuidle: pseries: Add function to parse extended CEDE records
  cpuidle: pseries: Set the latency-hint before entering CEDE
  selftests/powerpc: Fix online CPU selection
  powerpc/perf: Consolidate perf_callchain_user_[64|32]()
  powerpc/pseries/hotplug-cpu: Remove double free in error path
  powerpc/pseries/mobility: Add pr_debug() for device tree changes
  powerpc/pseries/mobility: Set pr_fmt()
  powerpc/cacheinfo: Warn if cache object chain becomes unordered
  powerpc/cacheinfo: Improve diagnostics about malformed cache lists
  powerpc/cacheinfo: Use name@unit instead of full DT path in debug messages
  powerpc/cacheinfo: Set pr_fmt()
  powerpc: fix function annotations to avoid section mismatch warnings with gcc-10
  ...
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7 b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
index e8698af..f7e32f2 100644
--- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
+++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-hv_24x7
@@ -43,6 +43,13 @@
 		This sysfs interface exposes the number of cores per chip
 		present in the system.
 
+What:		/sys/devices/hv_24x7/interface/cpumask
+Date:		July 2020
+Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
+Description:	read only
+		This sysfs file exposes the cpumask which is designated to make
+		HCALLs to retrieve hv-24x7 pmu event counter data.
+
 What:		/sys/bus/event_source/devices/hv_24x7/event_descs/<event-name>
 Date:		February 2014
 Contact:	Linux on PowerPC Developer List <linuxppc-dev@lists.ozlabs.org>
diff --git a/Documentation/ABI/testing/sysfs-bus-papr-pmem b/Documentation/ABI/testing/sysfs-bus-papr-pmem
index 5b10d03..c1a6727 100644
--- a/Documentation/ABI/testing/sysfs-bus-papr-pmem
+++ b/Documentation/ABI/testing/sysfs-bus-papr-pmem
@@ -25,3 +25,30 @@
 				  NVDIMM have been scrubbed.
 		* "locked"	: Indicating that NVDIMM contents cant
 				  be modified until next power cycle.
+
+What:		/sys/bus/nd/devices/nmemX/papr/perf_stats
+Date:		May, 2020
+KernelVersion:	v5.9
+Contact:	linuxppc-dev <linuxppc-dev@lists.ozlabs.org>, linux-nvdimm@lists.01.org,
+Description:
+		(RO) Report various performance stats related to papr-scm NVDIMM
+		device.  Each stat is reported on a new line with each line
+		composed of a stat-identifier followed by its value. Below are
+		the currently known DIMM performance stats which are reported:
+
+		* "CtlResCt" : Controller Reset Count
+		* "CtlResTm" : Controller Reset Elapsed Time
+		* "PonSecs " : Power-on Seconds
+		* "MemLife " : Life Remaining
+		* "CritRscU" : Critical Resource Utilization
+		* "HostLCnt" : Host Load Count
+		* "HostSCnt" : Host Store Count
+		* "HostSDur" : Host Store Duration
+		* "HostLDur" : Host Load Duration
+		* "MedRCnt " : Media Read Count
+		* "MedWCnt " : Media Write Count
+		* "MedRDur " : Media Read Duration
+		* "MedWDur " : Media Write Duration
+		* "CchRHCnt" : Cache Read Hit Count
+		* "CchWHCnt" : Cache Write Hit Count
+		* "FastWCnt" : Fast Write Count
\ No newline at end of file
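For illustration, a userspace reader of the perf_stats file above might split
each line on the fixed-width eight-character stat-identifier (note the
space-padded names such as "PonSecs "). A minimal sketch, assuming an nmem0
device instance and leaving the value format to whatever the kernel emits:

  #include <stdio.h>
  #include <string.h>

  int main(void)
  {
  	/* nmem0 is an illustrative device instance */
  	FILE *f = fopen("/sys/bus/nd/devices/nmem0/papr/perf_stats", "r");
  	char line[128];

  	if (!f) {
  		perror("perf_stats");
  		return 1;
  	}
  	while (fgets(line, sizeof(line), f)) {
  		/* first 8 bytes: stat-identifier; rest of line: its value */
  		if (strlen(line) > 8)
  			printf("id='%.8s' value:%s", line, line + 8);
  	}
  	fclose(f);
  	return 0;
  }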
diff --git a/Documentation/ABI/testing/sysfs-class-ocxl b/Documentation/ABI/testing/sysfs-class-ocxl
index b5b1fa1..ae1276e 100644
--- a/Documentation/ABI/testing/sysfs-class-ocxl
+++ b/Documentation/ABI/testing/sysfs-class-ocxl
@@ -33,3 +33,14 @@
 Contact:	linuxppc-dev@lists.ozlabs.org
 Description:	read/write
 		Give access the global mmio area for the AFU
+
+What:		/sys/class/ocxl/<afu name>/reload_on_reset
+Date:		February 2020
+Contact:	linuxppc-dev@lists.ozlabs.org
+Description:	read/write
+		Control whether the FPGA is reloaded on a link reset. Enabled
+		through a vendor-specific logic block on the FPGA.
+			0	Do not reload FPGA image from flash
+			1	Reload FPGA image from flash
+			unavailable
+				The device does not support this capability
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index ef66b3c..00b993a 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -916,6 +916,10 @@
 	disable_radix	[PPC]
 			Disable RADIX MMU mode on POWER9
 
+	radix_hcall_invalidate=on  [PPC/PSERIES]
+			Disable RADIX GTSE feature and use hcall for TLB
+			invalidate.
+
 	disable_tlbie	[PPC]
 			Disable TLBIE instruction. Currently does not work
 			with KVM, with HASH MMU, or with coherent accelerators.
diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst
index f646687..298c9c8 100644
--- a/Documentation/core-api/cpu_hotplug.rst
+++ b/Documentation/core-api/cpu_hotplug.rst
@@ -50,13 +50,6 @@
 
   This option is limited to the X86 and S390 architecture.
 
-``cede_offline={"off","on"}``
-  Use this option to disable/enable putting offlined processors to an extended
-  ``H_CEDE`` state on supported pseries platforms. If nothing is specified,
-  ``cede_offline`` is set to "on".
-
-  This option is limited to the PowerPC architecture.
-
 ``cpu0_hotplug``
   Allow to shutdown CPU0.
 
diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
index 68658a6..47e6903 100644
--- a/Documentation/features/sched/membarrier-sync-core/arch-support.txt
+++ b/Documentation/features/sched/membarrier-sync-core/arch-support.txt
@@ -5,7 +5,7 @@
 #
 # Architecture requirements
 #
-# * arm/arm64
+# * arm/arm64/powerpc
 #
 # Rely on implicit context synchronization as a result of exception return
 # when returning from IPI handler, and when returning to user-space.
@@ -45,7 +45,7 @@
     |       nios2: | TODO |
     |    openrisc: | TODO |
     |      parisc: | TODO |
-    |     powerpc: | TODO |
+    |     powerpc: |  ok  |
     |       riscv: | TODO |
     |        s390: | TODO |
     |          sh: | TODO |
diff --git a/Documentation/memory-barriers.txt b/Documentation/memory-barriers.txt
index 4e55aba..9618633 100644
--- a/Documentation/memory-barriers.txt
+++ b/Documentation/memory-barriers.txt
@@ -1935,6 +1935,20 @@
      relaxed I/O accessors and the Documentation/DMA-API.txt file for more
      information on consistent memory.
 
+ (*) pmem_wmb();
+
+     This is for use with persistent memory, to ensure that stores for which
+     modifications are written to persistent storage have reached a platform
+     durability domain.
+
+     For example, after a non-temporal write to a pmem region, we use pmem_wmb()
+     to ensure that the stores have reached a platform durability domain. This
+     ensures that the stores have updated persistent storage before any data
+     access or data transfer caused by subsequent instructions is initiated.
+     This is in addition to the ordering done by wmb().
+
+     For loads from persistent memory, existing read memory barriers are
+     sufficient to ensure read ordering.
 
 ===============================
 IMPLICIT KERNEL MEMORY BARRIERS
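To make the rule concrete, a minimal kernel-side sketch: pmem_publish() is a
hypothetical helper, and memcpy_flushcache() stands in for any flush-on-store
copy into a pmem mapping.

  #include <linux/string.h>	/* memcpy_flushcache() */
  #include <asm/barrier.h>	/* pmem_wmb() */

  static void pmem_publish(void *pmem_dst, const void *src, size_t len)
  {
  	/* copy into the pmem region, flushing as we go */
  	memcpy_flushcache(pmem_dst, src, len);

  	/*
  	 * Ensure the flushed stores have reached the platform
  	 * durability domain before subsequent instructions initiate
  	 * any dependent data access or data transfer.
  	 */
  	pmem_wmb();
  }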
diff --git a/Documentation/powerpc/cpu_families.rst b/Documentation/powerpc/cpu_families.rst
index 1e063c5..9b84e04 100644
--- a/Documentation/powerpc/cpu_families.rst
+++ b/Documentation/powerpc/cpu_families.rst
@@ -9,7 +9,9 @@
 Book3S (aka sPAPR)
 ------------------
 
-- Hash MMU
+- Hash MMU (except 603 and e300)
+- Software loaded TLB (603 and e300)
+- Selectable Software loaded TLB in addition to hash MMU (755, 7450, e600)
 - Mix of 32 & 64 bit::
 
    +--------------+                 +----------------+
@@ -24,9 +26,9 @@
           |                                 |
           |                                 |
           v                                 v
-   +--------------+                 +----------------+      +-------+
-   |     604      |                 |    750 (G3)    | ---> | 750CX |
-   +--------------+                 +----------------+      +-------+
+   +--------------+    +-----+      +----------------+      +-------+
+   |     604      |    | 755 | <--- |    750 (G3)    | ---> | 750CX |
+   +--------------+    +-----+      +----------------+      +-------+
           |                                 |                   |
           |                                 |                   |
           v                                 v                   v
diff --git a/Documentation/powerpc/mpc52xx.rst b/Documentation/powerpc/mpc52xx.rst
index 8676ac6..3026070 100644
--- a/Documentation/powerpc/mpc52xx.rst
+++ b/Documentation/powerpc/mpc52xx.rst
@@ -2,7 +2,7 @@
 Linux 2.6.x on MPC52xx family
 =============================
 
-For the latest info, go to http://www.246tNt.com/mpc52xx/
+For the latest info, go to https://www.246tNt.com/mpc52xx/
 
 To compile/use :
 
diff --git a/Documentation/powerpc/syscall64-abi.rst b/Documentation/powerpc/syscall64-abi.rst
index e49f69f..46caaad 100644
--- a/Documentation/powerpc/syscall64-abi.rst
+++ b/Documentation/powerpc/syscall64-abi.rst
@@ -5,6 +5,15 @@
 syscall
 =======
 
+Invocation
+----------
+The syscall is made with the sc instruction, and returns with execution
+continuing at the instruction following the sc instruction.
+
+If PPC_FEATURE2_SCV appears in the AT_HWCAP2 ELF auxiliary vector, the
+scv 0 instruction is an alternative that may provide better performance,
+with some differences to the calling sequence.
+
 syscall calling sequence\ [1]_ matches the Power Architecture 64-bit ELF ABI
 specification C function calling sequence, including register preservation
 rules, with the following differences.
@@ -12,16 +21,23 @@
 .. [1] Some syscalls (typically low-level management functions) may have
        different calling sequences (e.g., rt_sigreturn).
 
-Parameters and return value
----------------------------
+Parameters
+----------
 The system call number is specified in r0.
 
 There is a maximum of 6 integer parameters to a syscall, passed in r3-r8.
 
-Both a return value and a return error code are returned. cr0.SO is the return
-error code, and r3 is the return value or error code. When cr0.SO is clear,
-the syscall succeeded and r3 is the return value. When cr0.SO is set, the
-syscall failed and r3 is the error code that generally corresponds to errno.
+Return value
+------------
+- For the sc instruction, both a value and an error condition are returned.
+  cr0.SO is the error condition, and r3 is the return value. When cr0.SO is
+  clear, the syscall succeeded and r3 is the return value. When cr0.SO is set,
+  the syscall failed and r3 is the error value (that normally corresponds to
+  errno).
+
+- For the scv 0 instruction, the return value indicates failure if it is in
+  the range -4095..-1 (i.e., it is >= -MAX_ERRNO (-4095) as an unsigned
+  comparison), in which case the error value is the negated return value.
 
 Stack
 -----
@@ -34,22 +50,23 @@
 following differences:
 
 =========== ============= ========================================
+--- For the sc instruction, differences with the ELF ABI ---
 r0          Volatile      (System call number.)
 r3          Volatile      (Parameter 1, and return value.)
 r4-r8       Volatile      (Parameters 2-6.)
-cr0         Volatile      (cr0.SO is the return error condition)
+cr0         Volatile      (cr0.SO is the return error condition.)
 cr1, cr5-7  Nonvolatile
 lr          Nonvolatile
+
+--- For the scv 0 instruction, differences with the ELF ABI ---
+r0          Volatile      (System call number.)
+r3          Volatile      (Parameter 1, and return value.)
+r4-r8       Volatile      (Parameters 2-6.)
 =========== ============= ========================================
 
 All floating point and vector data registers as well as control and status
 registers are nonvolatile.
 
-Invocation
-----------
-The syscall is performed with the sc instruction, and returns with execution
-continuing at the instruction following the sc instruction.
-
 Transactional Memory
 --------------------
 Syscall behavior can change if the processor is in transactional or suspended
@@ -75,6 +92,7 @@
   returning to the caller. This case is not well defined or supported, so this
   behavior should not be relied upon.
 
+scv 0 syscalls will always behave as PPC_FEATURE2_HTM_NOSC.
 
 vsyscall
 ========
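Tying the invocation and return-value conventions together, a hedged userspace
sketch (assumptions: powerpc64 target, getpid (__NR_getpid == 20) as the
example syscall, and a clobber list taken from the scv table above plus the
usual ELF ABI volatile set):

  #include <stdio.h>
  #include <sys/auxv.h>

  #ifndef PPC_FEATURE2_SCV
  #define PPC_FEATURE2_SCV 0x00100000
  #endif

  static long scv_syscall0(long nr)
  {
  	register long r0 asm("r0") = nr;	/* system call number */
  	register long r3 asm("r3");		/* return value */

  	asm volatile("scv 0"
  		     : "+r" (r0), "=r" (r3)
  		     :
  		     : "r4", "r5", "r6", "r7", "r8", "r9", "r10", "r11",
  		       "r12", "lr", "ctr", "xer", "cr0", "cr1", "cr5",
  		       "cr6", "cr7", "memory");
  	return r3;	/* -4095..-1 signals failure (negated errno) */
  }

  int main(void)
  {
  	if (getauxval(AT_HWCAP2) & PPC_FEATURE2_SCV)
  		printf("pid via scv: %ld\n", scv_syscall0(20));
  	else
  		puts("scv not advertised; fall back to sc");
  	return 0;
  }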
diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index cdfd981..eb3a131 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -2160,9 +2160,12 @@
   PPC     KVM_REG_PPC_MMCRA               64
   PPC     KVM_REG_PPC_MMCR2               64
   PPC     KVM_REG_PPC_MMCRS               64
+  PPC     KVM_REG_PPC_MMCR3               64
   PPC     KVM_REG_PPC_SIAR                64
   PPC     KVM_REG_PPC_SDAR                64
   PPC     KVM_REG_PPC_SIER                64
+  PPC     KVM_REG_PPC_SIER2               64
+  PPC     KVM_REG_PPC_SIER3               64
   PPC     KVM_REG_PPC_PMC1                32
   PPC     KVM_REG_PPC_PMC2                32
   PPC     KVM_REG_PPC_PMC3                32
diff --git a/MAINTAINERS b/MAINTAINERS
index a45fe1a..9590b98 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13308,7 +13308,6 @@
 
 PCI ENHANCED ERROR HANDLING (EEH) FOR POWERPC
 M:	Russell Currey <ruscur@russell.cc>
-M:	Sam Bobroff <sbobroff@linux.ibm.com>
 M:	Oliver O'Halloran <oohall@gmail.com>
 L:	linuxppc-dev@lists.ozlabs.org
 S:	Supported
diff --git a/arch/m68k/include/asm/adb_iop.h b/arch/m68k/include/asm/adb_iop.h
index 195d7fb..6aecd02 100644
--- a/arch/m68k/include/asm/adb_iop.h
+++ b/arch/m68k/include/asm/adb_iop.h
@@ -29,6 +29,7 @@
 
 #define ADB_IOP_EXPLICIT	0x80	/* nonzero if explicit command */
 #define ADB_IOP_AUTOPOLL	0x40	/* auto/SRQ polling enabled    */
+#define ADB_IOP_SET_AUTOPOLL	0x20	/* set autopoll device list    */
 #define ADB_IOP_SRQ		0x04	/* SRQ detected                */
 #define ADB_IOP_TIMEOUT		0x02	/* nonzero if timeout          */
 
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 29a0579..1f48bbf 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -131,6 +131,7 @@
 	select ARCH_HAS_PTE_DEVMAP		if PPC_BOOK3S_64
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_MEMBARRIER_CALLBACKS
+	select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_SCALED_CPUTIME		if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
 	select ARCH_HAS_STRICT_KERNEL_RWX	if (PPC32 && !HIBERNATION)
 	select ARCH_HAS_TICK_BROADCAST		if GENERIC_CLOCKEVENTS_BROADCAST
@@ -145,6 +146,8 @@
 	select ARCH_SUPPORTS_ATOMIC_RMW
 	select ARCH_USE_BUILTIN_BSWAP
 	select ARCH_USE_CMPXCHG_LOCKREF		if PPC64
+	select ARCH_USE_QUEUED_RWLOCKS		if PPC_QUEUED_SPINLOCKS
+	select ARCH_USE_QUEUED_SPINLOCKS	if PPC_QUEUED_SPINLOCKS
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select ARCH_WEAK_RELEASE_ACQUIRE
 	select BINFMT_ELF
@@ -491,6 +494,19 @@
 
 	  Say N if you are unsure.
 
+config PPC_QUEUED_SPINLOCKS
+	bool "Queued spinlocks"
+	depends on SMP
+	help
+	  Say Y here to use queued spinlocks which give better scalability and
+	  fairness on large SMP and NUMA systems without harming single threaded
+	  performance.
+
+	  This option is currently experimental, the code is more complex and
+	  less tested so it defaults to "N" for the moment.
+
+	  If unsure, say "N".
+
 config ARCH_CPU_PROBE_RELEASE
 	def_bool y
 	depends on HOTPLUG_CPU
@@ -834,13 +850,16 @@
 	  this in mind when choosing a value for this option.
 
 config PPC_SUBPAGE_PROT
-	bool "Support setting protections for 4k subpages"
+	bool "Support setting protections for 4k subpages (subpage_prot syscall)"
+	default n
 	depends on PPC_BOOK3S_64 && PPC_64K_PAGES
 	help
-	  This option adds support for a system call to allow user programs
+	  This option adds support for a system call to allow user programs
 	  to set access permissions (read/write, readonly, or no access)
 	  on the 4k subpages of each 64k page.
 
+	  If unsure, say N here.
+
 config PPC_COPRO_BASE
 	bool
 
@@ -860,12 +879,8 @@
 	  Add support for handling denormalisation of single precision
 	  values.  Useful for bare metal only.  If unsure say Y here.
 
-config CMDLINE_BOOL
-	bool "Default bootloader kernel arguments"
-
 config CMDLINE
-	string "Initial kernel command string" if CMDLINE_BOOL
-	default "console=ttyS0,9600 console=tty0 root=/dev/sda2" if CMDLINE_BOOL
+	string "Initial kernel command string"
 	default ""
 	help
 	  On some platforms, there is currently no way for the boot loader to
@@ -1199,6 +1214,7 @@
 config TASK_SIZE
 	hex "Size of user task space" if TASK_SIZE_BOOL
 	default "0x80000000" if PPC_8xx
+	default "0xb0000000" if PPC_BOOK3S_32 && STRICT_KERNEL_RWX
 	default "0xc0000000"
 endmenu
 
diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
index f310c32..3e8da9c 100644
--- a/arch/powerpc/Makefile
+++ b/arch/powerpc/Makefile
@@ -246,7 +246,8 @@
 # often slow when they are implemented at all
 KBUILD_CFLAGS		+= $(call cc-option,-mno-string)
 
-cpu-as-$(CONFIG_4xx)		+= -Wa,-m405
+cpu-as-$(CONFIG_40x)		+= -Wa,-m405
+cpu-as-$(CONFIG_44x)		+= -Wa,-m440
 cpu-as-$(CONFIG_ALTIVEC)	+= $(call as-option,-Wa$(comma)-maltivec)
 cpu-as-$(CONFIG_E200)		+= -Wa,-me200
 cpu-as-$(CONFIG_E500)		+= -Wa,-me500
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 63d7456..44af715 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -70,10 +70,10 @@
 DTC_FLAGS	?= -p 1024
 
 $(obj)/4xx.o: BOOTCFLAGS += -mcpu=405
-$(obj)/ebony.o: BOOTCFLAGS += -mcpu=405
+$(obj)/ebony.o: BOOTCFLAGS += -mcpu=440
 $(obj)/cuboot-hotfoot.o: BOOTCFLAGS += -mcpu=405
-$(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=405
-$(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=405
+$(obj)/cuboot-taishan.o: BOOTCFLAGS += -mcpu=440
+$(obj)/cuboot-katmai.o: BOOTCFLAGS += -mcpu=440
 $(obj)/cuboot-acadia.o: BOOTCFLAGS += -mcpu=405
 $(obj)/treeboot-iss4xx.o: BOOTCFLAGS += -mcpu=405
 $(obj)/treeboot-currituck.o: BOOTCFLAGS += -mcpu=405
@@ -117,7 +117,7 @@
 		elf_util.c $(zlib-y) devtree.c stdlib.c \
 		oflib.c ofconsole.c cuboot.c
 
-src-wlib-$(CONFIG_PPC_MPC52XX) += mpc52xx-psc.c
+src-wlib-$(CONFIG_PPC_MPC52xx) += mpc52xx-psc.c
 src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S opal.c
 ifndef CONFIG_PPC64_BOOT_WRAPPER
 src-wlib-y += crtsavres.S
diff --git a/arch/powerpc/boot/dts/akebono.dts b/arch/powerpc/boot/dts/akebono.dts
index cd9d660..df18f8d 100644
--- a/arch/powerpc/boot/dts/akebono.dts
+++ b/arch/powerpc/boot/dts/akebono.dts
@@ -248,7 +248,7 @@
 			};
 		};
 
-		PCIE0: pciex@10100000000 {
+		PCIE0: pcie@10100000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
@@ -288,7 +288,7 @@
 				0x0 0x0 0x0 0x4 &MPIC 48 0x2 /* int D */>;
 		};
 
-		PCIE1: pciex@20100000000 {
+		PCIE1: pcie@20100000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
@@ -328,7 +328,7 @@
 				0x0 0x0 0x0 0x4 &MPIC 56 0x2 /* int D */>;
 		};
 
-		PCIE2: pciex@18100000000 {
+		PCIE2: pcie@18100000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
@@ -368,7 +368,7 @@
 				0x0 0x0 0x0 0x4 &MPIC 64 0x2 /* int D */>;
 		};
 
-		PCIE3: pciex@28100000000 {
+		PCIE3: pcie@28100000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/bluestone.dts b/arch/powerpc/boot/dts/bluestone.dts
index cc965a1..aa1ae94 100644
--- a/arch/powerpc/boot/dts/bluestone.dts
+++ b/arch/powerpc/boot/dts/bluestone.dts
@@ -325,7 +325,7 @@
 			};
 		};
 
-		PCIE0: pciex@d00000000 {
+		PCIE0: pcie@d00000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/canyonlands.dts b/arch/powerpc/boot/dts/canyonlands.dts
index 0d6ac92..c5fbb08 100644
--- a/arch/powerpc/boot/dts/canyonlands.dts
+++ b/arch/powerpc/boot/dts/canyonlands.dts
@@ -461,7 +461,7 @@
 			interrupt-map = < 0x0 0x0 0x0 0x0 &UIC1 0x0 0x8 >;
 		};
 
-		PCIE0: pciex@d00000000 {
+		PCIE0: pcie@d00000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
@@ -503,7 +503,7 @@
 				0x0 0x0 0x0 0x4 &UIC3 0xf 0x4 /* swizzled int D */>;
 		};
 
-		PCIE1: pciex@d20000000 {
+		PCIE1: pcie@d20000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/currituck.dts b/arch/powerpc/boot/dts/currituck.dts
index b6d87b9..aea8af8 100644
--- a/arch/powerpc/boot/dts/currituck.dts
+++ b/arch/powerpc/boot/dts/currituck.dts
@@ -122,7 +122,7 @@
 			};
 		};
 
-		PCIE0: pciex@10100000000 {		// 4xGBIF1
+		PCIE0: pcie@10100000000 {		// 4xGBIF1
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
@@ -160,7 +160,7 @@
 				0x0 0x0 0x0 0x4 &MPIC 49 0x2 /* int D */>;
 		};
 
-		PCIE1: pciex@30100000000 {		// 4xGBIF0
+		PCIE1: pcie@30100000000 {		// 4xGBIF0
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
@@ -197,7 +197,7 @@
 				0x0 0x0 0x0 0x4 &MPIC 41 0x2 /* int D */>;
 		};
 
-		PCIE2: pciex@38100000000 {		// 2xGBIF0
+		PCIE2: pcie@38100000000 {		// 2xGBIF0
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/fsl/p4080ds.dts b/arch/powerpc/boot/dts/fsl/p4080ds.dts
index 65e2015..969b32c 100644
--- a/arch/powerpc/boot/dts/fsl/p4080ds.dts
+++ b/arch/powerpc/boot/dts/fsl/p4080ds.dts
@@ -125,11 +125,11 @@
 
 		i2c@118100 {
 			eeprom@51 {
-				compatible = "atmel,24c256";
+				compatible = "atmel,spd";
 				reg = <0x51>;
 			};
 			eeprom@52 {
-				compatible = "atmel,24c256";
+				compatible = "atmel,spd";
 				reg = <0x52>;
 			};
 			rtc@68 {
@@ -143,6 +143,45 @@
 			};
 		};
 
+		i2c@118000 {
+			zl2006@21 {
+				compatible = "zl2006";
+				reg = <0x21>;
+			};
+			zl2006@22 {
+				compatible = "zl2006";
+				reg = <0x22>;
+			};
+			zl2006@23 {
+				compatible = "zl2006";
+				reg = <0x23>;
+			};
+			zl2006@24 {
+				compatible = "zl2006";
+				reg = <0x24>;
+			};
+			eeprom@50 {
+				compatible = "atmel,24c64";
+				reg = <0x50>;
+			};
+			eeprom@55 {
+				compatible = "atmel,24c64";
+				reg = <0x55>;
+			};
+			eeprom@56 {
+				compatible = "atmel,24c64";
+				reg = <0x56>;
+			};
+			eeprom@57 {
+				compatible = "atmel,24c02";
+				reg = <0x57>;
+			};
+		};
+
+		i2c@119100 {
+			/* 0x6E: ICS9FG108 */
+		};
+
 		usb0: usb@210000 {
 			phy_type = "ulpi";
 		};
diff --git a/arch/powerpc/boot/dts/glacier.dts b/arch/powerpc/boot/dts/glacier.dts
index a7a802f..e84ff1a 100644
--- a/arch/powerpc/boot/dts/glacier.dts
+++ b/arch/powerpc/boot/dts/glacier.dts
@@ -489,7 +489,7 @@
 			interrupt-map = < 0x0 0x0 0x0 0x0 &UIC1 0x0 0x8 >;
 		};
 
-		PCIE0: pciex@d00000000 {
+		PCIE0: pcie@d00000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
@@ -531,7 +531,7 @@
 				0x0 0x0 0x0 0x4 &UIC3 0xf 0x4 /* swizzled int D */>;
 		};
 
-		PCIE1: pciex@d20000000 {
+		PCIE1: pcie@d20000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/haleakala.dts b/arch/powerpc/boot/dts/haleakala.dts
index cb16dad..f81ce87 100644
--- a/arch/powerpc/boot/dts/haleakala.dts
+++ b/arch/powerpc/boot/dts/haleakala.dts
@@ -237,7 +237,7 @@
 			};
 		};
 
-		PCIE0: pciex@a0000000 {
+		PCIE0: pcie@a0000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/icon.dts b/arch/powerpc/boot/dts/icon.dts
index 2e6e3a7..fbaa60b 100644
--- a/arch/powerpc/boot/dts/icon.dts
+++ b/arch/powerpc/boot/dts/icon.dts
@@ -315,7 +315,7 @@
 			interrupt-map = <0x0 0x0 0x0 0x0 &UIC1 19 0x8>;
 		};
 
-		PCIE0: pciex@d00000000 {
+		PCIE0: pcie@d00000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
@@ -356,7 +356,7 @@
 				0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>;
 		};
 
-		PCIE1: pciex@d20000000 {
+		PCIE1: pcie@d20000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/katmai.dts b/arch/powerpc/boot/dts/katmai.dts
index 02629e1..a8f3532 100644
--- a/arch/powerpc/boot/dts/katmai.dts
+++ b/arch/powerpc/boot/dts/katmai.dts
@@ -319,7 +319,7 @@
 			>;
 		};
 
-		PCIE0: pciex@d00000000 {
+		PCIE0: pcie@d00000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
@@ -360,7 +360,7 @@
 				0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>;
 		};
 
-		PCIE1: pciex@d20000000 {
+		PCIE1: pcie@d20000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
@@ -401,7 +401,7 @@
 				0x0 0x0 0x0 0x4 &UIC3 0x7 0x4 /* swizzled int D */>;
 		};
 
-		PCIE2: pciex@d40000000 {
+		PCIE2: pcie@d40000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/kilauea.dts b/arch/powerpc/boot/dts/kilauea.dts
index 2a34132..a709fb4 100644
--- a/arch/powerpc/boot/dts/kilauea.dts
+++ b/arch/powerpc/boot/dts/kilauea.dts
@@ -322,7 +322,7 @@
 			};
 		};
 
-		PCIE0: pciex@a0000000 {
+		PCIE0: pcie@a0000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
@@ -363,7 +363,7 @@
 				0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>;
 		};
 
-		PCIE1: pciex@c0000000 {
+		PCIE1: pcie@c0000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/makalu.dts b/arch/powerpc/boot/dts/makalu.dts
index bf8fe16..c473cd9 100644
--- a/arch/powerpc/boot/dts/makalu.dts
+++ b/arch/powerpc/boot/dts/makalu.dts
@@ -268,7 +268,7 @@
 			};
 		};
 
-		PCIE0: pciex@a0000000 {
+		PCIE0: pcie@a0000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
@@ -309,7 +309,7 @@
 				0x0 0x0 0x0 0x4 &UIC2 0x3 0x4 /* swizzled int D */>;
 		};
 
-		PCIE1: pciex@c0000000 {
+		PCIE1: pcie@c0000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/dts/redwood.dts b/arch/powerpc/boot/dts/redwood.dts
index f3e046f..f38035a 100644
--- a/arch/powerpc/boot/dts/redwood.dts
+++ b/arch/powerpc/boot/dts/redwood.dts
@@ -235,7 +235,7 @@
 				has-new-stacr-staopc;
 			};
 		};
-		PCIE0: pciex@d00000000 {
+		PCIE0: pcie@d00000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
@@ -276,7 +276,7 @@
 				0x0 0x0 0x0 0x4 &UIC3 0x3 0x4 /* swizzled int D */>;
 		};
 
-		PCIE1: pciex@d20000000 {
+		PCIE1: pcie@d20000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
@@ -317,7 +317,7 @@
 				0x0 0x0 0x0 0x4 &UIC3 0x7 0x4 /* swizzled int D */>;
 		};
 
-		PCIE2: pciex@d40000000 {
+		PCIE2: pcie@d40000000 {
 			device_type = "pci";
 			#interrupt-cells = <1>;
 			#size-cells = <2>;
diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c
index a9d2091..cae31a6 100644
--- a/arch/powerpc/boot/main.c
+++ b/arch/powerpc/boot/main.c
@@ -104,7 +104,7 @@
 {
 	/* If we have an image attached to us, it overrides anything
 	 * supplied by the loader. */
-	if (_initrd_end > _initrd_start) {
+	if (&_initrd_end > &_initrd_start) {
 		printf("Attached initrd image at 0x%p-0x%p\n\r",
 		       _initrd_start, _initrd_end);
 		initrd_addr = (unsigned long)_initrd_start;
@@ -152,7 +152,7 @@
 	unsigned long esm_blob_addr, esm_blob_size;
 
 	/* Do we have an ESM (Enter Secure Mode) blob? */
-	if (_esm_blob_end <= _esm_blob_start)
+	if (&_esm_blob_end <= &_esm_blob_start)
 		return;
 
 	printf("Attached ESM blob at 0x%p-0x%p\n\r",
diff --git a/arch/powerpc/boot/ps3.c b/arch/powerpc/boot/ps3.c
index c52552a..6e4efbd 100644
--- a/arch/powerpc/boot/ps3.c
+++ b/arch/powerpc/boot/ps3.c
@@ -127,7 +127,7 @@
 	ps3_repository_read_rm_size(&rm_size);
 	dt_fixup_memory(0, rm_size);
 
-	if (_initrd_end > _initrd_start) {
+	if (&_initrd_end > &_initrd_start) {
 		setprop_val(chosen, "linux,initrd-start", (u32)(_initrd_start));
 		setprop_val(chosen, "linux,initrd-end", (u32)(_initrd_end));
 	}
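For context on the address-of changes in main.c and ps3.c above: the boot
wrapper declares these linker-script symbols as arrays, and comparing the
arrays directly draws compiler warnings that a comparison between distinct
objects is effectively constant. Taking the addresses explicitly spells out
the intended pointer comparison. A sketch of the pattern, with have_initrd()
as an illustrative name:

  extern char _initrd_start[], _initrd_end[];	/* linker-provided bounds */

  static int have_initrd(void)
  {
  	/* non-empty region <=> end address is above start address */
  	return &_initrd_end > &_initrd_start;
  }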
diff --git a/arch/powerpc/boot/serial.c b/arch/powerpc/boot/serial.c
index 0bfa7e8..9a19e59 100644
--- a/arch/powerpc/boot/serial.c
+++ b/arch/powerpc/boot/serial.c
@@ -128,7 +128,7 @@
 	         dt_is_compatible(devp, "fsl,cpm2-smc-uart"))
 		rc = cpm_console_init(devp, &serial_cd);
 #endif
-#ifdef CONFIG_PPC_MPC52XX
+#ifdef CONFIG_PPC_MPC52xx
 	else if (dt_is_compatible(devp, "fsl,mpc5200-psc-uart"))
 		rc = mpc5200_psc_console_init(devp, &serial_cd);
 #endif
diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig
index 7705a5c..3894ba8 100644
--- a/arch/powerpc/configs/44x/akebono_defconfig
+++ b/arch/powerpc/configs/44x/akebono_defconfig
@@ -19,8 +19,6 @@
 CONFIG_HZ_100=y
 CONFIG_IRQ_ALL_CPUS=y
 # CONFIG_COMPACTION is not set
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
 # CONFIG_SUSPEND is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
@@ -58,7 +56,6 @@
 # CONFIG_NET_VENDOR_DEC is not set
 # CONFIG_NET_VENDOR_DLINK is not set
 # CONFIG_NET_VENDOR_EMULEX is not set
-# CONFIG_NET_VENDOR_EXAR is not set
 CONFIG_IBM_EMAC=y
 # CONFIG_NET_VENDOR_MARVELL is not set
 # CONFIG_NET_VENDOR_MELLANOX is not set
diff --git a/arch/powerpc/configs/44x/arches_defconfig b/arch/powerpc/configs/44x/arches_defconfig
index 82c6f49..41d04e7 100644
--- a/arch/powerpc/configs/44x/arches_defconfig
+++ b/arch/powerpc/configs/44x/arches_defconfig
@@ -11,8 +11,6 @@
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_EBONY is not set
 CONFIG_ARCHES=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/bamboo_defconfig b/arch/powerpc/configs/44x/bamboo_defconfig
index 6792132..acbce71 100644
--- a/arch/powerpc/configs/44x/bamboo_defconfig
+++ b/arch/powerpc/configs/44x/bamboo_defconfig
@@ -9,8 +9,6 @@
 # CONFIG_BLK_DEV_BSG is not set
 CONFIG_BAMBOO=y
 # CONFIG_EBONY is not set
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/bluestone_defconfig b/arch/powerpc/configs/44x/bluestone_defconfig
index 8006a57..37088f2 100644
--- a/arch/powerpc/configs/44x/bluestone_defconfig
+++ b/arch/powerpc/configs/44x/bluestone_defconfig
@@ -11,8 +11,6 @@
 # CONFIG_COMPAT_BRK is not set
 CONFIG_BLUESTONE=y
 # CONFIG_EBONY is not set
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/canyonlands_defconfig b/arch/powerpc/configs/44x/canyonlands_defconfig
index ccc14eb..61776ad 100644
--- a/arch/powerpc/configs/44x/canyonlands_defconfig
+++ b/arch/powerpc/configs/44x/canyonlands_defconfig
@@ -11,8 +11,6 @@
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_EBONY is not set
 CONFIG_CANYONLANDS=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig
index be76e06..34c86b3 100644
--- a/arch/powerpc/configs/44x/currituck_defconfig
+++ b/arch/powerpc/configs/44x/currituck_defconfig
@@ -17,8 +17,6 @@
 CONFIG_HZ_100=y
 CONFIG_MATH_EMULATION=y
 CONFIG_IRQ_ALL_CPUS=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
 # CONFIG_SUSPEND is not set
 CONFIG_NET=y
 CONFIG_PACKET=y
diff --git a/arch/powerpc/configs/44x/eiger_defconfig b/arch/powerpc/configs/44x/eiger_defconfig
index 1abaa63..509300f 100644
--- a/arch/powerpc/configs/44x/eiger_defconfig
+++ b/arch/powerpc/configs/44x/eiger_defconfig
@@ -10,8 +10,6 @@
 CONFIG_MODULE_UNLOAD=y
 # CONFIG_EBONY is not set
 CONFIG_EIGER=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
 CONFIG_PCIEPORTBUS=y
 # CONFIG_PCIEASPM is not set
 CONFIG_NET=y
diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig
index e67fc04..30845ce 100644
--- a/arch/powerpc/configs/44x/fsp2_defconfig
+++ b/arch/powerpc/configs/44x/fsp2_defconfig
@@ -28,7 +28,6 @@
 CONFIG_SWIOTLB=y
 CONFIG_KEXEC=y
 CONFIG_CRASH_DUMP=y
-CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="ip=on rw"
 # CONFIG_SUSPEND is not set
 # CONFIG_PCI is not set
diff --git a/arch/powerpc/configs/44x/icon_defconfig b/arch/powerpc/configs/44x/icon_defconfig
index 7d7ff84..930948a 100644
--- a/arch/powerpc/configs/44x/icon_defconfig
+++ b/arch/powerpc/configs/44x/icon_defconfig
@@ -9,8 +9,6 @@
 # CONFIG_EBONY is not set
 CONFIG_ICON=y
 CONFIG_HIGHMEM=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
 CONFIG_PCIEPORTBUS=y
 # CONFIG_PCIEASPM is not set
 CONFIG_NET=y
diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig
index fb5c73a..2c3834e 100644
--- a/arch/powerpc/configs/44x/iss476-smp_defconfig
+++ b/arch/powerpc/configs/44x/iss476-smp_defconfig
@@ -17,7 +17,6 @@
 CONFIG_HZ_100=y
 CONFIG_MATH_EMULATION=y
 CONFIG_IRQ_ALL_CPUS=y
-CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="root=/dev/issblk0"
 # CONFIG_PCI is not set
 CONFIG_ADVANCED_OPTIONS=y
diff --git a/arch/powerpc/configs/44x/katmai_defconfig b/arch/powerpc/configs/44x/katmai_defconfig
index c6dc144..1a0f1c3 100644
--- a/arch/powerpc/configs/44x/katmai_defconfig
+++ b/arch/powerpc/configs/44x/katmai_defconfig
@@ -9,8 +9,6 @@
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_EBONY is not set
 CONFIG_KATMAI=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/rainier_defconfig b/arch/powerpc/configs/44x/rainier_defconfig
index c83ad031..6dd67de 100644
--- a/arch/powerpc/configs/44x/rainier_defconfig
+++ b/arch/powerpc/configs/44x/rainier_defconfig
@@ -10,8 +10,6 @@
 # CONFIG_EBONY is not set
 CONFIG_RAINIER=y
 CONFIG_MATH_EMULATION=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/redwood_defconfig b/arch/powerpc/configs/44x/redwood_defconfig
index 640fe1d..e28d764 100644
--- a/arch/powerpc/configs/44x/redwood_defconfig
+++ b/arch/powerpc/configs/44x/redwood_defconfig
@@ -10,8 +10,6 @@
 CONFIG_MODULE_UNLOAD=y
 # CONFIG_EBONY is not set
 CONFIG_REDWOOD=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
 CONFIG_PCIEPORTBUS=y
 # CONFIG_PCIEASPM is not set
 CONFIG_NET=y
diff --git a/arch/powerpc/configs/44x/sam440ep_defconfig b/arch/powerpc/configs/44x/sam440ep_defconfig
index 22dc0da..ef09786 100644
--- a/arch/powerpc/configs/44x/sam440ep_defconfig
+++ b/arch/powerpc/configs/44x/sam440ep_defconfig
@@ -12,8 +12,6 @@
 CONFIG_AMIGA_PARTITION=y
 # CONFIG_EBONY is not set
 CONFIG_SAM440EP=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/sequoia_defconfig b/arch/powerpc/configs/44x/sequoia_defconfig
index 2c0973d..b4984ea 100644
--- a/arch/powerpc/configs/44x/sequoia_defconfig
+++ b/arch/powerpc/configs/44x/sequoia_defconfig
@@ -11,8 +11,6 @@
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_EBONY is not set
 CONFIG_SEQUOIA=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/taishan_defconfig b/arch/powerpc/configs/44x/taishan_defconfig
index a2d355c..3ea5932 100644
--- a/arch/powerpc/configs/44x/taishan_defconfig
+++ b/arch/powerpc/configs/44x/taishan_defconfig
@@ -9,8 +9,6 @@
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_EBONY is not set
 CONFIG_TAISHAN=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig
index af66c69..47252c2 100644
--- a/arch/powerpc/configs/44x/warp_defconfig
+++ b/arch/powerpc/configs/44x/warp_defconfig
@@ -14,7 +14,6 @@
 CONFIG_WARP=y
 CONFIG_PPC4xx_GPIO=y
 CONFIG_HZ_1000=y
-CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="ip=on"
 # CONFIG_PCI is not set
 CONFIG_NET=y
diff --git a/arch/powerpc/configs/85xx-hw.config b/arch/powerpc/configs/85xx-hw.config
index b507df6..524db76 100644
--- a/arch/powerpc/configs/85xx-hw.config
+++ b/arch/powerpc/configs/85xx-hw.config
@@ -67,7 +67,6 @@
 CONFIG_MTD_CFI_INTELEXT=y
 CONFIG_MTD_CFI=y
 CONFIG_MTD_CMDLINE_PARTS=y
-CONFIG_MTD_M25P80=y
 CONFIG_MTD_NAND_FSL_ELBC=y
 CONFIG_MTD_NAND_FSL_IFC=y
 CONFIG_MTD_RAW_NAND=y
diff --git a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
index d50aca6..3a6381a 100644
--- a/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
+++ b/arch/powerpc/configs/85xx/xes_mpc85xx_defconfig
@@ -51,9 +51,6 @@
 CONFIG_IP_MROUTE=y
 CONFIG_IP_PIMSM_V1=y
 CONFIG_IP_PIMSM_V2=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 CONFIG_MTD=y
 CONFIG_MTD_REDBOOT_PARTS=y
 CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/86xx-hw.config b/arch/powerpc/configs/86xx-hw.config
index 151164c..0cb24b3 100644
--- a/arch/powerpc/configs/86xx-hw.config
+++ b/arch/powerpc/configs/86xx-hw.config
@@ -32,8 +32,6 @@
 CONFIG_HZ_1000=y
 CONFIG_I2C_MPC=y
 CONFIG_I2C=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
 CONFIG_INPUT_FF_MEMLESS=m
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSEDEV is not set
diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config
index 3c7dad1..df37efe 100644
--- a/arch/powerpc/configs/fsl-emb-nonhw.config
+++ b/arch/powerpc/configs/fsl-emb-nonhw.config
@@ -56,7 +56,6 @@
 CONFIG_INET_AH=y
 CONFIG_INET_ESP=y
 CONFIG_INET_IPCOMP=y
-# CONFIG_INET_XFRM_MODE_BEET is not set
 CONFIG_INET=y
 CONFIG_IP_ADVANCED_ROUTER=y
 CONFIG_IP_MROUTE=y
diff --git a/arch/powerpc/configs/g5_defconfig b/arch/powerpc/configs/g5_defconfig
index a68c7f3..1c674c4 100644
--- a/arch/powerpc/configs/g5_defconfig
+++ b/arch/powerpc/configs/g5_defconfig
@@ -51,7 +51,6 @@
 CONFIG_NF_CONNTRACK_IRC=m
 CONFIG_NF_CONNTRACK_TFTP=m
 CONFIG_NF_CT_NETLINK=m
-CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_BLK_DEV_LOOP=y
diff --git a/arch/powerpc/configs/holly_defconfig b/arch/powerpc/configs/holly_defconfig
index 067f433..271daff 100644
--- a/arch/powerpc/configs/holly_defconfig
+++ b/arch/powerpc/configs/holly_defconfig
@@ -13,7 +13,6 @@
 CONFIG_PPC_HOLLY=y
 CONFIG_GEN_RTC=y
 CONFIG_BINFMT_MISC=y
-CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="console=ttyS0,115200"
 # CONFIG_SECCOMP is not set
 CONFIG_NET=y
diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig
index ea59f3d..d4be64f 100644
--- a/arch/powerpc/configs/linkstation_defconfig
+++ b/arch/powerpc/configs/linkstation_defconfig
@@ -37,7 +37,6 @@
 CONFIG_NETFILTER_XT_MATCH_MAC=m
 CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
-CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
diff --git a/arch/powerpc/configs/mpc512x_defconfig b/arch/powerpc/configs/mpc512x_defconfig
index e39346b..e75d3f3 100644
--- a/arch/powerpc/configs/mpc512x_defconfig
+++ b/arch/powerpc/configs/mpc512x_defconfig
@@ -47,7 +47,6 @@
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_COUNT=1
 CONFIG_BLK_DEV_RAM_SIZE=8192
-CONFIG_BLK_DEV_RAM_DAX=y
 CONFIG_EEPROM_AT24=y
 CONFIG_EEPROM_AT25=y
 CONFIG_SCSI=y
diff --git a/arch/powerpc/configs/mpc83xx_defconfig b/arch/powerpc/configs/mpc83xx_defconfig
index be12572..95d43f8 100644
--- a/arch/powerpc/configs/mpc83xx_defconfig
+++ b/arch/powerpc/configs/mpc83xx_defconfig
@@ -19,7 +19,6 @@
 CONFIG_MPC836x_RDK=y
 CONFIG_MPC837x_MDS=y
 CONFIG_MPC837x_RDB=y
-CONFIG_SBC834x=y
 CONFIG_ASP834x=y
 CONFIG_QE_GPIO=y
 CONFIG_MATH_EMULATION=y
diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig
index 0a0d046..1fed6be 100644
--- a/arch/powerpc/configs/mvme5100_defconfig
+++ b/arch/powerpc/configs/mvme5100_defconfig
@@ -20,10 +20,9 @@
 CONFIG_MVME5100=y
 CONFIG_KVM_GUEST=y
 CONFIG_HZ_100=y
+CONFIG_CMDLINE="console=ttyS0,9600 ip=dhcp root=/dev/nfs"
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 # CONFIG_COMPACTION is not set
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0,9600 ip=dhcp root=/dev/nfs"
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
@@ -46,7 +45,6 @@
 CONFIG_NETFILTER_XT_MATCH_MAC=m
 CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
 CONFIG_NETFILTER_XT_MATCH_STATE=m
-CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
diff --git a/arch/powerpc/configs/pasemi_defconfig b/arch/powerpc/configs/pasemi_defconfig
index 08b7f4c..af9af03 100644
--- a/arch/powerpc/configs/pasemi_defconfig
+++ b/arch/powerpc/configs/pasemi_defconfig
@@ -58,7 +58,6 @@
 CONFIG_EEPROM_LEGACY=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_ST=y
-CONFIG_CHR_DEV_OSST=y
 CONFIG_BLK_DEV_SR=y
 CONFIG_CHR_DEV_SG=y
 CONFIG_CHR_DEV_SCH=y
diff --git a/arch/powerpc/configs/pmac32_defconfig b/arch/powerpc/configs/pmac32_defconfig
index 05e325c..665a8d7 100644
--- a/arch/powerpc/configs/pmac32_defconfig
+++ b/arch/powerpc/configs/pmac32_defconfig
@@ -75,7 +75,6 @@
 CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
 CONFIG_NETFILTER_XT_MATCH_TIME=m
 CONFIG_NETFILTER_XT_MATCH_U32=m
-CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -90,13 +89,6 @@
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
 CONFIG_IP_DCCP=m
-CONFIG_IRDA=m
-CONFIG_IRLAN=m
-CONFIG_IRNET=m
-CONFIG_IRCOMM=m
-CONFIG_IRDA_CACHE_LAST_LSAP=y
-CONFIG_IRDA_FAST_RR=y
-CONFIG_IRTTY_SIR=m
 CONFIG_BT=m
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
diff --git a/arch/powerpc/configs/powernv_defconfig b/arch/powerpc/configs/powernv_defconfig
index 2de9aadf..cf30fc2 100644
--- a/arch/powerpc/configs/powernv_defconfig
+++ b/arch/powerpc/configs/powernv_defconfig
@@ -64,7 +64,6 @@
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
 CONFIG_PPC_64K_PAGES=y
-CONFIG_PPC_SUBPAGE_PROT=y
 CONFIG_SCHED_SMT=y
 CONFIG_PM=y
 CONFIG_HOTPLUG_PCI=y
@@ -246,7 +245,6 @@
 CONFIG_INFINIBAND_USER_MAD=m
 CONFIG_INFINIBAND_USER_ACCESS=m
 CONFIG_INFINIBAND_MTHCA=m
-CONFIG_INFINIBAND_CXGB3=m
 CONFIG_INFINIBAND_CXGB4=m
 CONFIG_MLX4_INFINIBAND=m
 CONFIG_INFINIBAND_IPOIB=m
diff --git a/arch/powerpc/configs/ppc40x_defconfig b/arch/powerpc/configs/ppc40x_defconfig
index 25f6c91..7e48693 100644
--- a/arch/powerpc/configs/ppc40x_defconfig
+++ b/arch/powerpc/configs/ppc40x_defconfig
@@ -20,9 +20,6 @@
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig
index 8d7e3e9..4875965 100644
--- a/arch/powerpc/configs/ppc64_defconfig
+++ b/arch/powerpc/configs/ppc64_defconfig
@@ -268,7 +268,6 @@
 CONFIG_INFINIBAND_USER_MAD=m
 CONFIG_INFINIBAND_USER_ACCESS=m
 CONFIG_INFINIBAND_MTHCA=m
-CONFIG_INFINIBAND_CXGB3=m
 CONFIG_INFINIBAND_CXGB4=m
 CONFIG_MLX4_INFINIBAND=m
 CONFIG_INFINIBAND_IPOIB=m
diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
index feb5d47..5e6f92b 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -53,7 +53,6 @@
 CONFIG_MPC836x_RDK=y
 CONFIG_MPC837x_MDS=y
 CONFIG_MPC837x_RDB=y
-CONFIG_SBC834x=y
 CONFIG_ASP834x=y
 CONFIG_PPC_86xx=y
 CONFIG_MPC8641_HPCN=y
@@ -187,7 +186,6 @@
 CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
 CONFIG_NETFILTER_XT_MATCH_TIME=m
 CONFIG_NETFILTER_XT_MATCH_U32=m
-CONFIG_NF_CONNTRACK_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
 CONFIG_IP_NF_MATCH_ECN=m
@@ -203,7 +201,6 @@
 CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
 CONFIG_IP6_NF_MATCH_EUI64=m
@@ -241,7 +238,6 @@
 CONFIG_BRIDGE_EBT_LOG=m
 CONFIG_BRIDGE_EBT_NFLOG=m
 CONFIG_IP_DCCP=m
-CONFIG_NET_DCCPPROBE=m
 CONFIG_TIPC=m
 CONFIG_ATM=m
 CONFIG_ATM_CLIP=m
@@ -251,7 +247,6 @@
 CONFIG_VLAN_8021Q=m
 CONFIG_DECNET=m
 CONFIG_DECNET_ROUTER=y
-CONFIG_IPX=m
 CONFIG_ATALK=m
 CONFIG_DEV_APPLETALK=m
 CONFIG_IPDDP=m
@@ -297,26 +292,6 @@
 CONFIG_NET_ACT_PEDIT=m
 CONFIG_NET_ACT_SIMP=m
 CONFIG_NET_ACT_SKBEDIT=m
-CONFIG_IRDA=m
-CONFIG_IRLAN=m
-CONFIG_IRNET=m
-CONFIG_IRCOMM=m
-CONFIG_IRDA_CACHE_LAST_LSAP=y
-CONFIG_IRDA_FAST_RR=y
-CONFIG_IRTTY_SIR=m
-CONFIG_KINGSUN_DONGLE=m
-CONFIG_KSDAZZLE_DONGLE=m
-CONFIG_KS959_DONGLE=m
-CONFIG_USB_IRDA=m
-CONFIG_SIGMATEL_FIR=m
-CONFIG_NSC_FIR=m
-CONFIG_WINBOND_FIR=m
-CONFIG_TOSHIBA_FIR=m
-CONFIG_SMC_IRCC_FIR=m
-CONFIG_ALI_FIR=m
-CONFIG_VLSI_FIR=m
-CONFIG_VIA_FIR=m
-CONFIG_MCS_FIR=m
 CONFIG_BT=m
 CONFIG_BT_RFCOMM=m
 CONFIG_BT_RFCOMM_TTY=y
@@ -332,7 +307,6 @@
 CONFIG_BT_HCIDTL1=m
 CONFIG_BT_HCIBT3C=m
 CONFIG_BT_HCIBLUECARD=m
-CONFIG_BT_HCIBTUART=m
 CONFIG_BT_HCIVHCI=m
 CONFIG_CFG80211=m
 CONFIG_MAC80211=m
@@ -366,7 +340,6 @@
 CONFIG_RAID_ATTRS=m
 CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_ST=m
-CONFIG_CHR_DEV_OSST=m
 CONFIG_BLK_DEV_SR=m
 CONFIG_CHR_DEV_SG=y
 CONFIG_CHR_DEV_SCH=m
@@ -663,7 +636,6 @@
 CONFIG_I2C_PCA_PLATFORM=m
 CONFIG_I2C_SIMTEC=m
 CONFIG_I2C_PARPORT=m
-CONFIG_I2C_PARPORT_LIGHT=m
 CONFIG_I2C_TINY_USB=m
 CONFIG_I2C_PCA_ISA=m
 CONFIG_I2C_STUB=m
@@ -676,7 +648,6 @@
 CONFIG_W1_SLAVE_SMEM=m
 CONFIG_W1_SLAVE_DS2433=m
 CONFIG_W1_SLAVE_DS2433_CRC=y
-CONFIG_W1_SLAVE_DS2760=m
 CONFIG_APM_POWER=m
 CONFIG_BATTERY_PMU=m
 CONFIG_HWMON=m
@@ -1065,15 +1036,6 @@
 CONFIG_CIFS_XATTR=y
 CONFIG_CIFS_POSIX=y
 CONFIG_CIFS_DFS_UPCALL=y
-CONFIG_NCP_FS=m
-CONFIG_NCPFS_PACKET_SIGNING=y
-CONFIG_NCPFS_IOCTL_LOCKING=y
-CONFIG_NCPFS_STRONG=y
-CONFIG_NCPFS_NFS_NS=y
-CONFIG_NCPFS_OS2_NS=y
-CONFIG_NCPFS_SMALLDOS=y
-CONFIG_NCPFS_NLS=y
-CONFIG_NCPFS_EXTRAS=y
 CONFIG_CODA_FS=m
 CONFIG_9P_FS=m
 CONFIG_NLS_DEFAULT="utf8"
@@ -1117,7 +1079,6 @@
 CONFIG_DEBUG_INFO=y
 CONFIG_UNUSED_SYMBOLS=y
 CONFIG_HEADERS_INSTALL=y
-CONFIG_HEADERS_CHECK=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_OBJECTS=y
diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
index 81b55c8..142f132 100644
--- a/arch/powerpc/configs/ps3_defconfig
+++ b/arch/powerpc/configs/ps3_defconfig
@@ -34,8 +34,6 @@
 # CONFIG_SPARSEMEM_VMEMMAP is not set
 # CONFIG_COMPACTION is not set
 CONFIG_SCHED_SMT=y
-CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE=""
 CONFIG_PM=y
 CONFIG_PM_DEBUG=y
 # CONFIG_SECCOMP is not set
diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig
index dfa4a72..d5dece9 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -57,7 +57,6 @@
 CONFIG_KSM=y
 CONFIG_TRANSPARENT_HUGEPAGE=y
 CONFIG_PPC_64K_PAGES=y
-CONFIG_PPC_SUBPAGE_PROT=y
 CONFIG_SCHED_SMT=y
 CONFIG_HOTPLUG_PCI=y
 CONFIG_HOTPLUG_PCI_RPA=m
@@ -94,6 +93,7 @@
 CONFIG_BLK_DEV_RAM=y
 CONFIG_BLK_DEV_RAM_SIZE=65536
 CONFIG_VIRTIO_BLK=m
+CONFIG_BLK_DEV_NVME=y
 CONFIG_BLK_DEV_SD=y
 CONFIG_CHR_DEV_ST=m
 CONFIG_BLK_DEV_SR=y
@@ -224,7 +224,6 @@
 CONFIG_INFINIBAND_USER_MAD=m
 CONFIG_INFINIBAND_USER_ACCESS=m
 CONFIG_INFINIBAND_MTHCA=m
-CONFIG_INFINIBAND_CXGB3=m
 CONFIG_INFINIBAND_CXGB4=m
 CONFIG_MLX4_INFINIBAND=m
 CONFIG_INFINIBAND_IPOIB=m
diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig
index ad6739a..b806a5d 100644
--- a/arch/powerpc/configs/skiroot_defconfig
+++ b/arch/powerpc/configs/skiroot_defconfig
@@ -45,7 +45,6 @@
 CONFIG_NUMA=y
 CONFIG_PPC_64K_PAGES=y
 CONFIG_SCHED_SMT=y
-CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="console=tty0 console=hvc0 ipr.fast_reboot=1 quiet"
 # CONFIG_SECCOMP is not set
 # CONFIG_PPC_MEM_KEYS is not set
diff --git a/arch/powerpc/configs/storcenter_defconfig b/arch/powerpc/configs/storcenter_defconfig
index b964084..47dcfad 100644
--- a/arch/powerpc/configs/storcenter_defconfig
+++ b/arch/powerpc/configs/storcenter_defconfig
@@ -12,7 +12,6 @@
 CONFIG_STORCENTER=y
 CONFIG_HZ_100=y
 CONFIG_BINFMT_MISC=y
-CONFIG_CMDLINE_BOOL=y
 CONFIG_CMDLINE="console=ttyS0,115200"
 # CONFIG_SECCOMP is not set
 CONFIG_NET=y
diff --git a/arch/powerpc/crypto/crc32-vpmsum_core.S b/arch/powerpc/crypto/crc32-vpmsum_core.S
index c3524eb..a16a717 100644
--- a/arch/powerpc/crypto/crc32-vpmsum_core.S
+++ b/arch/powerpc/crypto/crc32-vpmsum_core.S
@@ -19,7 +19,7 @@
  * We then use fixed point Barrett reduction to compute a mod n over GF(2)
  * for n = CRC using POWER8 instructions. We use x = 32.
  *
- * http://en.wikipedia.org/wiki/Barrett_reduction
+ * https://en.wikipedia.org/wiki/Barrett_reduction
  *
  * Copyright (C) 2015 Anton Blanchard <anton@au.ibm.com>, IBM
 */
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 2d444d0..90cd5c5 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -7,5 +7,6 @@
 generic-y += kvm_types.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
+generic-y += qrwlock.h
 generic-y += vtime.h
 generic-y += early_ioremap.h
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index 7d81e86..de14b1a 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -98,7 +98,7 @@
 void __init machine_init(u64 dt_ptr);
 #endif
 long system_call_exception(long r3, long r4, long r5, long r6, long r7, long r8, unsigned long r0, struct pt_regs *regs);
-notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs);
+notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs *regs, long scv);
 notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs, unsigned long msr);
 notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr);
 
@@ -144,13 +144,13 @@
 void _kvmppc_save_tm_pr(struct kvm_vcpu *vcpu, u64 guest_msr);
 
 /* Patch sites */
-extern s32 patch__call_flush_count_cache;
+extern s32 patch__call_flush_branch_caches;
 extern s32 patch__flush_count_cache_return;
 extern s32 patch__flush_link_stack_return;
 extern s32 patch__call_kvm_flush_link_stack;
 extern s32 patch__memset_nocache, patch__memcpy_nocache;
 
-extern long flush_count_cache;
+extern long flush_branch_caches;
 extern long kvm_flush_link_stack;
 
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h
index 0311c3c..8a55eb8 100644
--- a/arch/powerpc/include/asm/atomic.h
+++ b/arch/powerpc/include/asm/atomic.h
@@ -191,6 +191,34 @@
 #define atomic_xchg(v, new) (xchg(&((v)->counter), new))
 #define atomic_xchg_relaxed(v, new) xchg_relaxed(&((v)->counter), (new))
 
+/*
+ * Don't want to override the generic atomic_try_cmpxchg_acquire, because
+ * we add a lock hint to the lwarx, which may not be wanted for the
+ * _acquire case (and is not used by the other _acquire variants so it
+ * would be a surprise).
+ */
+static __always_inline bool
+atomic_try_cmpxchg_lock(atomic_t *v, int *old, int new)
+{
+	int r, o = *old;
+
+	__asm__ __volatile__ (
+"1:\t"	PPC_LWARX(%0,0,%2,1) "	# atomic_try_cmpxchg_acquire	\n"
+"	cmpw	0,%0,%3							\n"
+"	bne-	2f							\n"
+"	stwcx.	%4,0,%2							\n"
+"	bne-	1b							\n"
+"\t"	PPC_ACQUIRE_BARRIER "						\n"
+"2:									\n"
+	: "=&r" (r), "+m" (v->counter)
+	: "r" (&v->counter), "r" (o), "r" (new)
+	: "cr0", "memory");
+
+	if (unlikely(r != o))
+		*old = r;
+	return likely(r == o);
+}
+
 /**
  * atomic_fetch_add_unless - add unless the number is a given value
  * @v: pointer of type atomic_t
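A usage sketch for the helper added above, loosely mirroring the queued
spinlock fast path that motivates the lwarx lock (EH) hint; the 0 -> 1 lock
word protocol here is an assumption for illustration:

  #include <linux/atomic.h>

  static inline void example_lock(atomic_t *lock)
  {
  	int old = 0;

  	/*
  	 * On failure 'old' is refreshed with the observed value,
  	 * so reset it to the unlocked value before retrying.
  	 */
  	while (!atomic_try_cmpxchg_lock(lock, &old, 1))
  		old = 0;
  }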
diff --git a/arch/powerpc/include/asm/barrier.h b/arch/powerpc/include/asm/barrier.h
index 123adce..f53c423 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -7,6 +7,10 @@
 
 #include <asm/asm-const.h>
 
+#ifndef __ASSEMBLY__
+#include <asm/ppc-opcode.h>
+#endif
+
 /*
  * Memory barrier.
  * The sync instruction guarantees that all memory accesses initiated
@@ -76,6 +80,22 @@
 	___p1;								\
 })
 
+#ifdef CONFIG_PPC64
+#define smp_cond_load_relaxed(ptr, cond_expr) ({		\
+	typeof(ptr) __PTR = (ptr);				\
+	__unqual_scalar_typeof(*ptr) VAL;			\
+	VAL = READ_ONCE(*__PTR);				\
+	if (unlikely(!(cond_expr))) {				\
+		spin_begin();					\
+		do {						\
+			VAL = READ_ONCE(*__PTR);		\
+		} while (!(cond_expr));				\
+		spin_end();					\
+	}							\
+	(typeof(*ptr))VAL;					\
+})
+#endif
+
 #ifdef CONFIG_PPC_BOOK3S_64
 #define NOSPEC_BARRIER_SLOT   nop
 #elif defined(CONFIG_PPC_FSL_BOOK3E)
@@ -97,6 +117,15 @@
 #define barrier_nospec()
 #endif /* CONFIG_PPC_BARRIER_NOSPEC */
 
+/*
+ * pmem_wmb() ensures that all stores for which the modification
+ * is written to persistent storage by preceding dcbfps/dcbstps
+ * instructions have updated persistent storage before any data
+ * access or data transfer caused by subsequent instructions is
+ * initiated.
+ */
+#define pmem_wmb() __asm__ __volatile__(PPC_PHWSYNC ::: "memory")
+
 #include <asm-generic/barrier.h>
 
 #endif /* _ASM_POWERPC_BARRIER_H */
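Example use of the PPC64 smp_cond_load_relaxed() override added above, which
polls at low SMT priority between spin_begin()/spin_end(); wait_for_flag() is
an illustrative caller:

  static u32 wait_for_flag(u32 *flag)
  {
  	/* VAL is bound to each freshly loaded value of *flag */
  	return smp_cond_load_relaxed(flag, VAL != 0);
  }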
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 2249124..36443cd 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -184,17 +184,7 @@
  */
 #define VMALLOC_OFFSET (0x1000000) /* 16M */
 
-/*
- * With CONFIG_STRICT_KERNEL_RWX, kernel segments are set NX. But when modules
- * are used, NX cannot be set on VMALLOC space. So vmalloc VM space and linear
- * memory shall not share segments.
- */
-#if defined(CONFIG_STRICT_KERNEL_RWX) && defined(CONFIG_MODULES)
-#define VMALLOC_START ((ALIGN((long)high_memory, 256L << 20) + VMALLOC_OFFSET) & \
-		       ~(VMALLOC_OFFSET - 1))
-#else
 #define VMALLOC_START ((((long)high_memory + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)))
-#endif
 
 #ifdef CONFIG_KASAN_VMALLOC
 #define VMALLOC_END	ALIGN_DOWN(ioremap_bot, PAGE_SIZE << KASAN_SHADOW_SCALE_SHIFT)
@@ -202,6 +192,11 @@
 #define VMALLOC_END	ioremap_bot
 #endif
 
+#ifdef CONFIG_STRICT_KERNEL_RWX
+#define MODULES_END	ALIGN_DOWN(PAGE_OFFSET, SZ_256M)
+#define MODULES_VADDR	(MODULES_END - SZ_256M)
+#endif
+
 #ifndef __ASSEMBLY__
 #include <linux/sched.h>
 #include <linux/threads.h>
diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
index 3f9ae35..082b988 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
@@ -34,11 +34,11 @@
 #define H_PUD_TABLE_SIZE	(sizeof(pud_t) << H_PUD_INDEX_SIZE)
 #define H_PGD_TABLE_SIZE	(sizeof(pgd_t) << H_PGD_INDEX_SIZE)
 
-#define H_PAGE_F_GIX_SHIFT	53
-#define H_PAGE_F_SECOND	_RPAGE_RPN44	/* HPTE is in 2ndary HPTEG */
-#define H_PAGE_F_GIX	(_RPAGE_RPN43 | _RPAGE_RPN42 | _RPAGE_RPN41)
-#define H_PAGE_BUSY	_RPAGE_RSV1     /* software: PTE & hash are busy */
-#define H_PAGE_HASHPTE	_RPAGE_RSV2     /* software: PTE & hash are busy */
+#define H_PAGE_F_GIX_SHIFT	_PAGE_PA_MAX
+#define H_PAGE_F_SECOND		_RPAGE_PKEY_BIT0 /* HPTE is in 2ndary HPTEG */
+#define H_PAGE_F_GIX		(_RPAGE_RPN43 | _RPAGE_RPN42 | _RPAGE_RPN41)
+#define H_PAGE_BUSY		_RPAGE_RSV1
+#define H_PAGE_HASHPTE		_RPAGE_PKEY_BIT4
 
 /* PTE flags to conserve for HPTE identification */
 #define _PAGE_HPTEFLAGS (H_PAGE_BUSY | H_PAGE_HASHPTE | \
@@ -57,11 +57,12 @@
 #define H_PMD_FRAG_NR	(PAGE_SIZE >> H_PMD_FRAG_SIZE_SHIFT)
 
 /* memory key bits, only 8 keys supported */
-#define H_PTE_PKEY_BIT0	0
-#define H_PTE_PKEY_BIT1	0
-#define H_PTE_PKEY_BIT2	_RPAGE_RSV3
-#define H_PTE_PKEY_BIT3	_RPAGE_RSV4
-#define H_PTE_PKEY_BIT4	_RPAGE_RSV5
+#define H_PTE_PKEY_BIT4	0
+#define H_PTE_PKEY_BIT3	0
+#define H_PTE_PKEY_BIT2	_RPAGE_PKEY_BIT3
+#define H_PTE_PKEY_BIT1	_RPAGE_PKEY_BIT2
+#define H_PTE_PKEY_BIT0	_RPAGE_PKEY_BIT1
 
 /*
  * On all 4K setups, remap_4k_pfn() equates to remap_pfn_range()
diff --git a/arch/powerpc/include/asm/book3s/64/hash-64k.h b/arch/powerpc/include/asm/book3s/64/hash-64k.h
index 0729c03..f20de11 100644
--- a/arch/powerpc/include/asm/book3s/64/hash-64k.h
+++ b/arch/powerpc/include/asm/book3s/64/hash-64k.h
@@ -32,15 +32,15 @@
  */
 #define H_PAGE_COMBO	_RPAGE_RPN0 /* this is a combo 4k page */
 #define H_PAGE_4K_PFN	_RPAGE_RPN1 /* PFN is for a single 4k page */
-#define H_PAGE_BUSY	_RPAGE_RPN44     /* software: PTE & hash are busy */
+#define H_PAGE_BUSY	_RPAGE_RSV1     /* software: PTE & hash are busy */
 #define H_PAGE_HASHPTE	_RPAGE_RPN43	/* PTE has associated HPTE */
 
 /* memory key bits. */
-#define H_PTE_PKEY_BIT0	_RPAGE_RSV1
-#define H_PTE_PKEY_BIT1	_RPAGE_RSV2
-#define H_PTE_PKEY_BIT2	_RPAGE_RSV3
-#define H_PTE_PKEY_BIT3	_RPAGE_RSV4
-#define H_PTE_PKEY_BIT4	_RPAGE_RSV5
+#define H_PTE_PKEY_BIT4		_RPAGE_PKEY_BIT4
+#define H_PTE_PKEY_BIT3		_RPAGE_PKEY_BIT3
+#define H_PTE_PKEY_BIT2		_RPAGE_PKEY_BIT2
+#define H_PTE_PKEY_BIT1		_RPAGE_PKEY_BIT1
+#define H_PTE_PKEY_BIT0		_RPAGE_PKEY_BIT0
 
 /*
  * We need to differentiate between explicit huge page and THP huge
diff --git a/arch/powerpc/include/asm/book3s/64/hash-pkey.h b/arch/powerpc/include/asm/book3s/64/hash-pkey.h
new file mode 100644
index 0000000..7950108
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/hash-pkey.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H
+#define _ASM_POWERPC_BOOK3S_64_HASH_PKEY_H
+
+static inline u64 hash__vmflag_to_pte_pkey_bits(u64 vm_flags)
+{
+	return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT0 : 0x0UL) |
+		((vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT1 : 0x0UL) |
+		((vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL) |
+		((vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT3 : 0x0UL) |
+		((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT4 : 0x0UL));
+}
+
+static inline u64 pte_to_hpte_pkey_bits(u64 pteflags)
+{
+	return (((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL) |
+		((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) |
+		((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) |
+		((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) |
+		((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL));
+}
+
+static inline u16 hash__pte_to_pkey_bits(u64 pteflags)
+{
+	return (((pteflags & H_PTE_PKEY_BIT4) ? 0x10 : 0x0UL) |
+		((pteflags & H_PTE_PKEY_BIT3) ? 0x8 : 0x0UL) |
+		((pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0UL) |
+		((pteflags & H_PTE_PKEY_BIT1) ? 0x2 : 0x0UL) |
+		((pteflags & H_PTE_PKEY_BIT0) ? 0x1 : 0x0UL));
+}
+
+#endif
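
As a worked example of the decode above: a PTE carrying
H_PTE_PKEY_BIT2 and H_PTE_PKEY_BIT0 yields key number 0x4 | 0x1 == 5:

	u16 key = hash__pte_to_pkey_bits(H_PTE_PKEY_BIT2 | H_PTE_PKEY_BIT0);
	/* key == 5 */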
diff --git a/arch/powerpc/include/asm/book3s/64/kexec.h b/arch/powerpc/include/asm/book3s/64/kexec.h
new file mode 100644
index 0000000..6b5c3a2
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/kexec.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_POWERPC_BOOK3S_64_KEXEC_H_
+#define _ASM_POWERPC_BOOK3S_64_KEXEC_H_
+
+
+#define reset_sprs reset_sprs
+static inline void reset_sprs(void)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+		mtspr(SPRN_AMR, 0);
+		mtspr(SPRN_UAMOR, 0);
+	}
+
+	if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
+		mtspr(SPRN_IAMR, 0);
+	}
+
+	/*  Do we need isync()? We are going via a kexec reset */
+	isync();
+}
+
+#endif
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 3fa1b96..93d18da 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -86,8 +86,8 @@
 #define HPTE_R_PP0		ASM_CONST(0x8000000000000000)
 #define HPTE_R_TS		ASM_CONST(0x4000000000000000)
 #define HPTE_R_KEY_HI		ASM_CONST(0x3000000000000000)
-#define HPTE_R_KEY_BIT0		ASM_CONST(0x2000000000000000)
-#define HPTE_R_KEY_BIT1		ASM_CONST(0x1000000000000000)
+#define HPTE_R_KEY_BIT4		ASM_CONST(0x2000000000000000)
+#define HPTE_R_KEY_BIT3		ASM_CONST(0x1000000000000000)
 #define HPTE_R_RPN_SHIFT	12
 #define HPTE_R_RPN		ASM_CONST(0x0ffffffffffff000)
 #define HPTE_R_RPN_3_0		ASM_CONST(0x01fffffffffff000)
@@ -103,8 +103,8 @@
 #define HPTE_R_R		ASM_CONST(0x0000000000000100)
 #define HPTE_R_KEY_LO		ASM_CONST(0x0000000000000e00)
 #define HPTE_R_KEY_BIT2		ASM_CONST(0x0000000000000800)
-#define HPTE_R_KEY_BIT3		ASM_CONST(0x0000000000000400)
-#define HPTE_R_KEY_BIT4		ASM_CONST(0x0000000000000200)
+#define HPTE_R_KEY_BIT1		ASM_CONST(0x0000000000000400)
+#define HPTE_R_KEY_BIT0		ASM_CONST(0x0000000000000200)
 #define HPTE_R_KEY		(HPTE_R_KEY_LO | HPTE_R_KEY_HI)
 
 #define HPTE_V_1TB_SEG		ASM_CONST(0x4000000000000000)
@@ -793,7 +793,7 @@
 }
 
 /*
- * For kernel space, we use context ids as below
+ * For kernel space, we use context ids as
  * below. Range is 512TB per context.
  *
  * 0x00001 -  [ 0xc000000000000000 - 0xc001ffffffffffff]
diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 5393a53..55442d4 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -82,6 +82,11 @@
 /* Base PID to allocate from */
 extern unsigned int mmu_base_pid;
 
+/*
+ * memory block size used with radix translation.
+ */
+extern unsigned int __ro_after_init radix_mem_block_size;
+
 #define PRTB_SIZE_SHIFT	(mmu_pid_bits + 4)
 #define PRTB_ENTRIES	(1ul << mmu_pid_bits)
 
@@ -209,6 +214,12 @@
 void mmu_early_init_devtree(void);
 void hash__early_init_devtree(void);
 void radix__early_init_devtree(void);
+#ifdef CONFIG_PPC_MEM_KEYS
+void pkey_early_init_devtree(void);
+#else
+static inline void pkey_early_init_devtree(void) {}
+#endif
+
 extern void hash__early_init_mmu(void);
 extern void radix__early_init_mmu(void);
 static inline void __init early_init_mmu(void)
diff --git a/arch/powerpc/include/asm/book3s/64/pgalloc.h b/arch/powerpc/include/asm/book3s/64/pgalloc.h
index 69c5b05..e1af0b3 100644
--- a/arch/powerpc/include/asm/book3s/64/pgalloc.h
+++ b/arch/powerpc/include/asm/book3s/64/pgalloc.h
@@ -107,9 +107,23 @@
 	return pud;
 }
 
+static inline void __pud_free(pud_t *pud)
+{
+	struct page *page = virt_to_page(pud);
+
+	/*
+	 * Early pud pages allocated via memblock allocator
+	 * can't be directly freed to slab
+	 */
+	if (PageReserved(page))
+		free_reserved_page(page);
+	else
+		kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
+}
+
 static inline void pud_free(struct mm_struct *mm, pud_t *pud)
 {
-	kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), pud);
+	return __pud_free(pud);
 }
 
 static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 25c3cb8..6de56c3b 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -20,9 +20,13 @@
 #define _PAGE_RW		(_PAGE_READ | _PAGE_WRITE)
 #define _PAGE_RWX		(_PAGE_READ | _PAGE_WRITE | _PAGE_EXEC)
 #define _PAGE_PRIVILEGED	0x00008 /* kernel access only */
-#define _PAGE_SAO		0x00010 /* Strong access order */
+
+#define _PAGE_CACHE_CTL		0x00030 /* Bits for the following cache modes */
+			/*	No bits set is normal cacheable memory */
+			/*	0x00010 unused, is SAO bit on radix POWER9 */
 #define _PAGE_NON_IDEMPOTENT	0x00020 /* non idempotent memory */
 #define _PAGE_TOLERANT		0x00030 /* tolerant memory, cache inhibited */
+
 #define _PAGE_DIRTY		0x00080 /* C: page changed */
 #define _PAGE_ACCESSED		0x00100 /* R: page referenced */
 /*
@@ -32,11 +36,13 @@
 #define _RPAGE_SW1		0x00800
 #define _RPAGE_SW2		0x00400
 #define _RPAGE_SW3		0x00200
-#define _RPAGE_RSV1		0x1000000000000000UL
-#define _RPAGE_RSV2		0x0800000000000000UL
-#define _RPAGE_RSV3		0x0400000000000000UL
-#define _RPAGE_RSV4		0x0200000000000000UL
-#define _RPAGE_RSV5		0x00040UL
+#define _RPAGE_RSV1		0x00040UL
+
+#define _RPAGE_PKEY_BIT4	0x1000000000000000UL
+#define _RPAGE_PKEY_BIT3	0x0800000000000000UL
+#define _RPAGE_PKEY_BIT2	0x0400000000000000UL
+#define _RPAGE_PKEY_BIT1	0x0200000000000000UL
+#define _RPAGE_PKEY_BIT0	0x0100000000000000UL
 
 #define _PAGE_PTE		0x4000000000000000UL	/* distinguishes PTEs from pointers */
 #define _PAGE_PRESENT		0x8000000000000000UL	/* pte contains a translation */
@@ -58,13 +64,12 @@
  */
 #define _RPAGE_RPN0		0x01000
 #define _RPAGE_RPN1		0x02000
-#define _RPAGE_RPN44		0x0100000000000000UL
 #define _RPAGE_RPN43		0x0080000000000000UL
 #define _RPAGE_RPN42		0x0040000000000000UL
 #define _RPAGE_RPN41		0x0020000000000000UL
 
 /* Max physical address bit as per radix table */
-#define _RPAGE_PA_MAX		57
+#define _RPAGE_PA_MAX		56
 
 /*
  * Max physical address bit we will use for now.
@@ -125,8 +130,6 @@
 			 _PAGE_ACCESSED | _PAGE_SPECIAL | _PAGE_PTE |	\
 			 _PAGE_SOFT_DIRTY | _PAGE_DEVMAP)
 
-#define H_PTE_PKEY  (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \
-		     H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4)
 /*
  * We define 2 sets of base prot bits, one for basic pages (ie,
  * cacheable kernel and user pages) and one for non cacheable
@@ -825,8 +828,6 @@
 	return hash__set_pte_at(mm, addr, ptep, pte, percpu);
 }
 
-#define _PAGE_CACHE_CTL	(_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT)
-
 #define pgprot_noncached pgprot_noncached
 static inline pgprot_t pgprot_noncached(pgprot_t prot)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/pkeys.h b/arch/powerpc/include/asm/book3s/64/pkeys.h
new file mode 100644
index 0000000..b7d9f42
--- /dev/null
+++ b/arch/powerpc/include/asm/book3s/64/pkeys.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _ASM_POWERPC_BOOK3S_64_PKEYS_H
+#define _ASM_POWERPC_BOOK3S_64_PKEYS_H
+
+#include <asm/book3s/64/hash-pkey.h>
+
+extern u64 __ro_after_init default_uamor;
+
+static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags)
+{
+	if (!mmu_has_feature(MMU_FTR_PKEY))
+		return 0x0UL;
+
+	if (radix_enabled())
+		BUG();
+	return hash__vmflag_to_pte_pkey_bits(vm_flags);
+}
+
+static inline u16 pte_to_pkey_bits(u64 pteflags)
+{
+	if (radix_enabled())
+		BUG();
+	return hash__pte_to_pkey_bits(pteflags);
+}
+
+#endif /*_ASM_POWERPC_KEYS_H */
diff --git a/arch/powerpc/include/asm/book3s/64/radix-4k.h b/arch/powerpc/include/asm/book3s/64/radix-4k.h
index d5f5ab7..035ceec 100644
--- a/arch/powerpc/include/asm/book3s/64/radix-4k.h
+++ b/arch/powerpc/include/asm/book3s/64/radix-4k.h
@@ -11,7 +11,7 @@
 #define RADIX_PGD_INDEX_SIZE  13  // size: 8B << 13 = 64KB, maps 2^13 x 512GB =   4PB
 
 /*
- * One fragment per per page
+ * One fragment per page
  */
 #define RADIX_PTE_FRAG_SIZE_SHIFT  (RADIX_PTE_INDEX_SIZE + 3)
 #define RADIX_PTE_FRAG_NR	(PAGE_SIZE >> RADIX_PTE_FRAG_SIZE_SHIFT)
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
index ca8db19..94439e0 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-radix.h
@@ -2,10 +2,25 @@
 #ifndef _ASM_POWERPC_TLBFLUSH_RADIX_H
 #define _ASM_POWERPC_TLBFLUSH_RADIX_H
 
+#include <asm/hvcall.h>
+
 struct vm_area_struct;
 struct mm_struct;
 struct mmu_gather;
 
+static inline u64 psize_to_rpti_pgsize(unsigned long psize)
+{
+	if (psize == MMU_PAGE_4K)
+		return H_RPTI_PAGE_4K;
+	if (psize == MMU_PAGE_64K)
+		return H_RPTI_PAGE_64K;
+	if (psize == MMU_PAGE_2M)
+		return H_RPTI_PAGE_2M;
+	if (psize == MMU_PAGE_1G)
+		return H_RPTI_PAGE_1G;
+	return H_RPTI_PAGE_ALL;
+}
+
 static inline int mmu_get_ap(int psize)
 {
 	return mmu_psize_defs[psize].ap;
diff --git a/arch/powerpc/include/asm/cacheflush.h b/arch/powerpc/include/asm/cacheflush.h
index de600b9..54764c6 100644
--- a/arch/powerpc/include/asm/cacheflush.h
+++ b/arch/powerpc/include/asm/cacheflush.h
@@ -6,6 +6,7 @@
 
 #include <linux/mm.h>
 #include <asm/cputable.h>
+#include <asm/cpu_has_feature.h>
 
 #ifdef CONFIG_PPC_BOOK3S_64
 /*
diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h
index bac2252..fdddb82 100644
--- a/arch/powerpc/include/asm/cputable.h
+++ b/arch/powerpc/include/asm/cputable.h
@@ -191,14 +191,14 @@
 #define CPU_FTR_SPURR			LONG_ASM_CONST(0x0000000001000000)
 #define CPU_FTR_DSCR			LONG_ASM_CONST(0x0000000002000000)
 #define CPU_FTR_VSX			LONG_ASM_CONST(0x0000000004000000)
-#define CPU_FTR_SAO			LONG_ASM_CONST(0x0000000008000000)
+// Free					LONG_ASM_CONST(0x0000000008000000)
 #define CPU_FTR_CP_USE_DCBTZ		LONG_ASM_CONST(0x0000000010000000)
 #define CPU_FTR_UNALIGNED_LD_STD	LONG_ASM_CONST(0x0000000020000000)
 #define CPU_FTR_ASYM_SMT		LONG_ASM_CONST(0x0000000040000000)
 #define CPU_FTR_STCX_CHECKS_ADDRESS	LONG_ASM_CONST(0x0000000080000000)
 #define CPU_FTR_POPCNTB			LONG_ASM_CONST(0x0000000100000000)
 #define CPU_FTR_POPCNTD			LONG_ASM_CONST(0x0000000200000000)
-#define CPU_FTR_PKEY			LONG_ASM_CONST(0x0000000400000000)
+/* LONG_ASM_CONST(0x0000000400000000) Free */
 #define CPU_FTR_VMX_COPY		LONG_ASM_CONST(0x0000000800000000)
 #define CPU_FTR_TM			LONG_ASM_CONST(0x0000001000000000)
 #define CPU_FTR_CFAR			LONG_ASM_CONST(0x0000002000000000)
@@ -214,6 +214,7 @@
 #define CPU_FTR_P9_TLBIE_ERAT_BUG	LONG_ASM_CONST(0x0001000000000000)
 #define CPU_FTR_P9_RADIX_PREFETCH_BUG	LONG_ASM_CONST(0x0002000000000000)
 #define CPU_FTR_ARCH_31			LONG_ASM_CONST(0x0004000000000000)
+#define CPU_FTR_DAWR1			LONG_ASM_CONST(0x0008000000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -435,32 +436,32 @@
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
-	    CPU_FTR_DSCR | CPU_FTR_SAO  | CPU_FTR_ASYM_SMT | \
+	    CPU_FTR_DSCR | CPU_FTR_ASYM_SMT | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_CFAR | CPU_FTR_HVMODE | \
-	    CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX | CPU_FTR_PKEY)
+	    CPU_FTR_VMX_COPY | CPU_FTR_HAS_PPR | CPU_FTR_DABRX)
 #define CPU_FTRS_POWER8 (CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
-	    CPU_FTR_DSCR | CPU_FTR_SAO  | \
+	    CPU_FTR_DSCR | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
 	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
-	    CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_PKEY)
+	    CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP)
 #define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
 #define CPU_FTRS_POWER9 (CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | CPU_FTR_ARCH_206 |\
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
-	    CPU_FTR_DSCR | CPU_FTR_SAO  | \
+	    CPU_FTR_DSCR | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
 	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
-	    CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
-	    CPU_FTR_P9_TLBIE_STQ_BUG | CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR)
+	    CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_P9_TLBIE_STQ_BUG | \
+	    CPU_FTR_P9_TLBIE_ERAT_BUG | CPU_FTR_P9_TIDR)
 #define CPU_FTRS_POWER9_DD2_0 (CPU_FTRS_POWER9 | CPU_FTR_P9_RADIX_PREFETCH_BUG)
 #define CPU_FTRS_POWER9_DD2_1 (CPU_FTRS_POWER9 | \
 			       CPU_FTR_P9_RADIX_PREFETCH_BUG | \
@@ -473,12 +474,12 @@
 	    CPU_FTR_MMCRA | CPU_FTR_SMT | \
 	    CPU_FTR_COHERENT_ICACHE | \
 	    CPU_FTR_PURR | CPU_FTR_SPURR | CPU_FTR_REAL_LE | \
-	    CPU_FTR_DSCR | CPU_FTR_SAO  | \
+	    CPU_FTR_DSCR | \
 	    CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
 	    CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
 	    CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_ARCH_207S | \
-	    CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_PKEY | \
-	    CPU_FTR_ARCH_31)
+	    CPU_FTR_TM_COMP | CPU_FTR_ARCH_300 | CPU_FTR_ARCH_31 | \
+	    CPU_FTR_DAWR | CPU_FTR_DAWR1)
 #define CPU_FTRS_CELL	(CPU_FTR_LWSYNC | \
 	    CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \
 	    CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \
@@ -628,9 +629,10 @@
 
 /*
 * Maximum number of hw breakpoints supported on powerpc. Number of
- * breakpoints supported by actual hw might be less than this.
+ * breakpoints supported by actual hw might be less than this, which
+ * is decided at run time in nr_wp_slots().
  */
-#define HBP_NUM_MAX	1
+#define HBP_NUM_MAX	2
 
 #endif /* !__ASSEMBLY__ */
 
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 0fccd5e..ed75d1c 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -36,6 +36,8 @@
 	return mulhdu((__force u64) ct, __cputime_usec_factor);
 }
 
+#define cputime_to_nsecs(cputime) tb_to_ns((__force u64)cputime)
+
 /*
  * PPC64 uses PACA which is task independent for storing accounting data while
  * PPC32 uses struct thread_info, therefore at task switch the accounting data
@@ -65,7 +67,7 @@
 
 /*
  * account_cpu_user_entry/exit runs "unreconciled", so can't trace,
- * can't use use get_paca()
+ * can't use get_paca()
  */
 static notrace inline void account_cpu_user_entry(void)
 {
diff --git a/arch/powerpc/include/asm/crashdump-ppc64.h b/arch/powerpc/include/asm/crashdump-ppc64.h
new file mode 100644
index 0000000..68d9717
--- /dev/null
+++ b/arch/powerpc/include/asm/crashdump-ppc64.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_CRASHDUMP_PPC64_H
+#define _ASM_POWERPC_CRASHDUMP_PPC64_H
+
+/*
+ * Backup region - first 64KB of System RAM
+ *
+ * If ever the below macros are to be changed, please be judicious.
+ * The implicit assumptions are:
+ *     - start, end & size are less than UINT32_MAX.
+ *     - start & size are at least 8 byte aligned.
+ *
+ * For implementation details: arch/powerpc/purgatory/trampoline_64.S
+ */
+#define BACKUP_SRC_START	0
+#define BACKUP_SRC_END		0xffff
+#define BACKUP_SRC_SIZE		(BACKUP_SRC_END - BACKUP_SRC_START + 1)
+
+#endif /* __ASM_POWERPC_CRASHDUMP_PPC64_H */
diff --git a/arch/powerpc/include/asm/dbell.h b/arch/powerpc/include/asm/dbell.h
index 4ce6808..3e9da22 100644
--- a/arch/powerpc/include/asm/dbell.h
+++ b/arch/powerpc/include/asm/dbell.h
@@ -11,8 +11,10 @@
 #include <linux/smp.h>
 #include <linux/threads.h>
 
+#include <asm/cputhreads.h>
 #include <asm/ppc-opcode.h>
 #include <asm/feature-fixups.h>
+#include <asm/kvm_ppc.h>
 
 #define PPC_DBELL_MSG_BRDCAST	(0x04000000)
 #define PPC_DBELL_TYPE(x)	(((x) & 0xf) << (63-36))
@@ -87,9 +89,6 @@
 
 #endif /* CONFIG_PPC_BOOK3S */
 
-extern void doorbell_global_ipi(int cpu);
-extern void doorbell_core_ipi(int cpu);
-extern int doorbell_try_core_ipi(int cpu);
 extern void doorbell_exception(struct pt_regs *regs);
 
 static inline void ppc_msgsnd(enum ppc_dbell type, u32 flags, u32 tag)
@@ -100,4 +99,63 @@
 	_ppc_msgsnd(msg);
 }
 
+#ifdef CONFIG_SMP
+
+/*
+ * Doorbells must only be used if CPU_FTR_DBELL is available.
+ * msgsnd is used in HV, and msgsndp is used in !HV.
+ *
+ * These should be used by platform code that is aware of restrictions.
+ * Other arch code should use ->cause_ipi.
+ *
+ * doorbell_global_ipi() sends a dbell to any target CPU.
+ * Must be used only by architectures that address msgsnd target
+ * by PIR/get_hard_smp_processor_id.
+ */
+static inline void doorbell_global_ipi(int cpu)
+{
+	u32 tag = get_hard_smp_processor_id(cpu);
+
+	kvmppc_set_host_ipi(cpu);
+	/* Order previous accesses vs. msgsnd, which is treated as a store */
+	ppc_msgsnd_sync();
+	ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
+}
+
+/*
+ * doorbell_core_ipi() sends a dbell to a target CPU in the same core.
+ * Must be used only by architectures that address msgsnd target
+ * by TIR/cpu_thread_in_core.
+ */
+static inline void doorbell_core_ipi(int cpu)
+{
+	u32 tag = cpu_thread_in_core(cpu);
+
+	kvmppc_set_host_ipi(cpu);
+	/* Order previous accesses vs. msgsnd, which is treated as a store */
+	ppc_msgsnd_sync();
+	ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
+}
+
+/*
+ * Attempt to cause a core doorbell if destination is on the same core.
+ * Returns 1 on success, 0 on failure.
+ */
+static inline int doorbell_try_core_ipi(int cpu)
+{
+	int this_cpu = get_cpu();
+	int ret = 0;
+
+	if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
+		doorbell_core_ipi(cpu);
+		ret = 1;
+	}
+
+	put_cpu();
+
+	return ret;
+}
+
+#endif /* CONFIG_SMP */
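
A hedged sketch of a caller honouring the restriction above: prefer
the cheaper same-core doorbell and fall back to the platform's
cause_ipi hook (shown only as an illustrative fallback path):

	static void send_ipi(int cpu)
	{
		if (cpu_has_feature(CPU_FTR_DBELL) && doorbell_try_core_ipi(cpu))
			return;
		smp_ops->cause_ipi(cpu);	/* illustrative fallback */
	}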
+
 #endif /* _ASM_POWERPC_DBELL_H */
diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h
index 45240221..d8a0729 100644
--- a/arch/powerpc/include/asm/device.h
+++ b/arch/powerpc/include/asm/device.h
@@ -44,6 +44,9 @@
 #ifdef CONFIG_CXL_BASE
 	struct cxl_context	*cxl_ctx;
 #endif
+#ifdef CONFIG_PCI_IOV
+	void *iov_data;
+#endif
 };
 
 struct pdev_archdata {
diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index 414d209..17ccc64 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -90,13 +90,14 @@
 }
 
 u64 drmem_lmb_memory_max(void);
-void __init walk_drmem_lmbs(struct device_node *dn,
-			void (*func)(struct drmem_lmb *, const __be32 **));
+int walk_drmem_lmbs(struct device_node *dn, void *data,
+		    int (*func)(struct drmem_lmb *, const __be32 **, void *));
 int drmem_update_dt(void);
 
 #ifdef CONFIG_PPC_PSERIES
-void __init walk_drmem_lmbs_early(unsigned long node,
-			void (*func)(struct drmem_lmb *, const __be32 **));
+int __init
+walk_drmem_lmbs_early(unsigned long node, void *data,
+		      int (*func)(struct drmem_lmb *, const __be32 **, void *));
 #endif
 
 static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index 964a542..d5f369b 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -133,7 +133,6 @@
 
 struct eeh_dev {
 	int mode;			/* EEH mode			*/
-	int class_code;			/* Class code of the device	*/
 	int bdfn;			/* bdfn of device (for cfg ops) */
 	struct pci_controller *controller;
 	int pe_config_addr;		/* PE config address		*/
@@ -148,7 +147,10 @@
 	struct pci_dn *pdn;		/* Associated PCI device node	*/
 	struct pci_dev *pdev;		/* Associated PCI device	*/
 	bool in_error;			/* Error flag for edev		*/
+
+	/* VF specific properties */
 	struct pci_dev *physfn;		/* Associated SRIOV PF		*/
+	int vf_index;			/* Index of this VF 		*/
 };
 
 /* "fmt" must be a simple literal string */
@@ -217,18 +219,17 @@
 	int (*init)(void);
 	struct eeh_dev *(*probe)(struct pci_dev *pdev);
 	int (*set_option)(struct eeh_pe *pe, int option);
-	int (*get_pe_addr)(struct eeh_pe *pe);
 	int (*get_state)(struct eeh_pe *pe, int *delay);
 	int (*reset)(struct eeh_pe *pe, int option);
 	int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len);
 	int (*configure_bridge)(struct eeh_pe *pe);
 	int (*err_inject)(struct eeh_pe *pe, int type, int func,
 			  unsigned long addr, unsigned long mask);
-	int (*read_config)(struct pci_dn *pdn, int where, int size, u32 *val);
-	int (*write_config)(struct pci_dn *pdn, int where, int size, u32 val);
+	int (*read_config)(struct eeh_dev *edev, int where, int size, u32 *val);
+	int (*write_config)(struct eeh_dev *edev, int where, int size, u32 val);
 	int (*next_error)(struct eeh_pe **pe);
-	int (*restore_config)(struct pci_dn *pdn);
-	int (*notify_resume)(struct pci_dn *pdn);
+	int (*restore_config)(struct eeh_dev *edev);
+	int (*notify_resume)(struct eeh_dev *edev);
 };
 
 extern int eeh_subsystem_flags;
@@ -282,8 +283,8 @@
 struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root);
 struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
 			  int pe_no, int config_addr);
-int eeh_add_to_parent_pe(struct eeh_dev *edev);
-int eeh_rmv_from_parent_pe(struct eeh_dev *edev);
+int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent);
+int eeh_pe_tree_remove(struct eeh_dev *edev);
 void eeh_pe_update_time_stamp(struct eeh_pe *pe);
 void *eeh_pe_traverse(struct eeh_pe *root,
 		      eeh_pe_traverse_func fn, void *flag);
@@ -293,8 +294,6 @@
 const char *eeh_pe_loc_get(struct eeh_pe *pe);
 struct pci_bus *eeh_pe_bus_get(struct eeh_pe *pe);
 
-struct eeh_dev *eeh_dev_init(struct pci_dn *pdn);
-void eeh_dev_phb_init_dynamic(struct pci_controller *phb);
 void eeh_show_enabled(void);
 int __init eeh_ops_register(struct eeh_ops *ops);
 int __exit eeh_ops_unregister(const char *name);
@@ -314,7 +313,6 @@
 int eeh_pe_configure(struct eeh_pe *pe);
 int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
 		      unsigned long addr, unsigned long mask);
-int eeh_restore_vf_config(struct pci_dn *pdn);
 
 /**
  * EEH_POSSIBLE_ERROR() -- test for possible MMIO failure.
@@ -340,11 +338,6 @@
 
 static inline void eeh_show_enabled(void) { }
 
-static inline void *eeh_dev_init(struct pci_dn *pdn, void *data)
-{
-	return NULL;
-}
-
 static inline void eeh_dev_phb_init_dynamic(struct pci_controller *phb) { }
 
 static inline int eeh_check_failure(const volatile void __iomem *token)
@@ -362,6 +355,7 @@
 
 #define EEH_POSSIBLE_ERROR(val, type) (0)
 #define EEH_IO_ERROR_VALUE(size) (-1UL)
+static inline int eeh_phb_pe_create(struct pci_controller *phb) { return 0; }
 #endif /* CONFIG_EEH */
 
 #if defined(CONFIG_PPC_PSERIES) && defined(CONFIG_EEH)
diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h
index d3a7e36..c99ba08 100644
--- a/arch/powerpc/include/asm/epapr_hcalls.h
+++ b/arch/powerpc/include/asm/epapr_hcalls.h
@@ -37,7 +37,7 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/* A "hypercall" is an "sc 1" instruction.  This header file file provides C
+/* A "hypercall" is an "sc 1" instruction.  This header file provides C
 * wrapper functions for the ePAPR hypervisor interface.  It is intended
  * for use by Linux device drivers and other operating systems.
  *
@@ -246,7 +246,7 @@
  * ev_int_eoi - signal the end of interrupt processing
  * @interrupt: the interrupt number
  *
- * This function signals the end of processing for the the specified
+ * This function signals the end of processing for the specified
  * interrupt, which must be the interrupt currently in service. By
  * definition, this is also the highest-priority interrupt.
  *
diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h
index 54a98ef..40cdcb2 100644
--- a/arch/powerpc/include/asm/exception-64e.h
+++ b/arch/powerpc/include/asm/exception-64e.h
@@ -66,14 +66,7 @@
 #define EX_TLB_SRR0	(10 * 8)
 #define EX_TLB_SRR1	(11 * 8)
 #define EX_TLB_R7	(12 * 8)
-#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
-#define EX_TLB_R8	(13 * 8)
-#define EX_TLB_R9	(14 * 8)
-#define EX_TLB_LR	(15 * 8)
-#define EX_TLB_SIZE	(16 * 8)
-#else
 #define EX_TLB_SIZE	(13 * 8)
-#endif
 
 #define	START_EXCEPTION(label)						\
 	.globl exc_##label##_book3e;					\
@@ -110,8 +103,7 @@
 	std	r11,EX_TLB_R12(r12);					    \
 	mtspr	SPRN_SPRG_TLB_EXFRAME,r14;				    \
 	std	r15,EX_TLB_SRR1(r12);					    \
-	std	r16,EX_TLB_SRR0(r12);					    \
-	TLB_MISS_PROLOG_STATS
+	std	r16,EX_TLB_SRR0(r12);
 
 /* And these are the matching epilogs that restores things
  *
@@ -143,7 +135,6 @@
 	mtspr	SPRN_SRR0,r15;						    \
 	ld	r15,EX_TLB_R15(r12);					    \
 	mtspr	SPRN_SRR1,r16;						    \
-	TLB_MISS_RESTORE_STATS						    \
 	ld	r16,EX_TLB_R16(r12);					    \
 	ld	r12,EX_TLB_R12(r12);					    \
 
@@ -158,53 +149,15 @@
 	addi	r11,r13,PACA_EXTLB;					    \
 	TLB_MISS_RESTORE(r11)
 
-#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
-#define TLB_MISS_PROLOG_STATS						    \
-	mflr	r10;							    \
-	std	r8,EX_TLB_R8(r12);					    \
-	std	r9,EX_TLB_R9(r12);					    \
-	std	r10,EX_TLB_LR(r12);
-#define TLB_MISS_RESTORE_STATS					            \
-	ld	r16,EX_TLB_LR(r12);					    \
-	ld	r9,EX_TLB_R9(r12);					    \
-	ld	r8,EX_TLB_R8(r12);					    \
-	mtlr	r16;
-#define TLB_MISS_STATS_D(name)						    \
-	addi	r9,r13,MMSTAT_DSTATS+name;				    \
-	bl	tlb_stat_inc;
-#define TLB_MISS_STATS_I(name)						    \
-	addi	r9,r13,MMSTAT_ISTATS+name;				    \
-	bl	tlb_stat_inc;
-#define TLB_MISS_STATS_X(name)						    \
-	ld	r8,PACA_EXTLB+EX_TLB_ESR(r13);				    \
-	cmpdi	cr2,r8,-1;						    \
-	beq	cr2,61f;						    \
-	addi	r9,r13,MMSTAT_DSTATS+name;				    \
-	b	62f;							    \
-61:	addi	r9,r13,MMSTAT_ISTATS+name;				    \
-62:	bl	tlb_stat_inc;
-#define TLB_MISS_STATS_SAVE_INFO					    \
-	std	r14,EX_TLB_ESR(r12);	/* save ESR */
-#define TLB_MISS_STATS_SAVE_INFO_BOLTED					    \
-	std	r14,PACA_EXTLB+EX_TLB_ESR(r13);	/* save ESR */
-#else
-#define TLB_MISS_PROLOG_STATS
-#define TLB_MISS_RESTORE_STATS
-#define TLB_MISS_PROLOG_STATS_BOLTED
-#define TLB_MISS_RESTORE_STATS_BOLTED
-#define TLB_MISS_STATS_D(name)
-#define TLB_MISS_STATS_I(name)
-#define TLB_MISS_STATS_X(name)
-#define TLB_MISS_STATS_Y(name)
-#define TLB_MISS_STATS_SAVE_INFO
-#define TLB_MISS_STATS_SAVE_INFO_BOLTED
-#endif
-
 #define SET_IVOR(vector_number, vector_offset)	\
 	LOAD_REG_ADDR(r3,interrupt_base_book3e);\
 	ori	r3,r3,vector_offset@l;		\
 	mtspr	SPRN_IVOR##vector_number,r3;
-
+/*
+ * powerpc relies on return from interrupt/syscall being context synchronising
+ * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional
+ * synchronisation instructions.
+ */
 #define RFI_TO_KERNEL							\
 	rfi
 
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 47bd4ea..ebe95aa 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -68,6 +68,14 @@
  *
  * The nop instructions allow us to insert one or more instructions to flush the
  * L1-D cache when returning to userspace or a guest.
+ *
+ * powerpc relies on return from interrupt/syscall being context synchronising
+ * (which hrfid, rfid, and rfscv are) to support ARCH_HAS_MEMBARRIER_SYNC_CORE
+ * without additional synchronisation instructions.
+ *
+ * soft-masked interrupt replay does not include a context-synchronising rfid,
+ * but those replays always return to the kernel; the sync is only required
+ * when returning to user.
  */
 #define RFI_FLUSH_SLOT							\
 	RFI_FLUSH_FIXUP_SECTION;					\
@@ -123,6 +131,12 @@
 	hrfid;								\
 	b	hrfi_flush_fallback
 
+#define RFSCV_TO_USER							\
+	STF_EXIT_BARRIER_SLOT;						\
+	RFI_FLUSH_SLOT;							\
+	RFSCV;								\
+	b	rfscv_flush_fallback
+
 #endif /* __ASSEMBLY__ */
 
 #endif	/* _ASM_POWERPC_EXCEPTION_H */
diff --git a/arch/powerpc/include/asm/firmware.h b/arch/powerpc/include/asm/firmware.h
index 6003c2e..0b295bd 100644
--- a/arch/powerpc/include/asm/firmware.h
+++ b/arch/powerpc/include/asm/firmware.h
@@ -52,6 +52,7 @@
 #define FW_FEATURE_PAPR_SCM 	ASM_CONST(0x0000002000000000)
 #define FW_FEATURE_ULTRAVISOR	ASM_CONST(0x0000004000000000)
 #define FW_FEATURE_STUFF_TCE	ASM_CONST(0x0000008000000000)
+#define FW_FEATURE_RPT_INVALIDATE ASM_CONST(0x0000010000000000)
 
 #ifndef __ASSEMBLY__
 
@@ -71,7 +72,8 @@
 		FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
 		FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
 		FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE |
-		FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR,
+		FW_FEATURE_PAPR_SCM | FW_FEATURE_ULTRAVISOR |
+		FW_FEATURE_RPT_INVALIDATE,
 	FW_FEATURE_PSERIES_ALWAYS = 0,
 	FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL | FW_FEATURE_ULTRAVISOR,
 	FW_FEATURE_POWERNV_ALWAYS = 0,
@@ -132,6 +134,12 @@
 
 extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup;
 
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
+bool is_kvm_guest(void);
+#else
+static inline bool is_kvm_guest(void) { return false; }
+#endif
+
 #ifdef CONFIG_PPC_PSERIES
 void pseries_probe_fw_features(void);
 #else
diff --git a/arch/powerpc/include/asm/fixmap.h b/arch/powerpc/include/asm/fixmap.h
index 29188810..925cf89 100644
--- a/arch/powerpc/include/asm/fixmap.h
+++ b/arch/powerpc/include/asm/fixmap.h
@@ -52,7 +52,7 @@
 	FIX_HOLE,
 	/* reserve the top 128K for early debugging purposes */
 	FIX_EARLY_DEBUG_TOP = FIX_HOLE,
-	FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+((128*1024)/PAGE_SIZE)-1,
+	FIX_EARLY_DEBUG_BASE = FIX_EARLY_DEBUG_TOP+(ALIGN(SZ_128K, PAGE_SIZE)/PAGE_SIZE)-1,
 #ifdef CONFIG_HIGHMEM
 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h
index f1e9067..f133b59 100644
--- a/arch/powerpc/include/asm/hardirq.h
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -13,7 +13,6 @@
 	unsigned int pmu_irqs;
 	unsigned int mce_exceptions;
 	unsigned int spurious_irqs;
-	unsigned int hmi_exceptions;
 	unsigned int sreset_irqs;
 #ifdef CONFIG_PPC_WATCHDOG
 	unsigned int soft_nmi_irqs;
diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h
index 2dabcf6..4cb9efa 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -128,7 +128,7 @@
 	.if ((start) % (size) != 0);				\
 	.error "Fixed section exception vector misalignment";	\
 	.endif;							\
-	.if ((size) != 0x20) && ((size) != 0x80) && ((size) != 0x100); \
+	.if ((size) != 0x20) && ((size) != 0x80) && ((size) != 0x100) && ((size) != 0x1000); \
 	.error "Fixed section exception vector bad size";	\
 	.endif;							\
 	.if (start) < sname##_start;				\
diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 551a9d4..013165e 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -57,6 +57,7 @@
 			       unsigned long addr, pte_t *ptep,
 			       pte_t pte, int dirty);
 
+void gigantic_hugetlb_cma_reserve(void) __init;
 #include <asm-generic/hugetlb.h>
 
 #else /* ! CONFIG_HUGETLB_PAGE */
@@ -71,6 +72,12 @@
 {
 	return NULL;
 }
+
+
+{
+}
+
 #endif /* CONFIG_HUGETLB_PAGE */
 
 #endif /* _ASM_POWERPC_HUGETLB_H */
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index e90c073..fbb3770 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -237,7 +237,7 @@
 #define H_CREATE_RPT            0x1A4
 #define H_REMOVE_RPT            0x1A8
 #define H_REGISTER_RPAGES       0x1AC
-#define H_DISABLE_AND_GETC      0x1B0
+#define H_DISABLE_AND_GET       0x1B0
 #define H_ERROR_DATA            0x1B4
 #define H_GET_HCA_INFO          0x1B8
 #define H_GET_PERF_COUNT        0x1BC
@@ -305,7 +305,8 @@
 #define H_SCM_UNBIND_ALL        0x3FC
 #define H_SCM_HEALTH            0x400
 #define H_SCM_PERFORMANCE_STATS 0x418
-#define MAX_HCALL_OPCODE	H_SCM_PERFORMANCE_STATS
+#define H_RPT_INVALIDATE	0x448
+#define MAX_HCALL_OPCODE	H_RPT_INVALIDATE
 
 /* Scope args for H_SCM_UNBIND_ALL */
 #define H_UNBIND_SCOPE_ALL (0x1)
@@ -354,9 +355,10 @@
 
 /* Values for 2nd argument to H_SET_MODE */
 #define H_SET_MODE_RESOURCE_SET_CIABR		1
-#define H_SET_MODE_RESOURCE_SET_DAWR		2
+#define H_SET_MODE_RESOURCE_SET_DAWR0		2
 #define H_SET_MODE_RESOURCE_ADDR_TRANS_MODE	3
 #define H_SET_MODE_RESOURCE_LE			4
+#define H_SET_MODE_RESOURCE_SET_DAWR1		5
 
 /* Values for argument to H_SIGNAL_SYS_RESET */
 #define H_SIGNAL_SYS_RESET_ALL			-1
@@ -389,6 +391,37 @@
 #define PROC_TABLE_RADIX	0x04
 #define PROC_TABLE_GTSE		0x01
 
+/*
+ * Defines for
+ * H_RPT_INVALIDATE - Invalidate RPT translation lookaside information.
+ */
+
+/* Type of translation to invalidate (type) */
+#define H_RPTI_TYPE_NESTED	0x0001	/* Invalidate nested guest partition-scope */
+#define H_RPTI_TYPE_TLB		0x0002	/* Invalidate TLB */
+#define H_RPTI_TYPE_PWC		0x0004	/* Invalidate Page Walk Cache */
+/* Invalidate Process Table Entries if H_RPTI_TYPE_NESTED is clear */
+#define H_RPTI_TYPE_PRT		0x0008
+/* Invalidate Partition Table Entries if H_RPTI_TYPE_NESTED is set */
+#define H_RPTI_TYPE_PAT		0x0008
+#define H_RPTI_TYPE_ALL		(H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \
+				 H_RPTI_TYPE_PRT)
+#define H_RPTI_TYPE_NESTED_ALL	(H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC | \
+				 H_RPTI_TYPE_PAT)
+
+/* Invalidation targets (target) */
+#define H_RPTI_TARGET_CMMU		0x01 /* All virtual processors in the partition */
+#define H_RPTI_TARGET_CMMU_LOCAL	0x02 /* Current virtual processor */
+/* All nest/accelerator agents in use by the partition */
+#define H_RPTI_TARGET_NMMU		0x04
+
+/* Page size mask (page sizes) */
+#define H_RPTI_PAGE_4K	0x01
+#define H_RPTI_PAGE_64K	0x02
+#define H_RPTI_PAGE_2M	0x04
+#define H_RPTI_PAGE_1G	0x08
+#define H_RPTI_PAGE_ALL (-1UL)
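
For illustration (a sketch, not code from this patch): invalidating
TLB and page-walk-cache entries for 64K and 2M pages across all
virtual processors in the partition would compose the arguments as:

	static long rpt_invalidate_range(u64 pid, u64 start, u64 end)
	{
		u64 type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC;
		u64 pg_sizes = H_RPTI_PAGE_64K | H_RPTI_PAGE_2M;

		return plpar_hcall_norets(H_RPT_INVALIDATE, pid,
					  H_RPTI_TARGET_CMMU, type,
					  pg_sizes, start, end);
	}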
+
 #ifndef __ASSEMBLY__
 #include <linux/types.h>
 
diff --git a/arch/powerpc/include/asm/hw_breakpoint.h b/arch/powerpc/include/asm/hw_breakpoint.h
index cb42479..db206a7 100644
--- a/arch/powerpc/include/asm/hw_breakpoint.h
+++ b/arch/powerpc/include/asm/hw_breakpoint.h
@@ -9,6 +9,8 @@
 #ifndef _PPC_BOOK3S_64_HW_BREAKPOINT_H
 #define _PPC_BOOK3S_64_HW_BREAKPOINT_H
 
+#include <asm/cpu_has_feature.h>
+
 #ifdef	__KERNEL__
 struct arch_hw_breakpoint {
 	unsigned long	address;
@@ -17,7 +19,7 @@
 	u16		hw_len; /* length programmed in hw */
 };
 
-/* Note: Don't change the the first 6 bits below as they are in the same order
+/* Note: Don't change the first 6 bits below as they are in the same order
  * as the dabr and dabrx.
  */
 #define HW_BRK_TYPE_READ		0x01
@@ -46,7 +48,7 @@
 
 static inline int nr_wp_slots(void)
 {
-	return HBP_NUM_MAX;
+	return cpu_has_feature(CPU_FTR_DAWR1) ? 2 : 1;
 }
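
Callers should size loops off this helper rather than the compile-time
maximum; a sketch (setup_wp_slot() is a hypothetical per-slot helper):

	int i;

	for (i = 0; i < nr_wp_slots(); i++)
		setup_wp_slot(i);	/* hypothetical per-slot setup */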
 
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
diff --git a/arch/powerpc/include/asm/hydra.h b/arch/powerpc/include/asm/hydra.h
index b3b0f2d..ae02eb5 100644
--- a/arch/powerpc/include/asm/hydra.h
+++ b/arch/powerpc/include/asm/hydra.h
@@ -10,7 +10,7 @@
  *
  *	© Copyright 1995 Apple Computer, Inc. All rights reserved.
  *
- *  It's available online from http://www.cpu.lu/~mlan/ftp/MacTech.pdf
+ *  It's available online from https://www.cpu.lu/~mlan/ftp/MacTech.pdf
  *  You can obtain paper copies of this book from computer bookstores or by
  *  writing Morgan Kaufmann Publishers, Inc., 340 Pine Street, Sixth Floor, San
  *  Francisco, CA 94104. Reference ISBN 1-55860-393-X.
diff --git a/arch/powerpc/include/asm/imc-pmu.h b/arch/powerpc/include/asm/imc-pmu.h
index 4da4fcb..4f89799 100644
--- a/arch/powerpc/include/asm/imc-pmu.h
+++ b/arch/powerpc/include/asm/imc-pmu.h
@@ -99,6 +99,11 @@
  */
 #define IMC_TRACE_RECORD_TB1_MASK      0x3ffffffffffULL
 
+/*
+ * Bit 0:1 in third DW of IMC trace record
+ * specifies the MSR[HV PR] values.
+ */
+#define IMC_TRACE_RECORD_VAL_HVPR(x)	((x) >> 62)
 
 /*
  * Device tree parser code detects IMC pmu support and
diff --git a/arch/powerpc/include/asm/inst.h b/arch/powerpc/include/asm/inst.h
index 45f3ec8..cc73c12 100644
--- a/arch/powerpc/include/asm/inst.h
+++ b/arch/powerpc/include/asm/inst.h
@@ -122,6 +122,25 @@
 #endif
 }
 
+#define PPC_INST_STR_LEN sizeof("00000000 00000000")
+
+static inline char *__ppc_inst_as_str(char str[PPC_INST_STR_LEN], struct ppc_inst x)
+{
+	if (ppc_inst_prefixed(x))
+		sprintf(str, "%08x %08x", ppc_inst_val(x), ppc_inst_suffix(x));
+	else
+		sprintf(str, "%08x", ppc_inst_val(x));
+
+	return str;
+}
+
+#define ppc_inst_as_str(x)		\
+({					\
+	char __str[PPC_INST_STR_LEN];	\
+	__ppc_inst_as_str(__str, x);	\
+	__str;				\
+})
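
Usage is then a one-liner in printk-style callers; a small sketch,
assuming "instr" is a struct ppc_inst in scope:

	pr_err("faulting instruction: %s\n", ppc_inst_as_str(instr));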
+
 int probe_user_read_inst(struct ppc_inst *inst,
 			 struct ppc_inst __user *nip);
 
diff --git a/arch/powerpc/include/asm/kasan.h b/arch/powerpc/include/asm/kasan.h
index be85c70..d635b96 100644
--- a/arch/powerpc/include/asm/kasan.h
+++ b/arch/powerpc/include/asm/kasan.h
@@ -27,10 +27,12 @@
 
 #ifdef CONFIG_KASAN
 void kasan_early_init(void);
+void kasan_mmu_init(void);
 void kasan_init(void);
 void kasan_late_init(void);
 #else
 static inline void kasan_init(void) { }
+static inline void kasan_mmu_init(void) { }
 static inline void kasan_late_init(void) { }
 #endif
 
diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
index c684768..55d6ede 100644
--- a/arch/powerpc/include/asm/kexec.h
+++ b/arch/powerpc/include/asm/kexec.h
@@ -100,15 +100,26 @@
 #ifdef CONFIG_KEXEC_FILE
 extern const struct kexec_file_ops kexec_elf64_ops;
 
-#ifdef CONFIG_IMA_KEXEC
 #define ARCH_HAS_KIMAGE_ARCH
 
 struct kimage_arch {
+	struct crash_mem *exclude_ranges;
+
+	unsigned long backup_start;
+	void *backup_buf;
+
+	unsigned long elfcorehdr_addr;
+	unsigned long elf_headers_sz;
+	void *elf_headers;
+
+#ifdef CONFIG_IMA_KEXEC
 	phys_addr_t ima_buffer_addr;
 	size_t ima_buffer_size;
-};
 #endif
+};
 
+char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
+			  unsigned long cmdline_len);
 int setup_purgatory(struct kimage *image, const void *slave_code,
 		    const void *fdt, unsigned long kernel_load_addr,
 		    unsigned long fdt_load_addr);
@@ -116,6 +127,20 @@
 		  unsigned long initrd_load_addr, unsigned long initrd_len,
 		  const char *cmdline);
 int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size);
+
+#ifdef CONFIG_PPC64
+struct kexec_buf;
+
+int load_crashdump_segments_ppc64(struct kimage *image,
+				  struct kexec_buf *kbuf);
+int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
+			  const void *fdt, unsigned long kernel_load_addr,
+			  unsigned long fdt_load_addr);
+int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
+			unsigned long initrd_load_addr,
+			unsigned long initrd_len, const char *cmdline);
+#endif /* CONFIG_PPC64 */
+
 #endif /* CONFIG_KEXEC_FILE */
 
 #else /* !CONFIG_KEXEC_CORE */
@@ -150,6 +175,18 @@
 }
 
 #endif /* CONFIG_KEXEC_CORE */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/kexec.h>
+#endif
+
+#ifndef reset_sprs
+#define reset_sprs reset_sprs
+static inline void reset_sprs(void)
+{
+}
+#endif
+
 #endif /* ! __ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_KEXEC_H */
diff --git a/arch/powerpc/include/asm/kexec_ranges.h b/arch/powerpc/include/asm/kexec_ranges.h
new file mode 100644
index 0000000..7a90000
--- /dev/null
+++ b/arch/powerpc/include/asm/kexec_ranges.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _ASM_POWERPC_KEXEC_RANGES_H
+#define _ASM_POWERPC_KEXEC_RANGES_H
+
+#define MEM_RANGE_CHUNK_SZ		2048	/* Memory ranges size chunk */
+
+void sort_memory_ranges(struct crash_mem *mrngs, bool merge);
+struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges);
+int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size);
+int add_tce_mem_ranges(struct crash_mem **mem_ranges);
+int add_initrd_mem_range(struct crash_mem **mem_ranges);
+#ifdef CONFIG_PPC_BOOK3S_64
+int add_htab_mem_range(struct crash_mem **mem_ranges);
+#else
+static inline int add_htab_mem_range(struct crash_mem **mem_ranges)
+{
+	return 0;
+}
+#endif
+int add_kernel_mem_range(struct crash_mem **mem_ranges);
+int add_rtas_mem_range(struct crash_mem **mem_ranges);
+int add_opal_mem_range(struct crash_mem **mem_ranges);
+int add_reserved_mem_ranges(struct crash_mem **mem_ranges);
+
+#endif /* _ASM_POWERPC_KEXEC_RANGES_H */
diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h
index 45704f2..078f464 100644
--- a/arch/powerpc/include/asm/kvm_book3s_asm.h
+++ b/arch/powerpc/include/asm/kvm_book3s_asm.h
@@ -119,7 +119,7 @@
 	void __iomem *xive_tima_virt;
 	u32 saved_xirr;
 	u64 dabr;
-	u64 host_mmcr[7];	/* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER */
+	u64 host_mmcr[10];	/* MMCR 0,1,A, SIAR, SDAR, MMCR2, SIER, MMCR3, SIER2/3 */
 	u32 host_pmc[8];
 	u64 host_purr;
 	u64 host_spurr;
diff --git a/arch/powerpc/include/asm/kvm_booke.h b/arch/powerpc/include/asm/kvm_booke.h
index 310ba48..0c3401b 100644
--- a/arch/powerpc/include/asm/kvm_booke.h
+++ b/arch/powerpc/include/asm/kvm_booke.h
@@ -89,10 +89,12 @@
 	return vcpu->arch.regs.nip;
 }
 
+#ifdef CONFIG_BOOKE
 static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
 {
 	return vcpu->arch.fault_dear;
 }
+#endif
 
 static inline bool kvmppc_supports_magic_page(struct kvm_vcpu *vcpu)
 {
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 7e2d061..e020d26 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -637,12 +637,14 @@
 	u32 ccr1;
 	u32 dbsr;
 
-	u64 mmcr[5];
+	u64 mmcr[4];	/* MMCR0, MMCR1, MMCR2, MMCR3 */
+	u64 mmcra;
+	u64 mmcrs;
 	u32 pmc[8];
 	u32 spmc[2];
 	u64 siar;
 	u64 sdar;
-	u64 sier;
+	u64 sier[3];
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 	u64 tfhar;
 	u64 texasr;
diff --git a/arch/powerpc/include/asm/kvm_para.h b/arch/powerpc/include/asm/kvm_para.h
index 9c1f6b4..7446120 100644
--- a/arch/powerpc/include/asm/kvm_para.h
+++ b/arch/powerpc/include/asm/kvm_para.h
@@ -8,35 +8,15 @@
 #ifndef __POWERPC_KVM_PARA_H__
 #define __POWERPC_KVM_PARA_H__
 
+#include <asm/firmware.h>
+
 #include <uapi/asm/kvm_para.h>
 
-#ifdef CONFIG_KVM_GUEST
-
-#include <linux/of.h>
-
 static inline int kvm_para_available(void)
 {
-	struct device_node *hyper_node;
-
-	hyper_node = of_find_node_by_path("/hypervisor");
-	if (!hyper_node)
-		return 0;
-
-	if (!of_device_is_compatible(hyper_node, "linux,kvm"))
-		return 0;
-
-	return 1;
+	return IS_ENABLED(CONFIG_KVM_GUEST) && is_kvm_guest();
 }
 
-#else
-
-static inline int kvm_para_available(void)
-{
-	return 0;
-}
-
-#endif
-
 static inline unsigned int kvm_arch_para_features(void)
 {
 	unsigned long r;
diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h
index 7bcb644..a90b892 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -131,7 +131,7 @@
 				    unsigned long dabrx);
 
 	/* Set DAWR for this platform, leave empty for default implementation */
-	int		(*set_dawr)(unsigned long dawr,
+	int		(*set_dawr)(int nr, unsigned long dawr,
 				    unsigned long dawrx);
 
 #ifdef CONFIG_PPC32	/* XXX for now */
diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h
index 376a395..adf2cda 100644
--- a/arch/powerpc/include/asm/mce.h
+++ b/arch/powerpc/include/asm/mce.h
@@ -86,6 +86,7 @@
 enum MCE_UserErrorType {
 	MCE_USER_ERROR_INDETERMINATE = 0,
 	MCE_USER_ERROR_TLBIE = 1,
+	MCE_USER_ERROR_SCV = 2,
 };
 
 enum MCE_RaErrorType {
@@ -220,6 +221,8 @@
 unsigned long addr_to_pfn(struct pt_regs *regs, unsigned long addr);
 extern void mce_common_process_ue(struct pt_regs *regs,
 				  struct mce_error_info *mce_err);
+int mce_register_notifier(struct notifier_block *nb);
+int mce_unregister_notifier(struct notifier_block *nb);
 #ifdef CONFIG_PPC_BOOK3S_64
 void flush_and_reload_slb(void);
 #endif /* CONFIG_PPC_BOOK3S_64 */
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index d610c2e..7c07728 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -13,42 +13,20 @@
 #include <linux/pkeys.h>
 #include <asm/cpu_has_feature.h>
 
-/*
- * This file is included by linux/mman.h, so we can't use cacl_vm_prot_bits()
- * here.  How important is the optimization?
- */
+#ifdef CONFIG_PPC_MEM_KEYS
 static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
 		unsigned long pkey)
 {
-#ifdef CONFIG_PPC_MEM_KEYS
-	return (((prot & PROT_SAO) ? VM_SAO : 0) | pkey_to_vmflag_bits(pkey));
-#else
-	return ((prot & PROT_SAO) ? VM_SAO : 0);
-#endif
+	return pkey_to_vmflag_bits(pkey);
 }
 #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)
 
 static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
 {
-#ifdef CONFIG_PPC_MEM_KEYS
-	return (vm_flags & VM_SAO) ?
-		__pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) :
-		__pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags));
-#else
-	return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0);
-#endif
+	return __pgprot(vmflag_to_pte_pkey_bits(vm_flags));
 }
 #define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)
-
-static inline bool arch_validate_prot(unsigned long prot, unsigned long addr)
-{
-	if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO))
-		return false;
-	if ((prot & PROT_SAO) && !cpu_has_feature(CPU_FTR_SAO))
-		return false;
-	return true;
-}
-#define arch_validate_prot arch_validate_prot
+#endif
 
 #endif /* CONFIG_PPC64 */
 #endif	/* _ASM_POWERPC_MMAN_H */
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index f4ac25d..255a183 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -29,6 +29,19 @@
  */
 
 /*
+ * Support for KUEP feature.
+ */
+#define MMU_FTR_KUEP			ASM_CONST(0x00000400)
+
+/*
+ * Support for memory protection keys.
+ */
+#define MMU_FTR_PKEY			ASM_CONST(0x00000800)
+
+/* Guest Translation Shootdown Enable */
+#define MMU_FTR_GTSE			ASM_CONST(0x00001000)
+
+/*
  * Support for 68 bit VA space. We added that from ISA 2.05
  */
 #define MMU_FTR_68_BIT_VA		ASM_CONST(0x00002000)
@@ -173,10 +186,18 @@
 #endif
 #ifdef CONFIG_PPC_RADIX_MMU
 		MMU_FTR_TYPE_RADIX |
+		MMU_FTR_GTSE |
 #ifdef CONFIG_PPC_KUAP
 		MMU_FTR_RADIX_KUAP |
 #endif /* CONFIG_PPC_KUAP */
 #endif /* CONFIG_PPC_RADIX_MMU */
+#ifdef CONFIG_PPC_MEM_KEYS
+		MMU_FTR_PKEY |
+#endif
+#ifdef CONFIG_PPC_KUEP
+		MMU_FTR_KUEP |
+#endif /* CONFIG_PPC_KUEP */
+
 		0,
 };
 
@@ -356,6 +377,8 @@
 				       phys_addr_t first_memblock_size);
 static inline void mmu_early_init_devtree(void) { }
 
+static inline void pkey_early_init_devtree(void) {}
+
 extern void *abatron_pteptrs[2];
 #endif /* __ASSEMBLY__ */
 #endif
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 1a474f6b..7f3658a 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -218,8 +218,6 @@
 static inline void dec_mm_active_cpus(struct mm_struct *mm) { }
 static inline void mm_context_add_copro(struct mm_struct *mm) { }
 static inline void mm_context_remove_copro(struct mm_struct *mm) { }
-static inline void mm_context_add_vas_windows(struct mm_struct *mm) { }
-static inline void mm_context_remove_vas_windows(struct mm_struct *mm) { }
 #endif
 
 
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index b0afbdd..b9e134d 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -249,6 +249,18 @@
 
 	return old;
 }
+
+#ifdef CONFIG_PPC_16K_PAGES
+#define __HAVE_ARCH_PTEP_GET
+static inline pte_t ptep_get(pte_t *ptep)
+{
+	pte_basic_t val = READ_ONCE(ptep->pte);
+	pte_t pte = {val, val, val, val};
+
+	return pte;
+}
+#endif /* CONFIG_PPC_16K_PAGES */
+
 #else
 static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p,
 				     unsigned long clr, unsigned long set, int huge)
@@ -284,16 +296,6 @@
 	return __pte(pte_update(mm, addr, ptep, ~0, 0, 0));
 }
 
-#if defined(CONFIG_PPC_8xx) && defined(CONFIG_PPC_16K_PAGES)
-#define __HAVE_ARCH_PTEP_GET
-static inline pte_t ptep_get(pte_t *ptep)
-{
-	pte_t pte = {READ_ONCE(ptep->pte), 0, 0, 0};
-
-	return pte;
-}
-#endif
-
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
 				      pte_t *ptep)
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 6cb8aa3..59ee9fa 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -82,8 +82,6 @@
  */
 #include <asm/nohash/pte-book3e.h>
 
-#define _PAGE_SAO	0
-
 #define PTE_RPN_MASK	(~((1UL << PTE_RPN_SHIFT) - 1))
 
 /*
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 84b2564..9454d29 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -225,6 +225,7 @@
 	u16 in_mce;
 	u8 hmi_event_available;		/* HMI event is available */
 	u8 hmi_p9_special_emu;		/* HMI P9 special emulation */
+	u32 hmi_irqs;			/* HMI irq stat */
 #endif
 	u8 ftrace_enabled;		/* Hard disable ftrace */
 
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index a63fe6f..2546872 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -255,8 +255,10 @@
  */
 #ifdef CONFIG_PPC_BOOK3E_64
 #define is_kernel_addr(x)	((x) >= 0x8000000000000000ul)
-#else
+#elif defined(CONFIG_PPC_BOOK3S_64)
 #define is_kernel_addr(x)	((x) >= PAGE_OFFSET)
+#else
+#define is_kernel_addr(x)	((x) >= TASK_SIZE)
 #endif
 
 #ifndef CONFIG_PPC_BOOK3S_64
diff --git a/arch/powerpc/include/asm/paravirt.h b/arch/powerpc/include/asm/paravirt.h
new file mode 100644
index 0000000..9362c94
--- /dev/null
+++ b/arch/powerpc/include/asm/paravirt.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_PARAVIRT_H
+#define _ASM_POWERPC_PARAVIRT_H
+
+#include <linux/jump_label.h>
+#include <asm/smp.h>
+#ifdef CONFIG_PPC64
+#include <asm/paca.h>
+#include <asm/hvcall.h>
+#endif
+
+#ifdef CONFIG_PPC_SPLPAR
+DECLARE_STATIC_KEY_FALSE(shared_processor);
+
+static inline bool is_shared_processor(void)
+{
+	return static_branch_unlikely(&shared_processor);
+}
+
+/* If bit 0 is set, the cpu has been preempted */
+static inline u32 yield_count_of(int cpu)
+{
+	__be32 yield_count = READ_ONCE(lppaca_of(cpu).yield_count);
+	return be32_to_cpu(yield_count);
+}
+
+static inline void yield_to_preempted(int cpu, u32 yield_count)
+{
+	plpar_hcall_norets(H_CONFER, get_hard_smp_processor_id(cpu), yield_count);
+}
+
+static inline void prod_cpu(int cpu)
+{
+	plpar_hcall_norets(H_PROD, get_hard_smp_processor_id(cpu));
+}
+
+static inline void yield_to_any(void)
+{
+	plpar_hcall_norets(H_CONFER, -1, 0);
+}
+#else
+static inline bool is_shared_processor(void)
+{
+	return false;
+}
+
+static inline u32 yield_count_of(int cpu)
+{
+	return 0;
+}
+
+extern void ___bad_yield_to_preempted(void);
+static inline void yield_to_preempted(int cpu, u32 yield_count)
+{
+	___bad_yield_to_preempted(); /* This would be a bug */
+}
+
+extern void ___bad_yield_to_any(void);
+static inline void yield_to_any(void)
+{
+	___bad_yield_to_any(); /* This would be a bug */
+}
+
+extern void ___bad_prod_cpu(void);
+static inline void prod_cpu(int cpu)
+{
+	___bad_prod_cpu(); /* This would be a bug */
+}
+
+#endif
+
+#define vcpu_is_preempted vcpu_is_preempted
+static inline bool vcpu_is_preempted(int cpu)
+{
+	if (!is_shared_processor())
+		return false;
+	if (yield_count_of(cpu) & 1)
+		return true;
+	return false;
+}
+
+static inline bool pv_is_native_spin_unlock(void)
+{
+	return !is_shared_processor();
+}
+
+#endif /* _ASM_POWERPC_PARAVIRT_H */
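
A hedged sketch of the confer protocol these helpers enable: a
spin-waiter samples the owner's yield count and, if the owner vCPU is
preempted (count is odd), donates its time slice instead of spinning:

	static inline void yield_to_lock_owner(int owner_cpu)
	{
		u32 yield_count = yield_count_of(owner_cpu);

		/* Odd count: owner vCPU is currently preempted. */
		if (yield_count & 1)
			yield_to_preempted(owner_cpu, yield_count);
	}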
diff --git a/arch/powerpc/include/asm/pci-bridge.h b/arch/powerpc/include/asm/pci-bridge.h
index b92e81b..d2a2a14 100644
--- a/arch/powerpc/include/asm/pci-bridge.h
+++ b/arch/powerpc/include/asm/pci-bridge.h
@@ -202,7 +202,6 @@
 #define IODA_INVALID_PE		0xFFFFFFFF
 	unsigned int pe_number;
 #ifdef CONFIG_PCI_IOV
-	int     vf_index;		/* VF index in the PF */
 	u16     vfs_expanded;		/* number of VFs IOV BAR expanded */
 	u16     num_vfs;		/* number of VFs enabled*/
 	unsigned int *pe_num_map;	/* PE# for the first VF PE or array */
diff --git a/arch/powerpc/include/asm/percpu.h b/arch/powerpc/include/asm/percpu.h
index dce863a..8e5b7d0b8 100644
--- a/arch/powerpc/include/asm/percpu.h
+++ b/arch/powerpc/include/asm/percpu.h
@@ -10,8 +10,6 @@
 
 #ifdef CONFIG_SMP
 
-#include <asm/paca.h>
-
 #define __my_cpu_offset local_paca->data_offset
 
 #endif /* CONFIG_SMP */
@@ -19,4 +17,6 @@
 
 #include <asm-generic/percpu.h>
 
+#include <asm/paca.h>
+
 #endif /* _ASM_POWERPC_PERCPU_H_ */
diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index eed3954..1e8b2e1 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -12,6 +12,8 @@
 
 #ifdef CONFIG_PPC_PERF_CTRS
 #include <asm/perf_event_server.h>
+#else
+static inline bool is_sier_available(void) { return false; }
 #endif
 
 #ifdef CONFIG_FSL_EMB_PERF_EVENT
diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h
index 3e9703f..86c9eb06 100644
--- a/arch/powerpc/include/asm/perf_event_server.h
+++ b/arch/powerpc/include/asm/perf_event_server.h
@@ -17,6 +17,13 @@
 
 struct perf_event;
 
+struct mmcr_regs {
+	unsigned long mmcr0;
+	unsigned long mmcr1;
+	unsigned long mmcr2;
+	unsigned long mmcra;
+	unsigned long mmcr3;
+};
 /*
  * This struct provides the constants and functions needed to
  * describe the PMU on a particular POWER-family CPU.
@@ -28,7 +35,7 @@
 	unsigned long	add_fields;
 	unsigned long	test_adder;
 	int		(*compute_mmcr)(u64 events[], int n_ev,
-				unsigned int hwc[], unsigned long mmcr[],
+				unsigned int hwc[], struct mmcr_regs *mmcr,
 				struct perf_event *pevents[]);
 	int		(*get_constraint)(u64 event_id, unsigned long *mskp,
 				unsigned long *valp);
@@ -41,13 +48,13 @@
 	unsigned long	group_constraint_val;
 	u64             (*bhrb_filter_map)(u64 branch_sample_type);
 	void            (*config_bhrb)(u64 pmu_bhrb_filter);
-	void		(*disable_pmc)(unsigned int pmc, unsigned long mmcr[]);
+	void		(*disable_pmc)(unsigned int pmc, struct mmcr_regs *mmcr);
 	int		(*limited_pmc_event)(u64 event_id);
 	u32		flags;
 	const struct attribute_group	**attr_groups;
 	int		n_generic;
 	int		*generic_events;
-	int		(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
+	u64		(*cache_events)[PERF_COUNT_HW_CACHE_MAX]
 			       [PERF_COUNT_HW_CACHE_OP_MAX]
 			       [PERF_COUNT_HW_CACHE_RESULT_MAX];
 
@@ -69,6 +76,7 @@
 #define PPMU_HAS_SIER		0x00000040 /* Has SIER */
 #define PPMU_ARCH_207S		0x00000080 /* PMC is architecture v2.07S */
 #define PPMU_NO_SIAR		0x00000100 /* Do not use SIAR */
+#define PPMU_ARCH_31		0x00000200 /* Has MMCR3, SIER2 and SIER3 */
 
 /*
  * Values for flags to get_alternatives()
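
A hedged sketch of the new callback shape from a PMU back end's point of
view; the register values below are placeholders, not a real event
encoding.

	static int example_compute_mmcr(u64 events[], int n_ev,
					unsigned int hwc[],
					struct mmcr_regs *mmcr,
					struct perf_event *pevents[])
	{
		/* Named fields replace the old unsigned long mmcr[] slots. */
		mmcr->mmcr0 = MMCR0_FC;	/* placeholder: counters frozen */
		mmcr->mmcr1 = 0;
		mmcr->mmcra = 0;
		mmcr->mmcr3 = 0;	/* only consumed when PPMU_ARCH_31 is set */
		return 0;
	}
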
diff --git a/arch/powerpc/include/asm/pkeys.h b/arch/powerpc/include/asm/pkeys.h
index 2fe6cae..a795104 100644
--- a/arch/powerpc/include/asm/pkeys.h
+++ b/arch/powerpc/include/asm/pkeys.h
@@ -11,9 +11,7 @@
 #include <linux/jump_label.h>
 #include <asm/firmware.h>
 
-DECLARE_STATIC_KEY_TRUE(pkey_disabled);
-extern int pkeys_total; /* total pkeys as per device tree */
-extern u32 initial_allocation_mask; /*  bits set for the initially allocated keys */
+extern int num_pkey;
 extern u32 reserved_allocation_mask; /* bits set for reserved keys */
 
 #define ARCH_VM_PKEY_FLAGS (VM_PKEY_BIT0 | VM_PKEY_BIT1 | VM_PKEY_BIT2 | \
@@ -25,48 +23,28 @@
 				PKEY_DISABLE_WRITE  | \
 				PKEY_DISABLE_EXECUTE)
 
+#ifdef CONFIG_PPC_BOOK3S_64
+#include <asm/book3s/64/pkeys.h>
+#else
+#error "Not supported"
+#endif
+
 static inline u64 pkey_to_vmflag_bits(u16 pkey)
 {
 	return (((u64)pkey << VM_PKEY_SHIFT) & ARCH_VM_PKEY_FLAGS);
 }
 
-static inline u64 vmflag_to_pte_pkey_bits(u64 vm_flags)
-{
-	if (static_branch_likely(&pkey_disabled))
-		return 0x0UL;
-
-	return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT4 : 0x0UL) |
-		((vm_flags & VM_PKEY_BIT1) ? H_PTE_PKEY_BIT3 : 0x0UL) |
-		((vm_flags & VM_PKEY_BIT2) ? H_PTE_PKEY_BIT2 : 0x0UL) |
-		((vm_flags & VM_PKEY_BIT3) ? H_PTE_PKEY_BIT1 : 0x0UL) |
-		((vm_flags & VM_PKEY_BIT4) ? H_PTE_PKEY_BIT0 : 0x0UL));
-}
-
 static inline int vma_pkey(struct vm_area_struct *vma)
 {
-	if (static_branch_likely(&pkey_disabled))
+	if (!mmu_has_feature(MMU_FTR_PKEY))
 		return 0;
 	return (vma->vm_flags & ARCH_VM_PKEY_FLAGS) >> VM_PKEY_SHIFT;
 }
 
-#define arch_max_pkey() pkeys_total
-
-static inline u64 pte_to_hpte_pkey_bits(u64 pteflags)
+static inline int arch_max_pkey(void)
 {
-	return (((pteflags & H_PTE_PKEY_BIT0) ? HPTE_R_KEY_BIT0 : 0x0UL) |
-		((pteflags & H_PTE_PKEY_BIT1) ? HPTE_R_KEY_BIT1 : 0x0UL) |
-		((pteflags & H_PTE_PKEY_BIT2) ? HPTE_R_KEY_BIT2 : 0x0UL) |
-		((pteflags & H_PTE_PKEY_BIT3) ? HPTE_R_KEY_BIT3 : 0x0UL) |
-		((pteflags & H_PTE_PKEY_BIT4) ? HPTE_R_KEY_BIT4 : 0x0UL));
-}
-
-static inline u16 pte_to_pkey_bits(u64 pteflags)
-{
-	return (((pteflags & H_PTE_PKEY_BIT0) ? 0x10 : 0x0UL) |
-		((pteflags & H_PTE_PKEY_BIT1) ? 0x8 : 0x0UL) |
-		((pteflags & H_PTE_PKEY_BIT2) ? 0x4 : 0x0UL) |
-		((pteflags & H_PTE_PKEY_BIT3) ? 0x2 : 0x0UL) |
-		((pteflags & H_PTE_PKEY_BIT4) ? 0x1 : 0x0UL));
+	return num_pkey;
 }
 
 #define pkey_alloc_mask(pkey) (0x1 << pkey)
@@ -114,9 +92,8 @@
 	u32 all_pkeys_mask = (u32)(~(0x0));
 	int ret;
 
-	if (static_branch_likely(&pkey_disabled))
+	if (!mmu_has_feature(MMU_FTR_PKEY))
 		return -1;
-
 	/*
 	 * Are we out of pkeys? We must handle this specially because ffz()
 	 * behavior is undefined if there are no zeros.
@@ -132,7 +109,7 @@
 
 static inline int mm_pkey_free(struct mm_struct *mm, int pkey)
 {
-	if (static_branch_likely(&pkey_disabled))
+	if (!mmu_has_feature(MMU_FTR_PKEY))
 		return -1;
 
 	if (!mm_pkey_is_allocated(mm, pkey))
@@ -147,21 +124,13 @@
  * Try to dedicate one of the protection keys to be used as an
  * execute-only protection key.
  */
-extern int __execute_only_pkey(struct mm_struct *mm);
-static inline int execute_only_pkey(struct mm_struct *mm)
-{
-	if (static_branch_likely(&pkey_disabled))
-		return -1;
-
-	return __execute_only_pkey(mm);
-}
-
+extern int execute_only_pkey(struct mm_struct *mm);
 extern int __arch_override_mprotect_pkey(struct vm_area_struct *vma,
 					 int prot, int pkey);
 static inline int arch_override_mprotect_pkey(struct vm_area_struct *vma,
 					      int prot, int pkey)
 {
-	if (static_branch_likely(&pkey_disabled))
+	if (!mmu_has_feature(MMU_FTR_PKEY))
 		return 0;
 
 	/*
@@ -179,7 +148,7 @@
 static inline int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
 					    unsigned long init_val)
 {
-	if (static_branch_likely(&pkey_disabled))
+	if (!mmu_has_feature(MMU_FTR_PKEY))
 		return -EINVAL;
 
 	/*
@@ -196,7 +165,7 @@
 
 static inline bool arch_pkeys_enabled(void)
 {
-	return !static_branch_likely(&pkey_disabled);
+	return mmu_has_feature(MMU_FTR_PKEY);
 }
 
 extern void pkey_mm_init(struct mm_struct *mm);
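
For reference, the allocation step that the mm_pkey_alloc() hunk above
elides continues along these lines (a sketch based on the existing
allocator; it performs the ffz() step the comment describes):

	if (mm_pkey_allocation_map(mm) == all_pkeys_mask)
		return -1;		/* every key is already allocated */

	ret = ffz((u32)mm_pkey_allocation_map(mm));
	__mm_pkey_allocated(mm, ret);
	return ret;
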
diff --git a/arch/powerpc/include/asm/plpar_wrappers.h b/arch/powerpc/include/asm/plpar_wrappers.h
index 4497c8a..ece84a4 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -312,7 +312,12 @@
 
 static inline long plpar_set_watchpoint0(unsigned long dawr0, unsigned long dawrx0)
 {
-	return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR, dawr0, dawrx0);
+	return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR0, dawr0, dawrx0);
+}
+
+static inline long plpar_set_watchpoint1(unsigned long dawr1, unsigned long dawrx1)
+{
+	return plpar_set_mode(0, H_SET_MODE_RESOURCE_SET_DAWR1, dawr1, dawrx1);
 }
 
 static inline long plpar_signal_sys_reset(long cpu)
@@ -334,6 +339,51 @@
 	return rc;
 }
 
+/*
+ * Wrapper to H_RPT_INVALIDATE hcall that handles return values appropriately
+ *
+ * - Returns H_SUCCESS on success
+ * - For H_BUSY return value, we retry the hcall.
+ * - For any other hcall failures, attempt a full flush once before
+ *   resorting to BUG().
+ *
+ * Note: This hcall is expected to fail only very rarely. The correct
+ * error recovery of killing the process/guest will eventually be
+ * needed.
+ */
+static inline long pseries_rpt_invalidate(u32 pid, u64 target, u64 type,
+					  u64 page_sizes, u64 start, u64 end)
+{
+	long rc;
+	unsigned long all;
+
+	while (true) {
+		rc = plpar_hcall_norets(H_RPT_INVALIDATE, pid, target, type,
+					page_sizes, start, end);
+		if (rc == H_BUSY) {
+			cpu_relax();
+			continue;
+		} else if (rc == H_SUCCESS)
+			return rc;
+
+		/* Flush request failed, try with a full flush once */
+		if (type & H_RPTI_TYPE_NESTED)
+			all = H_RPTI_TYPE_NESTED | H_RPTI_TYPE_NESTED_ALL;
+		else
+			all = H_RPTI_TYPE_ALL;
+retry:
+		rc = plpar_hcall_norets(H_RPT_INVALIDATE, pid, target,
+					all, page_sizes, 0, -1UL);
+		if (rc == H_BUSY) {
+			cpu_relax();
+			goto retry;
+		} else if (rc == H_SUCCESS)
+			return rc;
+
+		BUG();
+	}
+}
+
 #else /* !CONFIG_PPC_PSERIES */
 
 static inline long plpar_set_ciabr(unsigned long ciabr)
@@ -346,6 +396,13 @@
 {
 	return 0;
 }
+
+static inline long pseries_rpt_invalidate(u32 pid, u64 target, u64 type,
+					  u64 page_sizes, u64 start, u64 end)
+{
+	return 0;
+}
+
 #endif /* CONFIG_PPC_PSERIES */
 
 #endif /* _ASM_POWERPC_PLPAR_WRAPPERS_H */
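
A hypothetical call site, to show the wrapper's intended shape (the
target/type/page-size arguments are illustrative; the H_RPTI_* constants
come from asm/hvcall.h): flush all TLB entries for one PID over the full
effective address range.

	/* Illustrative only: full-range TLB flush for a single PID. */
	rc = pseries_rpt_invalidate(pid, H_RPTI_TARGET_CMMU,
				    H_RPTI_TYPE_TLB, H_RPTI_PAGE_ALL,
				    0, -1UL);
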
diff --git a/arch/powerpc/include/asm/pnv-ocxl.h b/arch/powerpc/include/asm/pnv-ocxl.h
index 7de8264..ee79d2c 100644
--- a/arch/powerpc/include/asm/pnv-ocxl.h
+++ b/arch/powerpc/include/asm/pnv-ocxl.h
@@ -9,28 +9,26 @@
 #define PNV_OCXL_TL_BITS_PER_RATE       4
 #define PNV_OCXL_TL_RATE_BUF_SIZE       ((PNV_OCXL_TL_MAX_TEMPLATE+1) * PNV_OCXL_TL_BITS_PER_RATE / 8)
 
-extern int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
-			u16 *supported);
-extern int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count);
+int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, u16 *supported);
+int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count);
 
-extern int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
+int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
 			char *rate_buf, int rate_buf_size);
-extern int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
-			uint64_t rate_buf_phys, int rate_buf_size);
+int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
+			 uint64_t rate_buf_phys, int rate_buf_size);
 
-extern int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq);
-extern void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
-				void __iomem *tfc, void __iomem *pe_handle);
-extern int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
-				void __iomem **dar, void __iomem **tfc,
-				void __iomem **pe_handle);
+int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq);
+void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
+			     void __iomem *tfc, void __iomem *pe_handle);
+int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
+			  void __iomem **dar, void __iomem **tfc,
+			  void __iomem **pe_handle);
 
-extern int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
-			void **platform_data);
-extern void pnv_ocxl_spa_release(void *platform_data);
-extern int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle);
+int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, void **platform_data);
+void pnv_ocxl_spa_release(void *platform_data);
+int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle);
 
-extern int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr);
-extern void pnv_ocxl_free_xive_irq(u32 irq);
+int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr);
+void pnv_ocxl_free_xive_irq(u32 irq);
 
 #endif /* _ASM_PNV_OCXL_H */
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 2a39c71..a6e3700 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -76,6 +76,19 @@
 #define	__REGA0_R30	30
 #define	__REGA0_R31	31
 
+#define IMM_L(i)               ((uintptr_t)(i) & 0xffff)
+#define IMM_DS(i)              ((uintptr_t)(i) & 0xfffc)
+
+/*
+ * 16-bit immediate helper macros: HA() is for use with sign-extending instrs
+ * (e.g. LD, ADDI).  If the bottom 16 bits are "-ve", add another bit into the
+ * top half to negate the effect (i.e. 0xffff + 1 = 0x(1)0000).
+ */
+#define IMM_H(i)                ((uintptr_t)(i)>>16)
+#define IMM_HA(i)               (((uintptr_t)(i)>>16) +                       \
+					(((uintptr_t)(i) & 0x8000) >> 15))
+
 /* opcode and xopcode for instructions */
 #define OP_TRAP 3
 #define OP_TRAP_64 2
@@ -195,56 +208,28 @@
 #define OP_LQ    56
 
 /* sorted alphabetically */
-#define PPC_INST_BHRBE			0x7c00025c
-#define PPC_INST_CLRBHRB		0x7c00035c
+#define PPC_INST_BCCTR_FLUSH		0x4c400420
 #define PPC_INST_COPY			0x7c20060c
-#define PPC_INST_CP_ABORT		0x7c00068c
-#define PPC_INST_DARN			0x7c0005e6
 #define PPC_INST_DCBA			0x7c0005ec
 #define PPC_INST_DCBA_MASK		0xfc0007fe
-#define PPC_INST_DCBAL			0x7c2005ec
-#define PPC_INST_DCBZL			0x7c2007ec
-#define PPC_INST_ICBT			0x7c00002c
-#define PPC_INST_ICSWX			0x7c00032d
-#define PPC_INST_ICSWEPX		0x7c00076d
 #define PPC_INST_ISEL			0x7c00001e
 #define PPC_INST_ISEL_MASK		0xfc00003e
-#define PPC_INST_LDARX			0x7c0000a8
-#define PPC_INST_STDCX			0x7c0001ad
-#define PPC_INST_LQARX			0x7c000228
-#define PPC_INST_STQCX			0x7c00016d
 #define PPC_INST_LSWI			0x7c0004aa
 #define PPC_INST_LSWX			0x7c00042a
-#define PPC_INST_LWARX			0x7c000028
-#define PPC_INST_STWCX			0x7c00012d
 #define PPC_INST_LWSYNC			0x7c2004ac
 #define PPC_INST_SYNC			0x7c0004ac
 #define PPC_INST_SYNC_MASK		0xfc0007fe
 #define PPC_INST_ISYNC			0x4c00012c
-#define PPC_INST_LXVD2X			0x7c000698
 #define PPC_INST_MCRXR			0x7c000400
 #define PPC_INST_MCRXR_MASK		0xfc0007fe
 #define PPC_INST_MFSPR_PVR		0x7c1f42a6
 #define PPC_INST_MFSPR_PVR_MASK		0xfc1ffffe
-#define PPC_INST_MFTMR			0x7c0002dc
-#define PPC_INST_MSGSND			0x7c00019c
-#define PPC_INST_MSGCLR			0x7c0001dc
-#define PPC_INST_MSGSYNC		0x7c0006ec
-#define PPC_INST_MSGSNDP		0x7c00011c
-#define PPC_INST_MSGCLRP		0x7c00015c
 #define PPC_INST_MTMSRD			0x7c000164
-#define PPC_INST_MTTMR			0x7c0003dc
 #define PPC_INST_NOP			0x60000000
-#define PPC_INST_PASTE			0x7c20070d
 #define PPC_INST_POPCNTB		0x7c0000f4
 #define PPC_INST_POPCNTB_MASK		0xfc0007fe
-#define PPC_INST_POPCNTD		0x7c0003f4
-#define PPC_INST_POPCNTW		0x7c0002f4
 #define PPC_INST_RFEBB			0x4c000124
-#define PPC_INST_RFCI			0x4c000066
-#define PPC_INST_RFDI			0x4c00004e
 #define PPC_INST_RFID			0x4c000024
-#define PPC_INST_RFMCI			0x4c00004c
 #define PPC_INST_MFSPR			0x7c0002a6
 #define PPC_INST_MFSPR_DSCR		0x7c1102a6
 #define PPC_INST_MFSPR_DSCR_MASK	0xfc1ffffe
@@ -254,131 +239,43 @@
 #define PPC_INST_MFSPR_DSCR_USER_MASK	0xfc1ffffe
 #define PPC_INST_MTSPR_DSCR_USER	0x7c0303a6
 #define PPC_INST_MTSPR_DSCR_USER_MASK	0xfc1ffffe
-#define PPC_INST_MFVSRD			0x7c000066
-#define PPC_INST_MTVSRD			0x7c000166
 #define PPC_INST_SC			0x44000002
-#define PPC_INST_SLBFEE			0x7c0007a7
-#define PPC_INST_SLBIA			0x7c0003e4
-
 #define PPC_INST_STRING			0x7c00042a
 #define PPC_INST_STRING_MASK		0xfc0007fe
 #define PPC_INST_STRING_GEN_MASK	0xfc00067e
-
 #define PPC_INST_STSWI			0x7c0005aa
 #define PPC_INST_STSWX			0x7c00052a
-#define PPC_INST_STXVD2X		0x7c000798
-#define PPC_INST_TLBIE			0x7c000264
-#define PPC_INST_TLBIEL			0x7c000224
-#define PPC_INST_TLBILX			0x7c000024
-#define PPC_INST_WAIT			0x7c00007c
-#define PPC_INST_TLBIVAX		0x7c000624
-#define PPC_INST_TLBSRX_DOT		0x7c0006a5
-#define PPC_INST_VPMSUMW		0x10000488
-#define PPC_INST_VPMSUMD		0x100004c8
-#define PPC_INST_VPERMXOR		0x1000002d
-#define PPC_INST_XXLOR			0xf0000490
-#define PPC_INST_XXSWAPD		0xf0000250
-#define PPC_INST_XVCPSGNDP		0xf0000780
 #define PPC_INST_TRECHKPT		0x7c0007dd
 #define PPC_INST_TRECLAIM		0x7c00075d
-#define PPC_INST_TABORT			0x7c00071d
 #define PPC_INST_TSR			0x7c0005dd
-
-#define PPC_INST_NAP			0x4c000364
-#define PPC_INST_SLEEP			0x4c0003a4
-#define PPC_INST_WINKLE			0x4c0003e4
-
-#define PPC_INST_STOP			0x4c0002e4
-
-/* A2 specific instructions */
-#define PPC_INST_ERATWE			0x7c0001a6
-#define PPC_INST_ERATRE			0x7c000166
-#define PPC_INST_ERATILX		0x7c000066
-#define PPC_INST_ERATIVAX		0x7c000666
-#define PPC_INST_ERATSX			0x7c000126
-#define PPC_INST_ERATSX_DOT		0x7c000127
-
-/* Misc instructions for BPF compiler */
-#define PPC_INST_LBZ			0x88000000
 #define PPC_INST_LD			0xe8000000
-#define PPC_INST_LDX			0x7c00002a
-#define PPC_INST_LHZ			0xa0000000
-#define PPC_INST_LWZ			0x80000000
-#define PPC_INST_LHBRX			0x7c00062c
-#define PPC_INST_LDBRX			0x7c000428
-#define PPC_INST_STB			0x98000000
-#define PPC_INST_STH			0xb0000000
 #define PPC_INST_STD			0xf8000000
-#define PPC_INST_STDX			0x7c00012a
-#define PPC_INST_STDU			0xf8000001
-#define PPC_INST_STW			0x90000000
-#define PPC_INST_STWU			0x94000000
 #define PPC_INST_MFLR			0x7c0802a6
-#define PPC_INST_MTLR			0x7c0803a6
 #define PPC_INST_MTCTR			0x7c0903a6
-#define PPC_INST_CMPWI			0x2c000000
-#define PPC_INST_CMPDI			0x2c200000
-#define PPC_INST_CMPW			0x7c000000
-#define PPC_INST_CMPD			0x7c200000
-#define PPC_INST_CMPLW			0x7c000040
-#define PPC_INST_CMPLD			0x7c200040
-#define PPC_INST_CMPLWI			0x28000000
-#define PPC_INST_CMPLDI			0x28200000
 #define PPC_INST_ADDI			0x38000000
 #define PPC_INST_ADDIS			0x3c000000
 #define PPC_INST_ADD			0x7c000214
-#define PPC_INST_ADDC			0x7c000014
-#define PPC_INST_SUB			0x7c000050
 #define PPC_INST_BLR			0x4e800020
-#define PPC_INST_BLRL			0x4e800021
 #define PPC_INST_BCTR			0x4e800420
-#define PPC_INST_MULLD			0x7c0001d2
-#define PPC_INST_MULLW			0x7c0001d6
-#define PPC_INST_MULHWU			0x7c000016
-#define PPC_INST_MULLI			0x1c000000
-#define PPC_INST_MADDHD			0x10000030
-#define PPC_INST_MADDHDU		0x10000031
-#define PPC_INST_MADDLD			0x10000033
-#define PPC_INST_DIVWU			0x7c000396
+#define PPC_INST_BCTRL			0x4e800421
 #define PPC_INST_DIVD			0x7c0003d2
-#define PPC_INST_DIVDU			0x7c000392
-#define PPC_INST_RLWINM			0x54000000
-#define PPC_INST_RLWINM_DOT		0x54000001
-#define PPC_INST_RLWIMI			0x50000000
-#define PPC_INST_RLDICL			0x78000000
 #define PPC_INST_RLDICR			0x78000004
-#define PPC_INST_SLW			0x7c000030
-#define PPC_INST_SLD			0x7c000036
-#define PPC_INST_SRW			0x7c000430
-#define PPC_INST_SRAW			0x7c000630
-#define PPC_INST_SRAWI			0x7c000670
-#define PPC_INST_SRD			0x7c000436
-#define PPC_INST_SRAD			0x7c000634
-#define PPC_INST_SRADI			0x7c000674
-#define PPC_INST_AND			0x7c000038
-#define PPC_INST_ANDDOT			0x7c000039
-#define PPC_INST_OR			0x7c000378
-#define PPC_INST_XOR			0x7c000278
-#define PPC_INST_ANDI			0x70000000
 #define PPC_INST_ORI			0x60000000
 #define PPC_INST_ORIS			0x64000000
-#define PPC_INST_XORI			0x68000000
-#define PPC_INST_XORIS			0x6c000000
-#define PPC_INST_NEG			0x7c0000d0
-#define PPC_INST_EXTSW			0x7c0007b4
 #define PPC_INST_BRANCH			0x48000000
 #define PPC_INST_BRANCH_COND		0x40800000
-#define PPC_INST_LBZCIX			0x7c0006aa
-#define PPC_INST_STBCIX			0x7c0007aa
-#define PPC_INST_LWZX			0x7c00002e
-#define PPC_INST_LFSX			0x7c00042e
-#define PPC_INST_STFSX			0x7c00052e
-#define PPC_INST_LFDX			0x7c0004ae
-#define PPC_INST_STFDX			0x7c0005ae
-#define PPC_INST_LVX			0x7c0000ce
-#define PPC_INST_STVX			0x7c0001ce
-#define PPC_INST_VCMPEQUD		0x100000c7
-#define PPC_INST_VCMPEQUB		0x10000006
+
+/* Prefixes */
+#define PPC_INST_LFS			0xc0000000
+#define PPC_INST_STFS			0xd0000000
+#define PPC_INST_LFD			0xc8000000
+#define PPC_INST_STFD			0xd8000000
+#define PPC_PREFIX_MLS			0x06000000
+#define PPC_PREFIX_8LS			0x04000000
+
+/* Prefixed instructions */
+#define PPC_INST_PLD			0xe4000000
+#define PPC_INST_PSTD			0xf4000000
 
 /* macros to insert fields into opcodes */
 #define ___PPC_RA(a)	(((a) & 0x1f) << 16)
@@ -411,6 +308,7 @@
 #define __PPC_CT(t)	(((t) & 0x0f) << 21)
 #define __PPC_SPR(r)	((((r) & 0x1f) << 16) | ((((r) >> 5) & 0x1f) << 11))
 #define __PPC_RC21	(0x1 << 10)
+#define __PPC_PRFX_R(r)	(((r) & 0x1) << 20)
 
 /*
  * Both low and high 16 bits are added as SIGNED additions, so if low 16 bits
@@ -431,175 +329,278 @@
 #define __PPC_EH(eh)	0
 #endif
 
-/* Deal with instructions that older assemblers aren't aware of */
-#define	PPC_CP_ABORT		stringify_in_c(.long PPC_INST_CP_ABORT)
-#define	PPC_COPY(a, b)		stringify_in_c(.long PPC_INST_COPY | \
-					___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_DARN(t, l)		stringify_in_c(.long PPC_INST_DARN |  \
-						___PPC_RT(t)	   |  \
-						(((l) & 0x3) << 16))
-#define	PPC_DCBAL(a, b)		stringify_in_c(.long PPC_INST_DCBAL | \
-					__PPC_RA(a) | __PPC_RB(b))
-#define	PPC_DCBZL(a, b)		stringify_in_c(.long PPC_INST_DCBZL | \
-					__PPC_RA(a) | __PPC_RB(b))
-#define PPC_LQARX(t, a, b, eh)	stringify_in_c(.long PPC_INST_LQARX | \
-					___PPC_RT(t) | ___PPC_RA(a) | \
-					___PPC_RB(b) | __PPC_EH(eh))
-#define PPC_LDARX(t, a, b, eh)	stringify_in_c(.long PPC_INST_LDARX | \
-					___PPC_RT(t) | ___PPC_RA(a) | \
-					___PPC_RB(b) | __PPC_EH(eh))
-#define PPC_LWARX(t, a, b, eh)	stringify_in_c(.long PPC_INST_LWARX | \
-					___PPC_RT(t) | ___PPC_RA(a) | \
-					___PPC_RB(b) | __PPC_EH(eh))
-#define PPC_STQCX(t, a, b)	stringify_in_c(.long PPC_INST_STQCX | \
-					___PPC_RT(t) | ___PPC_RA(a) | \
-					___PPC_RB(b))
-#define PPC_MADDHD(t, a, b, c)	stringify_in_c(.long PPC_INST_MADDHD | \
-					___PPC_RT(t) | ___PPC_RA(a)  | \
-					___PPC_RB(b) | ___PPC_RC(c))
-#define PPC_MADDHDU(t, a, b, c)	stringify_in_c(.long PPC_INST_MADDHDU | \
-					___PPC_RT(t) | ___PPC_RA(a)   | \
-					___PPC_RB(b) | ___PPC_RC(c))
-#define PPC_MADDLD(t, a, b, c)	stringify_in_c(.long PPC_INST_MADDLD | \
-					___PPC_RT(t) | ___PPC_RA(a)  | \
-					___PPC_RB(b) | ___PPC_RC(c))
-#define PPC_MSGSND(b)		stringify_in_c(.long PPC_INST_MSGSND | \
-					___PPC_RB(b))
-#define PPC_MSGSYNC		stringify_in_c(.long PPC_INST_MSGSYNC)
-#define PPC_MSGCLR(b)		stringify_in_c(.long PPC_INST_MSGCLR | \
-					___PPC_RB(b))
-#define PPC_MSGSNDP(b)		stringify_in_c(.long PPC_INST_MSGSNDP | \
-					___PPC_RB(b))
-#define PPC_MSGCLRP(b)		stringify_in_c(.long PPC_INST_MSGCLRP | \
-					___PPC_RB(b))
-#define PPC_PASTE(a, b)		stringify_in_c(.long PPC_INST_PASTE | \
-					___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_POPCNTB(a, s)	stringify_in_c(.long PPC_INST_POPCNTB | \
-					__PPC_RA(a) | __PPC_RS(s))
-#define PPC_POPCNTD(a, s)	stringify_in_c(.long PPC_INST_POPCNTD | \
-					__PPC_RA(a) | __PPC_RS(s))
-#define PPC_POPCNTW(a, s)	stringify_in_c(.long PPC_INST_POPCNTW | \
-					__PPC_RA(a) | __PPC_RS(s))
-#define PPC_RFCI		stringify_in_c(.long PPC_INST_RFCI)
-#define PPC_RFDI		stringify_in_c(.long PPC_INST_RFDI)
-#define PPC_RFMCI		stringify_in_c(.long PPC_INST_RFMCI)
-#define PPC_TLBILX(t, a, b)	stringify_in_c(.long PPC_INST_TLBILX | \
-					__PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b))
-#define PPC_TLBILX_ALL(a, b)	PPC_TLBILX(0, a, b)
-#define PPC_TLBILX_PID(a, b)	PPC_TLBILX(1, a, b)
-#define PPC_TLBILX_VA(a, b)	PPC_TLBILX(3, a, b)
-#define PPC_WAIT(w)		stringify_in_c(.long PPC_INST_WAIT | \
-					__PPC_WC(w))
-#define PPC_TLBIE(lp,a) 	stringify_in_c(.long PPC_INST_TLBIE | \
-					       ___PPC_RB(a) | ___PPC_RS(lp))
-#define	PPC_TLBIE_5(rb,rs,ric,prs,r) \
-				stringify_in_c(.long PPC_INST_TLBIE | \
-					___PPC_RB(rb) | ___PPC_RS(rs) | \
-					___PPC_RIC(ric) | ___PPC_PRS(prs) | \
-					___PPC_R(r))
-#define	PPC_TLBIEL(rb,rs,ric,prs,r) \
-				stringify_in_c(.long PPC_INST_TLBIEL | \
-					___PPC_RB(rb) | ___PPC_RS(rs) | \
-					___PPC_RIC(ric) | ___PPC_PRS(prs) | \
-					___PPC_R(r))
-#define PPC_TLBSRX_DOT(a,b)	stringify_in_c(.long PPC_INST_TLBSRX_DOT | \
-					__PPC_RA0(a) | __PPC_RB(b))
-#define PPC_TLBIVAX(a,b)	stringify_in_c(.long PPC_INST_TLBIVAX | \
-					__PPC_RA0(a) | __PPC_RB(b))
-
-#define PPC_ERATWE(s, a, w)	stringify_in_c(.long PPC_INST_ERATWE | \
-					__PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
-#define PPC_ERATRE(s, a, w)	stringify_in_c(.long PPC_INST_ERATRE | \
-					__PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
-#define PPC_ERATILX(t, a, b)	stringify_in_c(.long PPC_INST_ERATILX | \
-					__PPC_T_TLB(t) | __PPC_RA0(a) | \
-					__PPC_RB(b))
-#define PPC_ERATIVAX(s, a, b)	stringify_in_c(.long PPC_INST_ERATIVAX | \
-					__PPC_RS(s) | __PPC_RA0(a) | __PPC_RB(b))
-#define PPC_ERATSX(t, a, w)	stringify_in_c(.long PPC_INST_ERATSX | \
-					__PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b))
-#define PPC_ERATSX_DOT(t, a, w)	stringify_in_c(.long PPC_INST_ERATSX_DOT | \
-					__PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b))
-#define PPC_SLBFEE_DOT(t, b)	stringify_in_c(.long PPC_INST_SLBFEE | \
-					__PPC_RT(t) | __PPC_RB(b))
-#define __PPC_SLBFEE_DOT(t, b)	stringify_in_c(.long PPC_INST_SLBFEE |	\
-					       ___PPC_RT(t) | ___PPC_RB(b))
-#define PPC_ICBT(c,a,b)		stringify_in_c(.long PPC_INST_ICBT | \
-				       __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b))
-/* PASemi instructions */
-#define LBZCIX(t,a,b)		stringify_in_c(.long PPC_INST_LBZCIX | \
-				       __PPC_RT(t) | __PPC_RA(a) | __PPC_RB(b))
-#define STBCIX(s,a,b)		stringify_in_c(.long PPC_INST_STBCIX | \
-				       __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b))
-
+/* Base instruction encoding */
+#define PPC_RAW_CP_ABORT		(0x7c00068c)
+#define PPC_RAW_COPY(a, b)		(PPC_INST_COPY | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_DARN(t, l)		(0x7c0005e6 | ___PPC_RT(t) | (((l) & 0x3) << 16))
+#define PPC_RAW_DCBAL(a, b)		(0x7c2005ec | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_RAW_DCBZL(a, b)		(0x7c2007ec | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_RAW_LQARX(t, a, b, eh)	(0x7c000228 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh))
+#define PPC_RAW_LDARX(t, a, b, eh)	(0x7c0000a8 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh))
+#define PPC_RAW_LWARX(t, a, b, eh)	(0x7c000028 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | __PPC_EH(eh))
+#define PPC_RAW_PHWSYNC			(0x7c8004ac)
+#define PPC_RAW_PLWSYNC			(0x7ca004ac)
+#define PPC_RAW_STQCX(t, a, b)		(0x7c00016d | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_MADDHD(t, a, b, c)	(0x10000030 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c))
+#define PPC_RAW_MADDHDU(t, a, b, c)	(0x10000031 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c))
+#define PPC_RAW_MADDLD(t, a, b, c)	(0x10000033 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | ___PPC_RC(c))
+#define PPC_RAW_MSGSND(b)		(0x7c00019c | ___PPC_RB(b))
+#define PPC_RAW_MSGSYNC			(0x7c0006ec)
+#define PPC_RAW_MSGCLR(b)		(0x7c0001dc | ___PPC_RB(b))
+#define PPC_RAW_MSGSNDP(b)		(0x7c00011c | ___PPC_RB(b))
+#define PPC_RAW_MSGCLRP(b)		(0x7c00015c | ___PPC_RB(b))
+#define PPC_RAW_PASTE(a, b)		(0x7c20070d | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_POPCNTB(a, s)		(PPC_INST_POPCNTB | __PPC_RA(a) | __PPC_RS(s))
+#define PPC_RAW_POPCNTD(a, s)		(0x7c0003f4 | __PPC_RA(a) | __PPC_RS(s))
+#define PPC_RAW_POPCNTW(a, s)		(0x7c0002f4 | __PPC_RA(a) | __PPC_RS(s))
+#define PPC_RAW_RFCI			(0x4c000066)
+#define PPC_RAW_RFDI			(0x4c00004e)
+#define PPC_RAW_RFMCI			(0x4c00004c)
+#define PPC_RAW_TLBILX(t, a, b)		(0x7c000024 | __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_WAIT(w)			(0x7c00007c | __PPC_WC(w))
+#define PPC_RAW_TLBIE(lp, a)		(0x7c000264 | ___PPC_RB(a) | ___PPC_RS(lp))
+#define PPC_RAW_TLBIE_5(rb, rs, ric, prs, r) \
+	(0x7c000264 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r))
+#define PPC_RAW_TLBIEL(rb, rs, ric, prs, r) \
+	(0x7c000224 | ___PPC_RB(rb) | ___PPC_RS(rs) | ___PPC_RIC(ric) | ___PPC_PRS(prs) | ___PPC_R(r))
+#define PPC_RAW_TLBSRX_DOT(a, b)	(0x7c0006a5 | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_TLBIVAX(a, b)		(0x7c000624 | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_ERATWE(s, a, w)		(0x7c0001a6 | __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
+#define PPC_RAW_ERATRE(s, a, w)		(0x7c000166 | __PPC_RS(s) | __PPC_RA(a) | __PPC_WS(w))
+#define PPC_RAW_ERATILX(t, a, b)	(0x7c000066 | __PPC_T_TLB(t) | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_ERATIVAX(s, a, b)	(0x7c000666 | __PPC_RS(s) | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_ERATSX(t, a, w)		(0x7c000126 | __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(w))
+#define PPC_RAW_ERATSX_DOT(t, a, w)	(0x7c000127 | __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(w))
+#define PPC_RAW_SLBFEE_DOT(t, b)	(0x7c0007a7 | __PPC_RT(t) | __PPC_RB(b))
+#define __PPC_RAW_SLBFEE_DOT(t, b)	(0x7c0007a7 | ___PPC_RT(t) | ___PPC_RB(b))
+#define PPC_RAW_ICBT(c, a, b)		(0x7c00002c | __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b))
+#define PPC_RAW_LBZCIX(t, a, b)		(0x7c0006aa | __PPC_RT(t) | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_RAW_STBCIX(s, a, b)		(0x7c0007aa | __PPC_RS(s) | __PPC_RA(a) | __PPC_RB(b))
+#define PPC_RAW_DCBFPS(a, b)		(0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (4 << 21))
+#define PPC_RAW_DCBSTPS(a, b)		(0x7c0000ac | ___PPC_RA(a) | ___PPC_RB(b) | (6 << 21))
 /*
  * Define what the VSX XX1 form instructions will look like, then add
  * the 128 bit load store instructions based on that.
  */
-#define VSX_XX1(s, a, b)	(__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
-#define VSX_XX3(t, a, b)	(__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))
-#define STXVD2X(s, a, b)	stringify_in_c(.long PPC_INST_STXVD2X | \
-					       VSX_XX1((s), a, b))
-#define LXVD2X(s, a, b)		stringify_in_c(.long PPC_INST_LXVD2X | \
-					       VSX_XX1((s), a, b))
-#define MFVRD(a, t)		stringify_in_c(.long PPC_INST_MFVSRD | \
-					       VSX_XX1((t)+32, a, R0))
-#define MTVRD(t, a)		stringify_in_c(.long PPC_INST_MTVSRD | \
-					       VSX_XX1((t)+32, a, R0))
-#define VPMSUMW(t, a, b)	stringify_in_c(.long PPC_INST_VPMSUMW | \
-					       VSX_XX3((t), a, b))
-#define VPMSUMD(t, a, b)	stringify_in_c(.long PPC_INST_VPMSUMD | \
-					       VSX_XX3((t), a, b))
-#define XXLOR(t, a, b)		stringify_in_c(.long PPC_INST_XXLOR | \
-					       VSX_XX3((t), a, b))
-#define XXSWAPD(t, a)		stringify_in_c(.long PPC_INST_XXSWAPD | \
-					       VSX_XX3((t), a, a))
-#define XVCPSGNDP(t, a, b)	stringify_in_c(.long (PPC_INST_XVCPSGNDP | \
-					       VSX_XX3((t), (a), (b))))
+#define VSX_XX1(s, a, b)		(__PPC_XS(s) | __PPC_RA(a) | __PPC_RB(b))
+#define VSX_XX3(t, a, b)		(__PPC_XT(t) | __PPC_XA(a) | __PPC_XB(b))
+#define PPC_RAW_STXVD2X(s, a, b)	(0x7c000798 | VSX_XX1((s), a, b))
+#define PPC_RAW_LXVD2X(s, a, b)		(0x7c000698 | VSX_XX1((s), a, b))
+#define PPC_RAW_MFVRD(a, t)		(0x7c000066 | VSX_XX1((t) + 32, a, R0))
+#define PPC_RAW_MTVRD(t, a)		(0x7c000166 | VSX_XX1((t) + 32, a, R0))
+#define PPC_RAW_VPMSUMW(t, a, b)	(0x10000488 | VSX_XX3((t), a, b))
+#define PPC_RAW_VPMSUMD(t, a, b)	(0x100004c8 | VSX_XX3((t), a, b))
+#define PPC_RAW_XXLOR(t, a, b)		(0xf0000490 | VSX_XX3((t), a, b))
+#define PPC_RAW_XXSWAPD(t, a)		(0xf0000250 | VSX_XX3((t), a, a))
+#define PPC_RAW_XVCPSGNDP(t, a, b)	((0xf0000780 | VSX_XX3((t), (a), (b))))
+#define PPC_RAW_VPERMXOR(vrt, vra, vrb, vrc) \
+	((0x1000002d | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6)))
+#define PPC_RAW_NAP			(0x4c000364)
+#define PPC_RAW_SLEEP			(0x4c0003a4)
+#define PPC_RAW_WINKLE			(0x4c0003e4)
+#define PPC_RAW_STOP			(0x4c0002e4)
+#define PPC_RAW_CLRBHRB			(0x7c00035c)
+#define PPC_RAW_MFBHRBE(r, n)		(0x7c00025c | __PPC_RT(r) | (((n) & 0x3ff) << 11))
+#define PPC_RAW_TRECHKPT		(PPC_INST_TRECHKPT)
+#define PPC_RAW_TRECLAIM(r)		(PPC_INST_TRECLAIM | __PPC_RA(r))
+#define PPC_RAW_TABORT(r)		(0x7c00071d | __PPC_RA(r))
+#define TMRN(x)				((((x) & 0x1f) << 16) | (((x) & 0x3e0) << 6))
+#define PPC_RAW_MTTMR(tmr, r)		(0x7c0003dc | TMRN(tmr) | ___PPC_RS(r))
+#define PPC_RAW_MFTMR(tmr, r)		(0x7c0002dc | TMRN(tmr) | ___PPC_RT(r))
+#define PPC_RAW_ICSWX(s, a, b)		(0x7c00032d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ICSWEPX(s, a, b)	(0x7c00076d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_SLBIA(IH)		(0x7c0003e4 | (((IH) & 0x7) << 21))
+#define PPC_RAW_VCMPEQUD_RC(vrt, vra, vrb) \
+	(0x100000c7 | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | __PPC_RC21)
+#define PPC_RAW_VCMPEQUB_RC(vrt, vra, vrb) \
+	(0x10000006 | ___PPC_RT(vrt) | ___PPC_RA(vra) | ___PPC_RB(vrb) | __PPC_RC21)
+#define PPC_RAW_LD(r, base, i)		(PPC_INST_LD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_DS(i))
+#define PPC_RAW_LWZ(r, base, i)		(0x80000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_LWZX(t, a, b)		(0x7c00002e | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_STD(r, base, i)		(PPC_INST_STD | ___PPC_RS(r) | ___PPC_RA(base) | IMM_DS(i))
+#define PPC_RAW_STDCX(s, a, b)		(0x7c0001ad | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_LFSX(t, a, b)		(0x7c00042e | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_STFSX(s, a, b)		(0x7c00052e | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_LFDX(t, a, b)		(0x7c0004ae | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_STFDX(s, a, b)		(0x7c0005ae | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_LVX(t, a, b)		(0x7c0000ce | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_STVX(s, a, b)		(0x7c0001ce | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ADD(t, a, b)		(PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ADD_DOT(t, a, b)	(PPC_INST_ADD | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
+#define PPC_RAW_ADDC(t, a, b)		(0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_ADDC_DOT(t, a, b)	(0x7c000014 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
+#define PPC_RAW_NOP()			(PPC_INST_NOP)
+#define PPC_RAW_BLR()			(PPC_INST_BLR)
+#define PPC_RAW_BLRL()			(0x4e800021)
+#define PPC_RAW_MTLR(r)			(0x7c0803a6 | ___PPC_RT(r))
+#define PPC_RAW_BCTR()			(PPC_INST_BCTR)
+#define PPC_RAW_MTCTR(r)		(PPC_INST_MTCTR | ___PPC_RT(r))
+#define PPC_RAW_ADDI(d, a, i)		(PPC_INST_ADDI | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_LI(r, i)		PPC_RAW_ADDI(r, 0, i)
+#define PPC_RAW_ADDIS(d, a, i)		(PPC_INST_ADDIS | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_LIS(r, i)		PPC_RAW_ADDIS(r, 0, i)
+#define PPC_RAW_STDX(r, base, b)	(0x7c00012a | ___PPC_RS(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_STDU(r, base, i)	(0xf8000001 | ___PPC_RS(r) | ___PPC_RA(base) | ((i) & 0xfffc))
+#define PPC_RAW_STW(r, base, i)		(0x90000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_STWU(r, base, i)	(0x94000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_STH(r, base, i)		(0xb0000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_STB(r, base, i)		(0x98000000 | ___PPC_RS(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_LBZ(r, base, i)		(0x88000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_LDX(r, base, b)		(0x7c00002a | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_LHZ(r, base, i)		(0xa0000000 | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+#define PPC_RAW_LHBRX(r, base, b)	(0x7c00062c | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_LDBRX(r, base, b)	(0x7c000428 | ___PPC_RT(r) | ___PPC_RA(base) | ___PPC_RB(b))
+#define PPC_RAW_STWCX(s, a, b)		(0x7c00012d | ___PPC_RS(s) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_CMPWI(a, i)		(0x2c000000 | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_CMPDI(a, i)		(0x2c200000 | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_CMPW(a, b)		(0x7c000000 | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_CMPD(a, b)		(0x7c200000 | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_CMPLWI(a, i)		(0x28000000 | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_CMPLDI(a, i)		(0x28200000 | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_CMPLW(a, b)		(0x7c000040 | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_CMPLD(a, b)		(0x7c200040 | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_SUB(d, a, b)		(0x7c000050 | ___PPC_RT(d) | ___PPC_RB(a) | ___PPC_RA(b))
+#define PPC_RAW_MULD(d, a, b)		(0x7c0001d2 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_MULW(d, a, b)		(0x7c0001d6 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_MULHWU(d, a, b)		(0x7c000016 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_MULI(d, a, i)		(0x1c000000 | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
+#define PPC_RAW_DIVWU(d, a, b)		(0x7c000396 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_DIVDU(d, a, b)		(0x7c000392 | ___PPC_RT(d) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_DIVDE(t, a, b)		(0x7c000352 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_DIVDE_DOT(t, a, b)	(0x7c000352 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
+#define PPC_RAW_DIVDEU(t, a, b)		(0x7c000312 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))
+#define PPC_RAW_DIVDEU_DOT(t, a, b)	(0x7c000312 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b) | 0x1)
+#define PPC_RAW_AND(d, a, b)		(0x7c000038 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
+#define PPC_RAW_ANDI(d, a, i)		(0x70000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_AND_DOT(d, a, b)	(0x7c000039 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
+#define PPC_RAW_OR(d, a, b)		(0x7c000378 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
+#define PPC_RAW_MR(d, a)		PPC_RAW_OR(d, a, a)
+#define PPC_RAW_ORI(d, a, i)		(PPC_INST_ORI | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_ORIS(d, a, i)		(PPC_INST_ORIS | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_XOR(d, a, b)		(0x7c000278 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(b))
+#define PPC_RAW_XORI(d, a, i)		(0x68000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_XORIS(d, a, i)		(0x6c000000 | ___PPC_RA(d) | ___PPC_RS(a) | IMM_L(i))
+#define PPC_RAW_EXTSW(d, a)		(0x7c0007b4 | ___PPC_RA(d) | ___PPC_RS(a))
+#define PPC_RAW_SLW(d, a, s)		(0x7c000030 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SLD(d, a, s)		(0x7c000036 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SRW(d, a, s)		(0x7c000430 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SRAW(d, a, s)		(0x7c000630 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SRAWI(d, a, i)		(0x7c000670 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i))
+#define PPC_RAW_SRD(d, a, s)		(0x7c000436 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SRAD(d, a, s)		(0x7c000634 | ___PPC_RA(d) | ___PPC_RS(a) | ___PPC_RB(s))
+#define PPC_RAW_SRADI(d, a, i)		(0x7c000674 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i))
+#define PPC_RAW_RLWINM(d, a, i, mb, me)	(0x54000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me))
+#define PPC_RAW_RLWINM_DOT(d, a, i, mb, me) \
+					(0x54000001 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me))
+#define PPC_RAW_RLWIMI(d, a, i, mb, me) (0x50000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH(i) | __PPC_MB(mb) | __PPC_ME(me))
+#define PPC_RAW_RLDICL(d, a, i, mb)     (0x78000000 | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_MB64(mb))
+#define PPC_RAW_RLDICR(d, a, i, me)     (PPC_INST_RLDICR | ___PPC_RA(d) | ___PPC_RS(a) | __PPC_SH64(i) | __PPC_ME64(me))
+
+/* slwi = rlwinm Rx, Ry, n, 0, 31-n */
+#define PPC_RAW_SLWI(d, a, i)		PPC_RAW_RLWINM(d, a, i, 0, 31-(i))
+/* srwi = rlwinm Rx, Ry, 32-n, n, 31 */
+#define PPC_RAW_SRWI(d, a, i)		PPC_RAW_RLWINM(d, a, 32-(i), i, 31)
+/* sldi = rldicr Rx, Ry, n, 63-n */
+#define PPC_RAW_SLDI(d, a, i)		PPC_RAW_RLDICR(d, a, i, 63-(i))
+/* srdi = rldicl Rx, Ry, 64-n, n */
+#define PPC_RAW_SRDI(d, a, i)		PPC_RAW_RLDICL(d, a, 64-(i), i)
+
+#define PPC_RAW_NEG(d, a)		(0x7c0000d0 | ___PPC_RT(d) | ___PPC_RA(a))
+
+/* Deal with instructions that older assemblers aren't aware of */
+#define	PPC_BCCTR_FLUSH		stringify_in_c(.long PPC_INST_BCCTR_FLUSH)
+#define	PPC_CP_ABORT		stringify_in_c(.long PPC_RAW_CP_ABORT)
+#define	PPC_COPY(a, b)		stringify_in_c(.long PPC_RAW_COPY(a, b))
+#define PPC_DARN(t, l)		stringify_in_c(.long PPC_RAW_DARN(t, l))
+#define	PPC_DCBAL(a, b)		stringify_in_c(.long PPC_RAW_DCBAL(a, b))
+#define	PPC_DCBZL(a, b)		stringify_in_c(.long PPC_RAW_DCBZL(a, b))
+#define	PPC_DIVDE(t, a, b)	stringify_in_c(.long PPC_RAW_DIVDE(t, a, b))
+#define	PPC_DIVDEU(t, a, b)	stringify_in_c(.long PPC_RAW_DIVDEU(t, a, b))
+#define PPC_LQARX(t, a, b, eh)	stringify_in_c(.long PPC_RAW_LQARX(t, a, b, eh))
+#define PPC_LDARX(t, a, b, eh)	stringify_in_c(.long PPC_RAW_LDARX(t, a, b, eh))
+#define PPC_LWARX(t, a, b, eh)	stringify_in_c(.long PPC_RAW_LWARX(t, a, b, eh))
+#define PPC_STQCX(t, a, b)	stringify_in_c(.long PPC_RAW_STQCX(t, a, b))
+#define PPC_MADDHD(t, a, b, c)	stringify_in_c(.long PPC_RAW_MADDHD(t, a, b, c))
+#define PPC_MADDHDU(t, a, b, c)	stringify_in_c(.long PPC_RAW_MADDHDU(t, a, b, c))
+#define PPC_MADDLD(t, a, b, c)	stringify_in_c(.long PPC_RAW_MADDLD(t, a, b, c))
+#define PPC_MSGSND(b)		stringify_in_c(.long PPC_RAW_MSGSND(b))
+#define PPC_MSGSYNC		stringify_in_c(.long PPC_RAW_MSGSYNC)
+#define PPC_MSGCLR(b)		stringify_in_c(.long PPC_RAW_MSGCLR(b))
+#define PPC_MSGSNDP(b)		stringify_in_c(.long PPC_RAW_MSGSNDP(b))
+#define PPC_MSGCLRP(b)		stringify_in_c(.long PPC_RAW_MSGCLRP(b))
+#define PPC_PASTE(a, b)		stringify_in_c(.long PPC_RAW_PASTE(a, b))
+#define PPC_POPCNTB(a, s)	stringify_in_c(.long PPC_RAW_POPCNTB(a, s))
+#define PPC_POPCNTD(a, s)	stringify_in_c(.long PPC_RAW_POPCNTD(a, s))
+#define PPC_POPCNTW(a, s)	stringify_in_c(.long PPC_RAW_POPCNTW(a, s))
+#define PPC_RFCI		stringify_in_c(.long PPC_RAW_RFCI)
+#define PPC_RFDI		stringify_in_c(.long PPC_RAW_RFDI)
+#define PPC_RFMCI		stringify_in_c(.long PPC_RAW_RFMCI)
+#define PPC_TLBILX(t, a, b)	stringify_in_c(.long PPC_RAW_TLBILX(t, a, b))
+#define PPC_TLBILX_ALL(a, b)	PPC_TLBILX(0, a, b)
+#define PPC_TLBILX_PID(a, b)	PPC_TLBILX(1, a, b)
+#define PPC_TLBILX_VA(a, b)	PPC_TLBILX(3, a, b)
+#define PPC_WAIT(w)		stringify_in_c(.long PPC_RAW_WAIT(w))
+#define PPC_TLBIE(lp, a) 	stringify_in_c(.long PPC_RAW_TLBIE(lp, a))
+#define	PPC_TLBIE_5(rb, rs, ric, prs, r) \
+				stringify_in_c(.long PPC_RAW_TLBIE_5(rb, rs, ric, prs, r))
+#define	PPC_TLBIEL(rb, rs, ric, prs, r) \
+				stringify_in_c(.long PPC_RAW_TLBIEL(rb, rs, ric, prs, r))
+#define PPC_TLBSRX_DOT(a, b)	stringify_in_c(.long PPC_RAW_TLBSRX_DOT(a, b))
+#define PPC_TLBIVAX(a, b)	stringify_in_c(.long PPC_RAW_TLBIVAX(a, b))
+
+#define PPC_ERATWE(s, a, w)	stringify_in_c(.long PPC_RAW_ERATWE(s, a, w))
+#define PPC_ERATRE(s, a, w)	stringify_in_c(.long PPC_RAW_ERATRE(s, a, w))
+#define PPC_ERATILX(t, a, b)	stringify_in_c(.long PPC_RAW_ERATILX(t, a, b))
+#define PPC_ERATIVAX(s, a, b)	stringify_in_c(.long PPC_RAW_ERATIVAX(s, a, b))
+#define PPC_ERATSX(t, a, w)	stringify_in_c(.long PPC_RAW_ERATSX(t, a, w))
+#define PPC_ERATSX_DOT(t, a, w)	stringify_in_c(.long PPC_RAW_ERATSX_DOT(t, a, w))
+#define PPC_SLBFEE_DOT(t, b)	stringify_in_c(.long PPC_RAW_SLBFEE_DOT(t, b))
+#define __PPC_SLBFEE_DOT(t, b)	stringify_in_c(.long __PPC_RAW_SLBFEE_DOT(t, b))
+#define PPC_ICBT(c, a, b)	stringify_in_c(.long PPC_RAW_ICBT(c, a, b))
+/* PASemi instructions */
+#define LBZCIX(t, a, b)		stringify_in_c(.long PPC_RAW_LBZCIX(t, a, b))
+#define STBCIX(s, a, b)		stringify_in_c(.long PPC_RAW_STBCIX(s, a, b))
+#define PPC_DCBFPS(a, b)	stringify_in_c(.long PPC_RAW_DCBFPS(a, b))
+#define PPC_DCBSTPS(a, b)	stringify_in_c(.long PPC_RAW_DCBSTPS(a, b))
+#define PPC_PHWSYNC		stringify_in_c(.long PPC_RAW_PHWSYNC)
+#define PPC_PLWSYNC		stringify_in_c(.long PPC_RAW_PLWSYNC)
+#define STXVD2X(s, a, b)	stringify_in_c(.long PPC_RAW_STXVD2X(s, a, b))
+#define LXVD2X(s, a, b)		stringify_in_c(.long PPC_RAW_LXVD2X(s, a, b))
+#define MFVRD(a, t)		stringify_in_c(.long PPC_RAW_MFVRD(a, t))
+#define MTVRD(t, a)		stringify_in_c(.long PPC_RAW_MTVRD(t, a))
+#define VPMSUMW(t, a, b)	stringify_in_c(.long PPC_RAW_VPMSUMW(t, a, b))
+#define VPMSUMD(t, a, b)	stringify_in_c(.long PPC_RAW_VPMSUMD(t, a, b))
+#define XXLOR(t, a, b)		stringify_in_c(.long PPC_RAW_XXLOR(t, a, b))
+#define XXSWAPD(t, a)		stringify_in_c(.long PPC_RAW_XXSWAPD(t, a))
+#define XVCPSGNDP(t, a, b)	stringify_in_c(.long (PPC_RAW_XVCPSGNDP(t, a, b)))
 
 #define VPERMXOR(vrt, vra, vrb, vrc)				\
-	stringify_in_c(.long (PPC_INST_VPERMXOR |		\
-			      ___PPC_RT(vrt) | ___PPC_RA(vra) | \
-			      ___PPC_RB(vrb) | (((vrc) & 0x1f) << 6)))
+	stringify_in_c(.long (PPC_RAW_VPERMXOR(vrt, vra, vrb, vrc)))
 
-#define PPC_NAP			stringify_in_c(.long PPC_INST_NAP)
-#define PPC_SLEEP		stringify_in_c(.long PPC_INST_SLEEP)
-#define PPC_WINKLE		stringify_in_c(.long PPC_INST_WINKLE)
+#define PPC_NAP			stringify_in_c(.long PPC_RAW_NAP)
+#define PPC_SLEEP		stringify_in_c(.long PPC_RAW_SLEEP)
+#define PPC_WINKLE		stringify_in_c(.long PPC_RAW_WINKLE)
 
-#define PPC_STOP		stringify_in_c(.long PPC_INST_STOP)
+#define PPC_STOP		stringify_in_c(.long PPC_RAW_STOP)
 
 /* BHRB instructions */
-#define PPC_CLRBHRB		stringify_in_c(.long PPC_INST_CLRBHRB)
-#define PPC_MFBHRBE(r, n)	stringify_in_c(.long PPC_INST_BHRBE | \
-						__PPC_RT(r) | \
-							(((n) & 0x3ff) << 11))
+#define PPC_CLRBHRB		stringify_in_c(.long PPC_RAW_CLRBHRB)
+#define PPC_MFBHRBE(r, n)	stringify_in_c(.long PPC_RAW_MFBHRBE(r, n))
 
 /* Transactional memory instructions */
-#define TRECHKPT		stringify_in_c(.long PPC_INST_TRECHKPT)
-#define TRECLAIM(r)		stringify_in_c(.long PPC_INST_TRECLAIM \
-					       | __PPC_RA(r))
-#define TABORT(r)		stringify_in_c(.long PPC_INST_TABORT \
-					       | __PPC_RA(r))
+#define TRECHKPT		stringify_in_c(.long PPC_RAW_TRECHKPT)
+#define TRECLAIM(r)		stringify_in_c(.long PPC_RAW_TRECLAIM(r))
+#define TABORT(r)		stringify_in_c(.long PPC_RAW_TABORT(r))
 
 /* book3e thread control instructions */
-#define TMRN(x)			((((x) & 0x1f) << 16) | (((x) & 0x3e0) << 6))
-#define MTTMR(tmr, r)		stringify_in_c(.long PPC_INST_MTTMR | \
-					       TMRN(tmr) | ___PPC_RS(r))
-#define MFTMR(tmr, r)		stringify_in_c(.long PPC_INST_MFTMR | \
-					       TMRN(tmr) | ___PPC_RT(r))
+#define MTTMR(tmr, r)		stringify_in_c(.long PPC_RAW_MTTMR(tmr, r))
+#define MFTMR(tmr, r)		stringify_in_c(.long PPC_RAW_MFTMR(tmr, r))
 
 /* Coprocessor instructions */
-#define PPC_ICSWX(s, a, b)	stringify_in_c(.long PPC_INST_ICSWX |	\
-					       ___PPC_RS(s) |		\
-					       ___PPC_RA(a) |		\
-					       ___PPC_RB(b))
-#define PPC_ICSWEPX(s, a, b)	stringify_in_c(.long PPC_INST_ICSWEPX | \
-					       ___PPC_RS(s) |		\
-					       ___PPC_RA(a) |		\
-					       ___PPC_RB(b))
+#define PPC_ICSWX(s, a, b)	stringify_in_c(.long PPC_RAW_ICSWX(s, a, b))
+#define PPC_ICSWEPX(s, a, b)	stringify_in_c(.long PPC_RAW_ICSWEPX(s, a, b))
 
-#define PPC_SLBIA(IH)	stringify_in_c(.long PPC_INST_SLBIA | \
-				       ((IH & 0x7) << 21))
+#define PPC_SLBIA(IH)	stringify_in_c(.long PPC_RAW_SLBIA(IH))
 
 /*
  * These may only be used on ISA v3.0 or later (aka. CPU_FTR_ARCH_300, radix
@@ -611,12 +612,8 @@
 #define PPC_RADIX_INVALIDATE_ERAT_USER	PPC_SLBIA(3)
 #define PPC_RADIX_INVALIDATE_ERAT_GUEST	PPC_SLBIA(6)
 
-#define VCMPEQUD_RC(vrt, vra, vrb)	stringify_in_c(.long PPC_INST_VCMPEQUD | \
-			      ___PPC_RT(vrt) | ___PPC_RA(vra) | \
-			      ___PPC_RB(vrb) | __PPC_RC21)
+#define VCMPEQUD_RC(vrt, vra, vrb)	stringify_in_c(.long PPC_RAW_VCMPEQUD_RC(vrt, vra, vrb))
 
-#define VCMPEQUB_RC(vrt, vra, vrb)	stringify_in_c(.long PPC_INST_VCMPEQUB | \
-			      ___PPC_RT(vrt) | ___PPC_RA(vra) | \
-			      ___PPC_RB(vrb) | __PPC_RC21)
+#define VCMPEQUB_RC(vrt, vra, vrb)	stringify_in_c(.long PPC_RAW_VCMPEQUB_RC(vrt, vra, vrb))
 
 #endif /* _ASM_POWERPC_PPC_OPCODE_H */
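
As a quick illustration of the new PPC_RAW_* encodings together with the
IMM_* helpers added above, a hypothetical two-instruction sequence that
materializes a 32-bit address in r3; IMM_HA() compensates for addi
sign-extending the low half.

	u32 insn[2];

	insn[0] = PPC_RAW_LIS(3, IMM_HA(addr));		/* lis  r3,addr@ha */
	insn[1] = PPC_RAW_ADDI(3, 3, IMM_L(addr));	/* addi r3,r3,addr@l */
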
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 6b03dff..b4cc660 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -755,6 +755,8 @@
 #define N_SLINE	68
 #define N_SO	100
 
+#define RFSCV	.long 0x4c0000a4
+
 /*
  * Create an endian fixup trampoline
  *
@@ -774,7 +776,7 @@
 #define FIXUP_ENDIAN
 #else
 /*
- * This version may be used in in HV or non-HV context.
+ * This version may be used in HV or non-HV context.
  * MSR[EE] must be disabled.
  */
 #define FIXUP_ENDIAN						   \
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 52a6783..ed0d633 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -237,7 +237,6 @@
 #ifdef CONFIG_PPC_MEM_KEYS
 	unsigned long	amr;
 	unsigned long	iamr;
-	unsigned long	uamor;
 #endif
 #ifdef CONFIG_KVM_BOOK3S_32_HANDLER
 	void*		kvm_shadow_vcpu; /* KVM internal data */
@@ -272,6 +271,10 @@
 	unsigned 	mmcr0;
 
 	unsigned 	used_ebb;
+	unsigned long   mmcr3;
+	unsigned long   sier2;
+	unsigned long   sier3;
+
 #endif
 };
 
diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h
index ac3970f..155a197 100644
--- a/arch/powerpc/include/asm/ptrace.h
+++ b/arch/powerpc/include/asm/ptrace.h
@@ -222,9 +222,14 @@
 	regs->trap = (regs->trap & TRAP_FLAGS_MASK) | (val & ~TRAP_FLAGS_MASK);
 }
 
+static inline bool trap_is_scv(struct pt_regs *regs)
+{
+	return (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && TRAP(regs) == 0x3000);
+}
+
 static inline bool trap_is_syscall(struct pt_regs *regs)
 {
-	return TRAP(regs) == 0xc00;
+	return (trap_is_scv(regs) || TRAP(regs) == 0xc00);
 }
 
 static inline bool trap_norestart(struct pt_regs *regs)
@@ -238,7 +243,7 @@
 }
 
 #define arch_has_single_step()	(1)
-#ifndef CONFIG_BOOK3S_601
+#ifndef CONFIG_PPC_BOOK3S_601
 #define arch_has_block_step()	(true)
 #else
 #define arch_has_block_step()	(false)
diff --git a/arch/powerpc/include/asm/qspinlock.h b/arch/powerpc/include/asm/qspinlock.h
new file mode 100644
index 0000000..b752d34
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -0,0 +1,91 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_QSPINLOCK_H
+#define _ASM_POWERPC_QSPINLOCK_H
+
+#include <asm-generic/qspinlock_types.h>
+#include <asm/paravirt.h>
+
+#define _Q_PENDING_LOOPS	(1 << 9) /* not tuned */
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+extern void __pv_queued_spin_unlock(struct qspinlock *lock);
+
+static __always_inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
+{
+	if (!is_shared_processor())
+		native_queued_spin_lock_slowpath(lock, val);
+	else
+		__pv_queued_spin_lock_slowpath(lock, val);
+}
+
+#define queued_spin_unlock queued_spin_unlock
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+	if (!is_shared_processor())
+		smp_store_release(&lock->locked, 0);
+	else
+		__pv_queued_spin_unlock(lock);
+}
+
+#else
+extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
+#endif
+
+static __always_inline void queued_spin_lock(struct qspinlock *lock)
+{
+	u32 val = 0;
+
+	if (likely(atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL)))
+		return;
+
+	queued_spin_lock_slowpath(lock, val);
+}
+#define queued_spin_lock queued_spin_lock
+
+#define smp_mb__after_spinlock()   smp_mb()
+
+static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
+{
+	/*
+	 * This barrier was added to simple spinlocks by commit 51d7d5205d338,
+	 * but it should now be possible to remove it, as arm64 has done with
+	 * commit c6f5d02b6a0f.
+	 */
+	smp_mb();
+	return atomic_read(&lock->val);
+}
+#define queued_spin_is_locked queued_spin_is_locked
+
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
+#define SPIN_THRESHOLD (1<<15) /* not tuned */
+
+static __always_inline void pv_wait(u8 *ptr, u8 val)
+{
+	if (*ptr != val)
+		return;
+	yield_to_any();
+	/*
+	 * We could pass in a CPU here if waiting in the queue and yield to
+	 * the previous CPU in the queue.
+	 */
+}
+
+static __always_inline void pv_kick(int cpu)
+{
+	prod_cpu(cpu);
+}
+
+extern void __pv_init_lock_hash(void);
+
+static inline void pv_spinlocks_init(void)
+{
+	__pv_init_lock_hash();
+}
+
+#endif
+
+#include <asm-generic/qspinlock.h>
+
+#endif /* _ASM_POWERPC_QSPINLOCK_H */
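
Nothing changes for lock users: spinlock_t behaves exactly as before,
and the queued implementation is selected at build time (via the
PPC_QUEUED_SPINLOCKS Kconfig option added elsewhere in this series).
A trivial usage sketch:

	static DEFINE_SPINLOCK(example_lock);	/* illustrative only */

	static void example_critical_section(void)
	{
		unsigned long flags;

		spin_lock_irqsave(&example_lock, flags);
		/* protected work; the uncontended path is one cmpxchg */
		spin_unlock_irqrestore(&example_lock, flags);
	}
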
diff --git a/arch/powerpc/include/asm/qspinlock_paravirt.h b/arch/powerpc/include/asm/qspinlock_paravirt.h
new file mode 100644
index 0000000..6b60e77
--- /dev/null
+++ b/arch/powerpc/include/asm/qspinlock_paravirt.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_QSPINLOCK_PARAVIRT_H
+#define _ASM_POWERPC_QSPINLOCK_PARAVIRT_H
+
+EXPORT_SYMBOL(__pv_queued_spin_unlock);
+
+#endif /* _ASM_POWERPC_QSPINLOCK_PARAVIRT_H */
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 88e6c78..41419f1 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -12,6 +12,7 @@
 #ifdef __KERNEL__
 
 #include <linux/stringify.h>
+#include <linux/const.h>
 #include <asm/cputable.h>
 #include <asm/asm-const.h>
 #include <asm/feature-fixups.h>
@@ -876,7 +877,9 @@
 #define   MMCR0_FCHV	0x00000001UL /* freeze conditions in hypervisor mode */
 #define SPRN_MMCR1	798
 #define SPRN_MMCR2	785
+#define SPRN_MMCR3	754
 #define SPRN_UMMCR2	769
+#define SPRN_UMMCR3	738
 #define SPRN_MMCRA	0x312
 #define   MMCRA_SDSYNC	0x80000000UL /* SDAR synced with SIAR */
 #define   MMCRA_SDAR_DCACHE_MISS 0x40000000UL
@@ -886,6 +889,7 @@
 #define   MMCRA_SLOT	0x07000000UL /* SLOT bits (37-39) */
 #define   MMCRA_SLOT_SHIFT	24
 #define   MMCRA_SAMPLE_ENABLE 0x00000001UL /* enable sampling */
+#define   MMCRA_BHRB_DISABLE  _UL(0x2000000000) // BHRB disable bit for ISA v3.1
 #define   POWER6_MMCRA_SDSYNC 0x0000080000000000ULL	/* SDAR/SIAR synced */
 #define   POWER6_MMCRA_SIHV   0x0000040000000000ULL
 #define   POWER6_MMCRA_SIPR   0x0000020000000000ULL
@@ -918,6 +922,10 @@
 #define   SIER_SIHV		0x1000000	/* Sampled MSR_HV */
 #define   SIER_SIAR_VALID	0x0400000	/* SIAR contents valid */
 #define   SIER_SDAR_VALID	0x0200000	/* SDAR contents valid */
+#define SPRN_SIER2	752
+#define SPRN_SIER3	753
+#define SPRN_USIER2	736
+#define SPRN_USIER3	737
 #define SPRN_SIAR	796
 #define SPRN_SDAR	797
 #define SPRN_TACR	888
@@ -1472,7 +1480,7 @@
 {
 	/*
 	 *  The HID0 update on Power8 should at the very least be
-	 *  preceded by a a SYNC instruction followed by an ISYNC
+	 *  preceded by a SYNC instruction followed by an ISYNC
 	 *  instruction
 	 */
 	asm volatile("sync; mtspr %0,%1; isync":: "i"(SPRN_HID0), "r"(hid0));
diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 014968f..55f9a15 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -215,7 +215,6 @@
 
 #define PSERIES_HP_ELOG_ACTION_ADD	1
 #define PSERIES_HP_ELOG_ACTION_REMOVE	2
-#define PSERIES_HP_ELOG_ACTION_READD	3
 
 #define PSERIES_HP_ELOG_ID_DRC_NAME	1
 #define PSERIES_HP_ELOG_ID_DRC_INDEX	2
@@ -253,8 +252,6 @@
 extern void rtas_progress(char *s, unsigned short hex);
 extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data);
 extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
-extern int rtas_online_cpus_mask(cpumask_var_t cpus);
-extern int rtas_offline_cpus_mask(cpumask_var_t cpus);
 extern int rtas_ibm_suspend_me(u64 handle);
 
 struct rtc_time;
diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h
index 7c05e95..fbb8fa3 100644
--- a/arch/powerpc/include/asm/security_features.h
+++ b/arch/powerpc/include/asm/security_features.h
@@ -63,6 +63,8 @@
 // bcctr 2,0,0 triggers a hardware assisted count cache flush
 #define SEC_FTR_BCCTR_FLUSH_ASSIST	0x0000000000000800ull
 
+// bcctr 2,0,0 triggers a hardware assisted link stack flush
+#define SEC_FTR_BCCTR_LINK_FLUSH_ASSIST	0x0000000000002000ull
 
 // Features indicating need for Spectre/Meltdown mitigations
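
A hedged sketch of how such a feature bit is typically consumed during
mitigation setup (the helper name below is hypothetical):

	if (security_ftr_enabled(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST))
		use_hw_link_stack_flush();	/* hypothetical helper */
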
 
diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h
index 65676e2..9efbdde 100644
--- a/arch/powerpc/include/asm/setup.h
+++ b/arch/powerpc/include/asm/setup.h
@@ -30,12 +30,12 @@
 #define ARCH_PANIC_TIMEOUT 180
 
 #ifdef CONFIG_PPC_PSERIES
-extern void pseries_enable_reloc_on_exc(void);
+extern bool pseries_enable_reloc_on_exc(void);
 extern void pseries_disable_reloc_on_exc(void);
 extern void pseries_big_endian_exceptions(void);
 extern void pseries_little_endian_exceptions(void);
 #else
-static inline void pseries_enable_reloc_on_exc(void) {}
+static inline bool pseries_enable_reloc_on_exc(void) { return false; }
 static inline void pseries_disable_reloc_on_exc(void) {}
 static inline void pseries_big_endian_exceptions(void) {}
 static inline void pseries_little_endian_exceptions(void) {}
diff --git a/arch/powerpc/include/asm/simple_spinlock.h b/arch/powerpc/include/asm/simple_spinlock.h
new file mode 100644
index 0000000..9c3c305
--- /dev/null
+++ b/arch/powerpc/include/asm/simple_spinlock.h
@@ -0,0 +1,288 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_H
+#define _ASM_POWERPC_SIMPLE_SPINLOCK_H
+
+/*
+ * Simple spin lock operations.
+ *
+ * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
+ * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
+ * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
+ *	Rework to support virtual processors
+ *
+ * An int is used because a full 64-bit word is not necessary.
+ *
+ * (the type definitions are in asm/simple_spinlock_types.h)
+ */
+#include <linux/irqflags.h>
+#include <asm/paravirt.h>
+#include <asm/paca.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+
+#ifdef CONFIG_PPC64
+/* use 0x800000yy when locked, where yy == CPU number */
+#ifdef __BIG_ENDIAN__
+#define LOCK_TOKEN	(*(u32 *)(&get_paca()->lock_token))
+#else
+#define LOCK_TOKEN	(*(u32 *)(&get_paca()->paca_index))
+#endif
+#else
+#define LOCK_TOKEN	1
+#endif
+
+static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
+{
+	return lock.slock == 0;
+}
+
+static inline int arch_spin_is_locked(arch_spinlock_t *lock)
+{
+	smp_mb();
+	return !arch_spin_value_unlocked(*lock);
+}
+
+/*
+ * This returns the old value in the lock, so we succeeded
+ * in getting the lock if the return value is 0.
+ */
+static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
+{
+	unsigned long tmp, token;
+
+	token = LOCK_TOKEN;
+	__asm__ __volatile__(
+"1:	" PPC_LWARX(%0,0,%2,1) "\n\
+	cmpwi		0,%0,0\n\
+	bne-		2f\n\
+	stwcx.		%1,0,%2\n\
+	bne-		1b\n"
+	PPC_ACQUIRE_BARRIER
+"2:"
+	: "=&r" (tmp)
+	: "r" (token), "r" (&lock->slock)
+	: "cr0", "memory");
+
+	return tmp;
+}
+
+static inline int arch_spin_trylock(arch_spinlock_t *lock)
+{
+	return __arch_spin_trylock(lock) == 0;
+}
+
+/*
+ * On a system with shared processors (that is, where a physical
+ * processor is multiplexed between several virtual processors),
+ * there is no point spinning on a lock if the holder of the lock
+ * isn't currently scheduled on a physical processor.  Instead
+ * we detect this situation and ask the hypervisor to give the
+ * rest of our timeslice to the lock holder.
+ *
+ * So that we can tell which virtual processor is holding a lock,
+ * we put 0x80000000 | smp_processor_id() in the lock when it is
+ * held.  Conveniently, we have a word in the paca that holds this
+ * value.
+ */
+
+#if defined(CONFIG_PPC_SPLPAR)
+/* We only yield to the hypervisor if we are in shared processor mode */
+void splpar_spin_yield(arch_spinlock_t *lock);
+void splpar_rw_yield(arch_rwlock_t *lock);
+#else /* SPLPAR */
+static inline void splpar_spin_yield(arch_spinlock_t *lock) {};
+static inline void splpar_rw_yield(arch_rwlock_t *lock) {};
+#endif
+
+static inline void spin_yield(arch_spinlock_t *lock)
+{
+	if (is_shared_processor())
+		splpar_spin_yield(lock);
+	else
+		barrier();
+}
+
+static inline void rw_yield(arch_rwlock_t *lock)
+{
+	if (is_shared_processor())
+		splpar_rw_yield(lock);
+	else
+		barrier();
+}
+
+static inline void arch_spin_lock(arch_spinlock_t *lock)
+{
+	while (1) {
+		if (likely(__arch_spin_trylock(lock) == 0))
+			break;
+		do {
+			HMT_low();
+			if (is_shared_processor())
+				splpar_spin_yield(lock);
+		} while (unlikely(lock->slock != 0));
+		HMT_medium();
+	}
+}
+
+static inline
+void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
+{
+	unsigned long flags_dis;
+
+	while (1) {
+		if (likely(__arch_spin_trylock(lock) == 0))
+			break;
+		local_save_flags(flags_dis);
+		local_irq_restore(flags);
+		do {
+			HMT_low();
+			if (is_shared_processor())
+				splpar_spin_yield(lock);
+		} while (unlikely(lock->slock != 0));
+		HMT_medium();
+		local_irq_restore(flags_dis);
+	}
+}
+#define arch_spin_lock_flags arch_spin_lock_flags
+
+static inline void arch_spin_unlock(arch_spinlock_t *lock)
+{
+	__asm__ __volatile__("# arch_spin_unlock\n\t"
+				PPC_RELEASE_BARRIER: : :"memory");
+	lock->slock = 0;
+}
+
+/*
+ * Read-write spinlocks, allowing multiple readers
+ * but only one writer.
+ *
+ * NOTE! it is quite common to have readers in interrupts
+ * but no interrupt writers. For those circumstances we
+ * can "mix" irq-safe locks - any writer needs to get a
+ * irq-safe write-lock, but readers can get non-irqsafe
+ * read-locks.
+ */
+
+#ifdef CONFIG_PPC64
+#define __DO_SIGN_EXTEND	"extsw	%0,%0\n"
+#define WRLOCK_TOKEN		LOCK_TOKEN	/* it's negative */
+#else
+#define __DO_SIGN_EXTEND
+#define WRLOCK_TOKEN		(-1)
+#endif
+
+/*
+ * This returns the old value in the lock + 1,
+ * so we got a read lock if the return value is > 0.
+ */
+static inline long __arch_read_trylock(arch_rwlock_t *rw)
+{
+	long tmp;
+
+	__asm__ __volatile__(
+"1:	" PPC_LWARX(%0,0,%1,1) "\n"
+	__DO_SIGN_EXTEND
+"	addic.		%0,%0,1\n\
+	ble-		2f\n"
+"	stwcx.		%0,0,%1\n\
+	bne-		1b\n"
+	PPC_ACQUIRE_BARRIER
+"2:"	: "=&r" (tmp)
+	: "r" (&rw->lock)
+	: "cr0", "xer", "memory");
+
+	return tmp;
+}
+
+/*
+ * This returns the old value in the lock,
+ * so we got the write lock if the return value is 0.
+ */
+static inline long __arch_write_trylock(arch_rwlock_t *rw)
+{
+	long tmp, token;
+
+	token = WRLOCK_TOKEN;
+	__asm__ __volatile__(
+"1:	" PPC_LWARX(%0,0,%2,1) "\n\
+	cmpwi		0,%0,0\n\
+	bne-		2f\n"
+"	stwcx.		%1,0,%2\n\
+	bne-		1b\n"
+	PPC_ACQUIRE_BARRIER
+"2:"	: "=&r" (tmp)
+	: "r" (token), "r" (&rw->lock)
+	: "cr0", "memory");
+
+	return tmp;
+}
+
+static inline void arch_read_lock(arch_rwlock_t *rw)
+{
+	while (1) {
+		if (likely(__arch_read_trylock(rw) > 0))
+			break;
+		do {
+			HMT_low();
+			if (is_shared_processor())
+				splpar_rw_yield(rw);
+		} while (unlikely(rw->lock < 0));
+		HMT_medium();
+	}
+}
+
+static inline void arch_write_lock(arch_rwlock_t *rw)
+{
+	while (1) {
+		if (likely(__arch_write_trylock(rw) == 0))
+			break;
+		do {
+			HMT_low();
+			if (is_shared_processor())
+				splpar_rw_yield(rw);
+		} while (unlikely(rw->lock != 0));
+		HMT_medium();
+	}
+}
+
+static inline int arch_read_trylock(arch_rwlock_t *rw)
+{
+	return __arch_read_trylock(rw) > 0;
+}
+
+static inline int arch_write_trylock(arch_rwlock_t *rw)
+{
+	return __arch_write_trylock(rw) == 0;
+}
+
+static inline void arch_read_unlock(arch_rwlock_t *rw)
+{
+	long tmp;
+
+	__asm__ __volatile__(
+	"# read_unlock\n\t"
+	PPC_RELEASE_BARRIER
+"1:	lwarx		%0,0,%1\n\
+	addic		%0,%0,-1\n"
+"	stwcx.		%0,0,%1\n\
+	bne-		1b"
+	: "=&r"(tmp)
+	: "r"(&rw->lock)
+	: "cr0", "xer", "memory");
+}
+
+static inline void arch_write_unlock(arch_rwlock_t *rw)
+{
+	__asm__ __volatile__("# write_unlock\n\t"
+				PPC_RELEASE_BARRIER: : :"memory");
+	rw->lock = 0;
+}
+
+#define arch_spin_relax(lock)	spin_yield(lock)
+#define arch_read_relax(lock)	rw_yield(lock)
+#define arch_write_relax(lock)	rw_yield(lock)
+
+/* See include/linux/spinlock.h */
+#define smp_mb__after_spinlock()   smp_mb()
+
+#endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_H */
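
The trylock convention used above (the old lock word is returned, so 0
means the lock was taken) can be modelled in a few lines of user-space
C. A minimal sketch, assuming GCC's __sync builtins; illustrative only,
not kernel code:

	static unsigned int lock_word;	/* 0 == unlocked */

	/* Returns the previous value; 0 means we now hold the lock. */
	static unsigned int model_trylock(unsigned int token)
	{
		return __sync_val_compare_and_swap(&lock_word, 0, token);
	}

	static void model_lock(unsigned int token)
	{
		while (model_trylock(token) != 0)
			;	/* spin; an SPLPAR guest would instead yield
				 * to the holder identified by the token */
	}

The token (0x800000yy on 64-bit, where yy is the CPU number) is what
lets splpar_spin_yield() tell the hypervisor which vCPU holds the lock.
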
diff --git a/arch/powerpc/include/asm/simple_spinlock_types.h b/arch/powerpc/include/asm/simple_spinlock_types.h
new file mode 100644
index 0000000..0f3cdd8
--- /dev/null
+++ b/arch/powerpc/include/asm/simple_spinlock_types.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
+#define _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H
+
+#ifndef __LINUX_SPINLOCK_TYPES_H
+# error "please don't include this file directly"
+#endif
+
+typedef struct {
+	volatile unsigned int slock;
+} arch_spinlock_t;
+
+#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
+
+typedef struct {
+	volatile signed int lock;
+} arch_rwlock_t;
+
+#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
+
+#endif /* _ASM_POWERPC_SIMPLE_SPINLOCK_TYPES_H */
diff --git a/arch/powerpc/include/asm/smu.h b/arch/powerpc/include/asm/smu.h
index 8dff086..4b30a02 100644
--- a/arch/powerpc/include/asm/smu.h
+++ b/arch/powerpc/include/asm/smu.h
@@ -108,7 +108,7 @@
  /*
   * i2c commands
   *
-  * To issue an i2c command, first is to send a parameter block to the
+  * To issue an i2c command, the first step is to send a parameter block to
   * the SMU. This is a command of type 0x9a with 9 bytes of header
   * eventually followed by data for a write:
   *
diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h
index c89b324..1e6fa37 100644
--- a/arch/powerpc/include/asm/sparsemem.h
+++ b/arch/powerpc/include/asm/sparsemem.h
@@ -17,12 +17,6 @@
 				  int nid, pgprot_t prot);
 extern int remove_section_mapping(unsigned long start, unsigned long end);
 
-#ifdef CONFIG_PPC_BOOK3S_64
-extern int resize_hpt_for_hotplug(unsigned long new_mem_size);
-#else
-static inline int resize_hpt_for_hotplug(unsigned long new_mem_size) { return 0; }
-#endif
-
 #ifdef CONFIG_NUMA
 extern int hot_add_scn_to_nid(unsigned long scn_addr);
 #else
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index 2d62089..6ec7228 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -3,312 +3,16 @@
 #define __ASM_SPINLOCK_H
 #ifdef __KERNEL__
 
-/*
- * Simple spin lock operations.  
- *
- * Copyright (C) 2001-2004 Paul Mackerras <paulus@au.ibm.com>, IBM
- * Copyright (C) 2001 Anton Blanchard <anton@au.ibm.com>, IBM
- * Copyright (C) 2002 Dave Engebretsen <engebret@us.ibm.com>, IBM
- *	Rework to support virtual processors
- *
- * Type of int is used as a full 64b word is not necessary.
- *
- * (the type definitions are in asm/spinlock_types.h)
- */
-#include <linux/jump_label.h>
-#include <linux/irqflags.h>
-#ifdef CONFIG_PPC64
-#include <asm/paca.h>
-#include <asm/hvcall.h>
-#endif
-#include <asm/synch.h>
-#include <asm/ppc-opcode.h>
-
-#ifdef CONFIG_PPC64
-/* use 0x800000yy when locked, where yy == CPU number */
-#ifdef __BIG_ENDIAN__
-#define LOCK_TOKEN	(*(u32 *)(&get_paca()->lock_token))
+#ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+#include <asm/qspinlock.h>
+#include <asm/qrwlock.h>
 #else
-#define LOCK_TOKEN	(*(u32 *)(&get_paca()->paca_index))
-#endif
-#else
-#define LOCK_TOKEN	1
+#include <asm/simple_spinlock.h>
 #endif
 
-#ifdef CONFIG_PPC_PSERIES
-DECLARE_STATIC_KEY_FALSE(shared_processor);
-
-#define vcpu_is_preempted vcpu_is_preempted
-static inline bool vcpu_is_preempted(int cpu)
-{
-	if (!static_branch_unlikely(&shared_processor))
-		return false;
-	return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1);
-}
+#ifndef CONFIG_PARAVIRT_SPINLOCKS
+static inline void pv_spinlocks_init(void) { }
 #endif
 
-static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
-{
-	return lock.slock == 0;
-}
-
-static inline int arch_spin_is_locked(arch_spinlock_t *lock)
-{
-	smp_mb();
-	return !arch_spin_value_unlocked(*lock);
-}
-
-/*
- * This returns the old value in the lock, so we succeeded
- * in getting the lock if the return value is 0.
- */
-static inline unsigned long __arch_spin_trylock(arch_spinlock_t *lock)
-{
-	unsigned long tmp, token;
-
-	token = LOCK_TOKEN;
-	__asm__ __volatile__(
-"1:	" PPC_LWARX(%0,0,%2,1) "\n\
-	cmpwi		0,%0,0\n\
-	bne-		2f\n\
-	stwcx.		%1,0,%2\n\
-	bne-		1b\n"
-	PPC_ACQUIRE_BARRIER
-"2:"
-	: "=&r" (tmp)
-	: "r" (token), "r" (&lock->slock)
-	: "cr0", "memory");
-
-	return tmp;
-}
-
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
-	return __arch_spin_trylock(lock) == 0;
-}
-
-/*
- * On a system with shared processors (that is, where a physical
- * processor is multiplexed between several virtual processors),
- * there is no point spinning on a lock if the holder of the lock
- * isn't currently scheduled on a physical processor.  Instead
- * we detect this situation and ask the hypervisor to give the
- * rest of our timeslice to the lock holder.
- *
- * So that we can tell which virtual processor is holding a lock,
- * we put 0x80000000 | smp_processor_id() in the lock when it is
- * held.  Conveniently, we have a word in the paca that holds this
- * value.
- */
-
-#if defined(CONFIG_PPC_SPLPAR)
-/* We only yield to the hypervisor if we are in shared processor mode */
-void splpar_spin_yield(arch_spinlock_t *lock);
-void splpar_rw_yield(arch_rwlock_t *lock);
-#else /* SPLPAR */
-static inline void splpar_spin_yield(arch_spinlock_t *lock) {};
-static inline void splpar_rw_yield(arch_rwlock_t *lock) {};
-#endif
-
-static inline bool is_shared_processor(void)
-{
-#ifdef CONFIG_PPC_SPLPAR
-	return static_branch_unlikely(&shared_processor);
-#else
-	return false;
-#endif
-}
-
-static inline void spin_yield(arch_spinlock_t *lock)
-{
-	if (is_shared_processor())
-		splpar_spin_yield(lock);
-	else
-		barrier();
-}
-
-static inline void rw_yield(arch_rwlock_t *lock)
-{
-	if (is_shared_processor())
-		splpar_rw_yield(lock);
-	else
-		barrier();
-}
-
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-	while (1) {
-		if (likely(__arch_spin_trylock(lock) == 0))
-			break;
-		do {
-			HMT_low();
-			if (is_shared_processor())
-				splpar_spin_yield(lock);
-		} while (unlikely(lock->slock != 0));
-		HMT_medium();
-	}
-}
-
-static inline
-void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
-{
-	unsigned long flags_dis;
-
-	while (1) {
-		if (likely(__arch_spin_trylock(lock) == 0))
-			break;
-		local_save_flags(flags_dis);
-		local_irq_restore(flags);
-		do {
-			HMT_low();
-			if (is_shared_processor())
-				splpar_spin_yield(lock);
-		} while (unlikely(lock->slock != 0));
-		HMT_medium();
-		local_irq_restore(flags_dis);
-	}
-}
-#define arch_spin_lock_flags arch_spin_lock_flags
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
-	__asm__ __volatile__("# arch_spin_unlock\n\t"
-				PPC_RELEASE_BARRIER: : :"memory");
-	lock->slock = 0;
-}
-
-/*
- * Read-write spinlocks, allowing multiple readers
- * but only one writer.
- *
- * NOTE! it is quite common to have readers in interrupts
- * but no interrupt writers. For those circumstances we
- * can "mix" irq-safe locks - any writer needs to get a
- * irq-safe write-lock, but readers can get non-irqsafe
- * read-locks.
- */
-
-#ifdef CONFIG_PPC64
-#define __DO_SIGN_EXTEND	"extsw	%0,%0\n"
-#define WRLOCK_TOKEN		LOCK_TOKEN	/* it's negative */
-#else
-#define __DO_SIGN_EXTEND
-#define WRLOCK_TOKEN		(-1)
-#endif
-
-/*
- * This returns the old value in the lock + 1,
- * so we got a read lock if the return value is > 0.
- */
-static inline long __arch_read_trylock(arch_rwlock_t *rw)
-{
-	long tmp;
-
-	__asm__ __volatile__(
-"1:	" PPC_LWARX(%0,0,%1,1) "\n"
-	__DO_SIGN_EXTEND
-"	addic.		%0,%0,1\n\
-	ble-		2f\n"
-"	stwcx.		%0,0,%1\n\
-	bne-		1b\n"
-	PPC_ACQUIRE_BARRIER
-"2:"	: "=&r" (tmp)
-	: "r" (&rw->lock)
-	: "cr0", "xer", "memory");
-
-	return tmp;
-}
-
-/*
- * This returns the old value in the lock,
- * so we got the write lock if the return value is 0.
- */
-static inline long __arch_write_trylock(arch_rwlock_t *rw)
-{
-	long tmp, token;
-
-	token = WRLOCK_TOKEN;
-	__asm__ __volatile__(
-"1:	" PPC_LWARX(%0,0,%2,1) "\n\
-	cmpwi		0,%0,0\n\
-	bne-		2f\n"
-"	stwcx.		%1,0,%2\n\
-	bne-		1b\n"
-	PPC_ACQUIRE_BARRIER
-"2:"	: "=&r" (tmp)
-	: "r" (token), "r" (&rw->lock)
-	: "cr0", "memory");
-
-	return tmp;
-}
-
-static inline void arch_read_lock(arch_rwlock_t *rw)
-{
-	while (1) {
-		if (likely(__arch_read_trylock(rw) > 0))
-			break;
-		do {
-			HMT_low();
-			if (is_shared_processor())
-				splpar_rw_yield(rw);
-		} while (unlikely(rw->lock < 0));
-		HMT_medium();
-	}
-}
-
-static inline void arch_write_lock(arch_rwlock_t *rw)
-{
-	while (1) {
-		if (likely(__arch_write_trylock(rw) == 0))
-			break;
-		do {
-			HMT_low();
-			if (is_shared_processor())
-				splpar_rw_yield(rw);
-		} while (unlikely(rw->lock != 0));
-		HMT_medium();
-	}
-}
-
-static inline int arch_read_trylock(arch_rwlock_t *rw)
-{
-	return __arch_read_trylock(rw) > 0;
-}
-
-static inline int arch_write_trylock(arch_rwlock_t *rw)
-{
-	return __arch_write_trylock(rw) == 0;
-}
-
-static inline void arch_read_unlock(arch_rwlock_t *rw)
-{
-	long tmp;
-
-	__asm__ __volatile__(
-	"# read_unlock\n\t"
-	PPC_RELEASE_BARRIER
-"1:	lwarx		%0,0,%1\n\
-	addic		%0,%0,-1\n"
-"	stwcx.		%0,0,%1\n\
-	bne-		1b"
-	: "=&r"(tmp)
-	: "r"(&rw->lock)
-	: "cr0", "xer", "memory");
-}
-
-static inline void arch_write_unlock(arch_rwlock_t *rw)
-{
-	__asm__ __volatile__("# write_unlock\n\t"
-				PPC_RELEASE_BARRIER: : :"memory");
-	rw->lock = 0;
-}
-
-#define arch_spin_relax(lock)	spin_yield(lock)
-#define arch_read_relax(lock)	rw_yield(lock)
-#define arch_write_relax(lock)	rw_yield(lock)
-
-/* See include/linux/spinlock.h */
-#define smp_mb__after_spinlock()   smp_mb()
-
 #endif /* __KERNEL__ */
 #endif /* __ASM_SPINLOCK_H */
diff --git a/arch/powerpc/include/asm/spinlock_types.h b/arch/powerpc/include/asm/spinlock_types.h
index 87adaf1..c5d742f 100644
--- a/arch/powerpc/include/asm/spinlock_types.h
+++ b/arch/powerpc/include/asm/spinlock_types.h
@@ -6,16 +6,11 @@
 # error "please don't include this file directly"
 #endif
 
-typedef struct {
-	volatile unsigned int slock;
-} arch_spinlock_t;
-
-#define __ARCH_SPIN_LOCK_UNLOCKED	{ 0 }
-
-typedef struct {
-	volatile signed int lock;
-} arch_rwlock_t;
-
-#define __ARCH_RW_LOCK_UNLOCKED		{ 0 }
+#ifdef CONFIG_PPC_QUEUED_SPINLOCKS
+#include <asm-generic/qspinlock_types.h>
+#include <asm-generic/qrwlock_types.h>
+#else
+#include <asm/simple_spinlock_types.h>
+#endif
 
 #endif
diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h
index 3b01c69..972ed0d 100644
--- a/arch/powerpc/include/asm/sstep.h
+++ b/arch/powerpc/include/asm/sstep.h
@@ -40,6 +40,7 @@
 	CACHEOP,
 	BARRIER,
 	SYSCALL,
+	SYSCALL_VECTORED_0,
 	MFMSR,
 	MTMSR,
 	RFI,
@@ -104,6 +105,12 @@
 
 #define MKOP(t, f, s)	((t) | (f) | SIZE(s))
 
+/* Prefix instruction operands */
+#define GET_PREFIX_RA(i)	(((i) >> 16) & 0x1f)
+#define GET_PREFIX_R(i)		((i) & (1ul << 20))
+
+extern s32 patch__exec_instr;
+
 struct instruction_op {
 	int type;
 	int reg;
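
A hedged usage sketch for the new prefix-operand helpers, assuming
'prefix' and 'suffix' hold the two 32-bit words of an ISA v3.1
prefixed instruction (the variable names here are illustrative):

	unsigned int r = GET_PREFIX_R(prefix);		/* R bit: PC-relative form */
	unsigned int ra = GET_PREFIX_RA(suffix);	/* base register (RA) field */

	if (r && ra)
		return -1;	/* R=1 requires RA=0 for prefixed loads/stores */
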
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index b726927..283552c 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -28,7 +28,7 @@
 extern void * memmove(void *,const void *,__kernel_size_t);
 extern int memcmp(const void *,const void *,__kernel_size_t);
 extern void * memchr(const void *,int,__kernel_size_t);
-extern void * memcpy_flushcache(void *,const void *,__kernel_size_t);
+void memcpy_flushcache(void *dest, const void *src, size_t size);
 
 void *__memset(void *s, int c, __kernel_size_t count);
 void *__memcpy(void *to, const void *from, __kernel_size_t n);
diff --git a/arch/powerpc/include/asm/time.h b/arch/powerpc/include/asm/time.h
index b287cfc..cb32672 100644
--- a/arch/powerpc/include/asm/time.h
+++ b/arch/powerpc/include/asm/time.h
@@ -39,7 +39,6 @@
 };
 
 /* Accessor functions for the timebase (RTC on 601) registers. */
-/* If one day CONFIG_POWER is added just define __USE_RTC as 1 */
 #define __USE_RTC()	(IS_ENABLED(CONFIG_PPC_BOOK3S_601))
 
 #ifdef CONFIG_PPC64
diff --git a/arch/powerpc/include/asm/timex.h b/arch/powerpc/include/asm/timex.h
index d2d2c4b..6047402 100644
--- a/arch/powerpc/include/asm/timex.h
+++ b/arch/powerpc/include/asm/timex.h
@@ -17,7 +17,7 @@
 
 static inline cycles_t get_cycles(void)
 {
-	if (IS_ENABLED(CONFIG_BOOK3S_601))
+	if (IS_ENABLED(CONFIG_PPC_BOOK3S_601))
 		return 0;
 
 	return mftb();
diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h
index 2db7ba7..f0b6300 100644
--- a/arch/powerpc/include/asm/topology.h
+++ b/arch/powerpc/include/asm/topology.h
@@ -43,7 +43,6 @@
 
 extern int sysfs_add_device_to_node(struct device *dev, int nid);
 extern void sysfs_remove_device_from_node(struct device *dev, int nid);
-extern int numa_update_cpu_topology(bool cpus_locked);
 
 static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node)
 {
@@ -78,11 +77,6 @@
 {
 }
 
-static inline int numa_update_cpu_topology(bool cpus_locked)
-{
-	return 0;
-}
-
 static inline void update_numa_cpu_lookup_table(unsigned int cpu, int node) {}
 
 static inline int cpu_distance(__be32 *cpu1_assoc, __be32 *cpu2_assoc)
@@ -93,32 +87,12 @@
 #endif /* CONFIG_NUMA */
 
 #if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
-extern int start_topology_update(void);
-extern int stop_topology_update(void);
-extern int prrn_is_enabled(void);
 extern int find_and_online_cpu_nid(int cpu);
-extern int timed_topology_update(int nsecs);
 #else
-static inline int start_topology_update(void)
-{
-	return 0;
-}
-static inline int stop_topology_update(void)
-{
-	return 0;
-}
-static inline int prrn_is_enabled(void)
-{
-	return 0;
-}
 static inline int find_and_online_cpu_nid(int cpu)
 {
 	return 0;
 }
-static inline int timed_topology_update(int nsecs)
-{
-	return 0;
-}
 
 #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */
 
@@ -141,7 +115,6 @@
 #define topology_core_cpumask(cpu)	(per_cpu(cpu_core_map, cpu))
 #define topology_core_id(cpu)		(cpu_to_core_id(cpu))
 
-int dlpar_cpu_readd(int cpu);
 #endif
 #endif
 
diff --git a/arch/powerpc/include/asm/xive.h b/arch/powerpc/include/asm/xive.h
index d08ea11..309b4d6 100644
--- a/arch/powerpc/include/asm/xive.h
+++ b/arch/powerpc/include/asm/xive.h
@@ -155,7 +155,6 @@
 static inline int  xive_smp_prepare_cpu(unsigned int cpu) { return -EINVAL; }
 static inline void xive_smp_setup_cpu(void) { }
 static inline void xive_smp_disable_cpu(void) { }
-static inline void xive_kexec_teardown_cpu(int secondary) { }
 static inline void xive_shutdown(void) { }
 static inline void xive_flush_interrupt(void) { }
 
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 264e266..c3af3f3 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -640,6 +640,11 @@
 #define KVM_REG_PPC_ONLINE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
 #define KVM_REG_PPC_PTCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
 
+/* POWER10 registers */
+#define KVM_REG_PPC_MMCR3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1)
+#define KVM_REG_PPC_SIER2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2)
+#define KVM_REG_PPC_SIER3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
+
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
  */
diff --git a/arch/powerpc/include/uapi/asm/mman.h b/arch/powerpc/include/uapi/asm/mman.h
index c0c7372..3a70035 100644
--- a/arch/powerpc/include/uapi/asm/mman.h
+++ b/arch/powerpc/include/uapi/asm/mman.h
@@ -11,7 +11,7 @@
 #include <asm-generic/mman-common.h>
 
 
-#define PROT_SAO	0x10		/* Strong Access Ordering */
+#define PROT_SAO	0x10		/* Unsupported since v5.9 */
 
 #define MAP_RENAME      MAP_ANONYMOUS   /* In SunOS terminology */
 #define MAP_NORESERVE   0x40            /* don't reserve swap pages */
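
The user-visible consequence, sketched here rather than taken from the
patch: a mapping that asks for PROT_SAO is now expected to be rejected
(typically with EINVAL), so callers must handle the failure:

	#include <sys/mman.h>

	void *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE | PROT_SAO,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		/* expected on v5.9+: PROT_SAO is no longer honoured */;
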
diff --git a/arch/powerpc/include/uapi/asm/papr_pdsm.h b/arch/powerpc/include/uapi/asm/papr_pdsm.h
index 9ccecc1d..50ef95e 100644
--- a/arch/powerpc/include/uapi/asm/papr_pdsm.h
+++ b/arch/powerpc/include/uapi/asm/papr_pdsm.h
@@ -72,6 +72,11 @@
 #define PAPR_PDSM_DIMM_CRITICAL      2
 #define PAPR_PDSM_DIMM_FATAL         3
 
+/* struct nd_papr_pdsm_health.extension_flags field flags */
+
+/* Indicate that the 'dimm_fuel_gauge' field is valid */
+#define PDSM_DIMM_HEALTH_RUN_GAUGE_VALID 1
+
 /*
  * Struct exchanged between kernel & ndctl in for PAPR_PDSM_HEALTH
  * Various flags indicate the health status of the dimm.
@@ -84,6 +89,7 @@
  * dimm_locked		: Contents of the dimm cant be modified until CEC reboot
  * dimm_encrypted	: Contents of dimm are encrypted.
  * dimm_health		: Dimm health indicator. One of PAPR_PDSM_DIMM_XXXX
+ * dimm_fuel_gauge	: Life remaining of DIMM as a percentage from 0-100
  */
 struct nd_papr_pdsm_health {
 	union {
@@ -96,6 +102,9 @@
 			__u8 dimm_locked;
 			__u8 dimm_encrypted;
 			__u16 dimm_health;
+
+			/* Extension flag PDSM_DIMM_HEALTH_RUN_GAUGE_VALID */
+			__u16 dimm_fuel_gauge;
 		};
 		__u8 buf[ND_PDSM_PAYLOAD_MAX_SIZE];
 	};
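
A short consumer-side sketch (assumed ndctl-style code, not part of
this patch) of how the extension flag gates the new field:

	struct nd_papr_pdsm_health health;

	/* ... health filled in via the PAPR_PDSM_HEALTH request ... */
	if (health.extension_flags & PDSM_DIMM_HEALTH_RUN_GAUGE_VALID)
		printf("DIMM life remaining: %u%%\n", health.dimm_fuel_gauge);
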
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 244542a..d4d5946 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -45,11 +45,10 @@
 				   signal.o sysfs.o cacheinfo.o time.o \
 				   prom.o traps.o setup-common.o \
 				   udbg.o misc.o io.o misc_$(BITS).o \
-				   of_platform.o prom_parse.o
+				   of_platform.o prom_parse.o firmware.o
 obj-y				+= ptrace/
 obj-$(CONFIG_PPC64)		+= setup_64.o \
-				   paca.o nvram_64.o firmware.o note.o \
-				   syscall_64.o
+				   paca.o nvram_64.o note.o syscall_64.o
 obj-$(CONFIG_COMPAT)		+= sys_ppc32.o signal_32.o
 obj-$(CONFIG_VDSO32)		+= vdso32/
 obj-$(CONFIG_PPC_WATCHDOG)	+= watchdog.o
@@ -71,7 +70,7 @@
 obj-$(CONFIG_RTAS_FLASH)	+= rtas_flash.o
 obj-$(CONFIG_RTAS_PROC)		+= rtas-proc.o
 obj-$(CONFIG_PPC_DT_CPU_FTRS)	+= dt_cpu_ftrs.o
-obj-$(CONFIG_EEH)              += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
+obj-$(CONFIG_EEH)              += eeh.o eeh_pe.o eeh_cache.o \
 				  eeh_driver.o eeh_event.o eeh_sysfs.o
 obj-$(CONFIG_GENERIC_TBSYNC)	+= smp-tbsync.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index 1f1ce8b..c7797eb9 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -178,11 +178,11 @@
 			ret |= __get_user_inatomic(temp.v[1], p++);
 			ret |= __get_user_inatomic(temp.v[2], p++);
 			ret |= __get_user_inatomic(temp.v[3], p++);
-			/* fall through */
+			fallthrough;
 		case 4:
 			ret |= __get_user_inatomic(temp.v[4], p++);
 			ret |= __get_user_inatomic(temp.v[5], p++);
-			/* fall through */
+			fallthrough;
 		case 2:
 			ret |= __get_user_inatomic(temp.v[6], p++);
 			ret |= __get_user_inatomic(temp.v[7], p++);
@@ -263,11 +263,11 @@
 			ret |= __put_user_inatomic(data.v[1], p++);
 			ret |= __put_user_inatomic(data.v[2], p++);
 			ret |= __put_user_inatomic(data.v[3], p++);
-			/* fall through */
+			fallthrough;
 		case 4:
 			ret |= __put_user_inatomic(data.v[4], p++);
 			ret |= __put_user_inatomic(data.v[5], p++);
-			/* fall through */
+			fallthrough;
 		case 2:
 			ret |= __put_user_inatomic(data.v[6], p++);
 			ret |= __put_user_inatomic(data.v[7], p++);
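
The fallthrough pseudo-keyword (from <linux/compiler_attributes.h>)
replaces the old comment convention and, unlike a comment, is checked
by -Wimplicit-fallthrough. The pattern in brief:

	switch (nb) {
	case 8:
		/* copy bytes 0-3 ... */
		fallthrough;	/* deliberately continue into case 4 */
	case 4:
		/* copy bytes 4-5 ... */
		break;
	}
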
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 6657dc6..8711c21 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -559,6 +559,8 @@
 	OFFSET(VCPU_IRQ_PENDING, kvm_vcpu, arch.irq_pending);
 	OFFSET(VCPU_DBELL_REQ, kvm_vcpu, arch.doorbell_request);
 	OFFSET(VCPU_MMCR, kvm_vcpu, arch.mmcr);
+	OFFSET(VCPU_MMCRA, kvm_vcpu, arch.mmcra);
+	OFFSET(VCPU_MMCRS, kvm_vcpu, arch.mmcrs);
 	OFFSET(VCPU_PMC, kvm_vcpu, arch.pmc);
 	OFFSET(VCPU_SPMC, kvm_vcpu, arch.spmc);
 	OFFSET(VCPU_SIAR, kvm_vcpu, arch.siar);
@@ -696,6 +698,9 @@
 	HSTATE_FIELD(HSTATE_SDAR, host_mmcr[4]);
 	HSTATE_FIELD(HSTATE_MMCR2, host_mmcr[5]);
 	HSTATE_FIELD(HSTATE_SIER, host_mmcr[6]);
+	HSTATE_FIELD(HSTATE_MMCR3, host_mmcr[7]);
+	HSTATE_FIELD(HSTATE_SIER2, host_mmcr[8]);
+	HSTATE_FIELD(HSTATE_SIER3, host_mmcr[9]);
 	HSTATE_FIELD(HSTATE_PMC1, host_pmc[0]);
 	HSTATE_FIELD(HSTATE_PMC2, host_pmc[1]);
 	HSTATE_FIELD(HSTATE_PMC3, host_pmc[2]);
diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c
index 4703362..65ab9fc 100644
--- a/arch/powerpc/kernel/cacheinfo.c
+++ b/arch/powerpc/kernel/cacheinfo.c
@@ -7,6 +7,8 @@
  * Author: Nathan Lynch
  */
 
+#define pr_fmt(fmt) "cacheinfo: " fmt
+
 #include <linux/cpu.h>
 #include <linux/cpumask.h>
 #include <linux/kernel.h>
@@ -166,7 +168,7 @@
 
 	list_for_each_entry(iter, &cache_list, list)
 		WARN_ONCE(iter->next_local == cache,
-			  "cache for %pOF(%s) refers to cache for %pOF(%s)\n",
+			  "cache for %pOFP(%s) refers to cache for %pOFP(%s)\n",
 			  iter->ofnode,
 			  cache_type_string(iter),
 			  cache->ofnode,
@@ -178,7 +180,7 @@
 	if (!cache)
 		return;
 
-	pr_debug("freeing L%d %s cache for %pOF\n", cache->level,
+	pr_debug("freeing L%d %s cache for %pOFP\n", cache->level,
 		 cache_type_string(cache), cache->ofnode);
 
 	release_cache_debugcheck(cache);
@@ -193,7 +195,7 @@
 
 	while (next) {
 		WARN_ONCE(cpumask_test_cpu(cpu, &next->shared_cpu_map),
-			  "CPU %i already accounted in %pOF(%s)\n",
+			  "CPU %i already accounted in %pOFP(%s)\n",
 			  cpu, next->ofnode,
 			  cache_type_string(next));
 		cpumask_set_cpu(cpu, &next->shared_cpu_map);
@@ -352,7 +354,7 @@
 
 static struct cache *cache_do_one_devnode_unified(struct device_node *node, int level)
 {
-	pr_debug("creating L%d ucache for %pOF\n", level, node);
+	pr_debug("creating L%d ucache for %pOFP\n", level, node);
 
 	return new_cache(cache_is_unified_d(node), level, node);
 }
@@ -362,7 +364,7 @@
 {
 	struct cache *dcache, *icache;
 
-	pr_debug("creating L%d dcache and icache for %pOF\n", level,
+	pr_debug("creating L%d dcache and icache for %pOFP\n", level,
 		 node);
 
 	dcache = new_cache(CACHE_TYPE_DATA, level, node);
@@ -418,12 +420,27 @@
 	}
 
 	smaller->next_local = bigger;
+
+	/*
+	 * The cache->next_local list sorts by level ascending:
+	 * L1d -> L1i -> L2 -> L3 ...
+	 */
+	WARN_ONCE((smaller->level == 1 && bigger->level > 2) ||
+		  (smaller->level > 1 && bigger->level != smaller->level + 1),
+		  "linking L%i cache %pOFP to L%i cache %pOFP; skipped a level?\n",
+		  smaller->level, smaller->ofnode, bigger->level, bigger->ofnode);
 }
 
 static void do_subsidiary_caches_debugcheck(struct cache *cache)
 {
-	WARN_ON_ONCE(cache->level != 1);
-	WARN_ON_ONCE(!of_node_is_type(cache->ofnode, "cpu"));
+	WARN_ONCE(cache->level != 1,
+		  "instantiating cache chain from L%d %s cache for "
+		  "%pOFP instead of an L1\n", cache->level,
+		  cache_type_string(cache), cache->ofnode);
+	WARN_ONCE(!of_node_is_type(cache->ofnode, "cpu"),
+		  "instantiating cache chain from node %pOFP of type '%s' "
+		  "instead of a cpu node\n", cache->ofnode,
+		  of_node_get_device_type(cache->ofnode));
 }
 
 static void do_subsidiary_caches(struct cache *cache)
@@ -647,12 +664,13 @@
 	return &cache->shared_cpu_map;
 }
 
-static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+static ssize_t
+show_shared_cpumap(struct kobject *k, struct kobj_attribute *attr, char *buf, bool list)
 {
 	struct cache_index_dir *index;
 	struct cache *cache;
 	const struct cpumask *mask;
-	int ret, cpu;
+	int cpu;
 
 	index = kobj_to_cache_index_dir(k);
 	cache = index->cache;
@@ -664,16 +682,25 @@
 		mask  = &cache->shared_cpu_map;
 	}
 
-	ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb\n",
-			cpumask_pr_args(mask));
-	buf[ret++] = '\n';
-	buf[ret] = '\0';
-	return ret;
+	return cpumap_print_to_pagebuf(list, buf, mask);
+}
+
+static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+	return show_shared_cpumap(k, attr, buf, false);
+}
+
+static ssize_t shared_cpu_list_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
+{
+	return show_shared_cpumap(k, attr, buf, true);
 }
 
 static struct kobj_attribute cache_shared_cpu_map_attr =
 	__ATTR(shared_cpu_map, 0444, shared_cpu_map_show, NULL);
 
+static struct kobj_attribute cache_shared_cpu_list_attr =
+	__ATTR(shared_cpu_list, 0444, shared_cpu_list_show, NULL);
+
 /* Attributes which should always be created -- the kobject/sysfs core
  * does this automatically via kobj_type->default_attrs.  This is the
  * minimum data required to uniquely identify a cache.
@@ -682,6 +709,7 @@
 	&cache_type_attr.attr,
 	&cache_level_attr.attr,
 	&cache_shared_cpu_map_attr.attr,
+	&cache_shared_cpu_list_attr.attr,
 	NULL,
 };
 
@@ -733,13 +761,13 @@
 		rc = attr->show(&dir->kobj, attr, buf);
 		if (rc <= 0) {
 			pr_debug("not creating %s attribute for "
-				 "%pOF(%s) (rc = %zd)\n",
+				 "%pOFP(%s) (rc = %zd)\n",
 				 attr->attr.name, cache->ofnode,
 				 cache_type, rc);
 			continue;
 		}
 		if (sysfs_create_file(&dir->kobj, &attr->attr))
-			pr_debug("could not create %s attribute for %pOF(%s)\n",
+			pr_debug("could not create %s attribute for %pOFP(%s)\n",
 				 attr->attr.name, cache->ofnode, cache_type);
 	}
 
@@ -855,7 +883,7 @@
 		struct cache *next = cache->next_local;
 
 		WARN_ONCE(!cpumask_test_cpu(cpu, &cache->shared_cpu_map),
-			  "CPU %i not accounted in %pOF(%s)\n",
+			  "CPU %i not accounted in %pOFP(%s)\n",
 			  cpu, cache->ofnode,
 			  cache_type_string(cache));
 
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index efdcfa7..704e8b9 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -94,13 +94,15 @@
 _GLOBAL(__setup_cpu_power10)
 	mflr	r11
 	bl	__init_FSCR_power10
+	bl	__init_PMU
+	bl	__init_PMU_ISA31
 	b	1f
 
 _GLOBAL(__setup_cpu_power9)
 	mflr	r11
-	bl	__init_FSCR
-1:	bl	__init_PMU
-	bl	__init_hvmode_206
+	bl	__init_FSCR_power9
+	bl	__init_PMU
+1:	bl	__init_hvmode_206
 	mtlr	r11
 	beqlr
 	li	r0,0
@@ -124,13 +126,15 @@
 _GLOBAL(__restore_cpu_power10)
 	mflr	r11
 	bl	__init_FSCR_power10
+	bl	__init_PMU
+	bl	__init_PMU_ISA31
 	b	1f
 
 _GLOBAL(__restore_cpu_power9)
 	mflr	r11
-	bl	__init_FSCR
-1:	bl	__init_PMU
-	mfmsr	r3
+	bl	__init_FSCR_power9
+	bl	__init_PMU
+1:	mfmsr	r3
 	rldicl.	r0,r3,4,63
 	mtlr	r11
 	beqlr
@@ -198,6 +202,12 @@
 	mtspr	SPRN_FSCR, r3
 	// fall through
 
+__init_FSCR_power9:
+	mfspr	r3, SPRN_FSCR
+	ori	r3, r3, FSCR_SCV
+	mtspr	SPRN_FSCR, r3
+	// fall through
+
 __init_FSCR:
 	mfspr	r3,SPRN_FSCR
 	ori	r3,r3,FSCR_TAR|FSCR_EBB
@@ -233,3 +243,10 @@
 	li	r5,0
 	mtspr	SPRN_MMCRS,r5
 	blr
+
+__init_PMU_ISA31:
+	li	r5,0
+	mtspr	SPRN_MMCR3,r5
+	LOAD_REG_IMMEDIATE(r5, MMCRA_BHRB_DISABLE)
+	mtspr	SPRN_MMCRA,r5
+	blr
diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
index b406635..3d406a9 100644
--- a/arch/powerpc/kernel/cputable.c
+++ b/arch/powerpc/kernel/cputable.c
@@ -120,7 +120,8 @@
 #define COMMON_USER2_POWER9	(COMMON_USER2_POWER8 | \
 				 PPC_FEATURE2_ARCH_3_00 | \
 				 PPC_FEATURE2_HAS_IEEE128 | \
-				 PPC_FEATURE2_DARN )
+				 PPC_FEATURE2_DARN | \
+				 PPC_FEATURE2_SCV)
 #define COMMON_USER_POWER10	COMMON_USER_POWER9
 #define COMMON_USER2_POWER10	(COMMON_USER2_POWER9 | \
 				 PPC_FEATURE2_ARCH_3_1 | \
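
With PPC_FEATURE2_SCV advertised in hwcaps, user space can probe for
the new syscall ABI before issuing scv. A hedged glibc-based sketch
(the feature bit comes from the uapi <asm/cputable.h>):

	#include <sys/auxv.h>
	#include <asm/cputable.h>

	if (getauxval(AT_HWCAP2) & PPC_FEATURE2_SCV) {
		/* the 'scv 0' system call ABI is available */
	}
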
diff --git a/arch/powerpc/kernel/dawr.c b/arch/powerpc/kernel/dawr.c
index 500f52f..cdc2dcc 100644
--- a/arch/powerpc/kernel/dawr.c
+++ b/arch/powerpc/kernel/dawr.c
@@ -37,7 +37,7 @@
 	dawrx |= (mrd & 0x3f) << (63 - 53);
 
 	if (ppc_md.set_dawr)
-		return ppc_md.set_dawr(dawr, dawrx);
+		return ppc_md.set_dawr(nr, dawr, dawrx);
 
 	if (nr == 0) {
 		mtspr(SPRN_DAWR0, dawr);
diff --git a/arch/powerpc/kernel/dbell.c b/arch/powerpc/kernel/dbell.c
index f17ff12..52680cf 100644
--- a/arch/powerpc/kernel/dbell.c
+++ b/arch/powerpc/kernel/dbell.c
@@ -18,61 +18,6 @@
 
 #ifdef CONFIG_SMP
 
-/*
- * Doorbells must only be used if CPU_FTR_DBELL is available.
- * msgsnd is used in HV, and msgsndp is used in !HV.
- *
- * These should be used by platform code that is aware of restrictions.
- * Other arch code should use ->cause_ipi.
- *
- * doorbell_global_ipi() sends a dbell to any target CPU.
- * Must be used only by architectures that address msgsnd target
- * by PIR/get_hard_smp_processor_id.
- */
-void doorbell_global_ipi(int cpu)
-{
-	u32 tag = get_hard_smp_processor_id(cpu);
-
-	kvmppc_set_host_ipi(cpu);
-	/* Order previous accesses vs. msgsnd, which is treated as a store */
-	ppc_msgsnd_sync();
-	ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
-}
-
-/*
- * doorbell_core_ipi() sends a dbell to a target CPU in the same core.
- * Must be used only by architectures that address msgsnd target
- * by TIR/cpu_thread_in_core.
- */
-void doorbell_core_ipi(int cpu)
-{
-	u32 tag = cpu_thread_in_core(cpu);
-
-	kvmppc_set_host_ipi(cpu);
-	/* Order previous accesses vs. msgsnd, which is treated as a store */
-	ppc_msgsnd_sync();
-	ppc_msgsnd(PPC_DBELL_MSGTYPE, 0, tag);
-}
-
-/*
- * Attempt to cause a core doorbell if destination is on the same core.
- * Returns 1 on success, 0 on failure.
- */
-int doorbell_try_core_ipi(int cpu)
-{
-	int this_cpu = get_cpu();
-	int ret = 0;
-
-	if (cpumask_test_cpu(cpu, cpu_sibling_mask(this_cpu))) {
-		doorbell_core_ipi(cpu);
-		ret = 1;
-	}
-
-	put_cpu();
-
-	return ret;
-}
-
 void doorbell_exception(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c
index 3a40951..6f8c0c6 100644
--- a/arch/powerpc/kernel/dt_cpu_ftrs.c
+++ b/arch/powerpc/kernel/dt_cpu_ftrs.c
@@ -24,7 +24,6 @@
 
 
 /* Device-tree visible constants follow */
-#define ISA_V2_07B      2070
 #define ISA_V3_0B       3000
 #define ISA_V3_1        3100
 
@@ -67,6 +66,7 @@
 
 extern long __machine_check_early_realmode_p8(struct pt_regs *regs);
 extern long __machine_check_early_realmode_p9(struct pt_regs *regs);
+extern long __machine_check_early_realmode_p10(struct pt_regs *regs);
 
 static int hv_mode;
 
@@ -337,6 +337,7 @@
 #ifdef CONFIG_PPC_RADIX_MMU
 	cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
 	cur_cpu_spec->mmu_features |= MMU_FTRS_HASH_BASE;
+	cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
 	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_MMU;
 
 	return 1;
@@ -450,6 +451,39 @@
 	return 1;
 }
 
+static void init_pmu_power10(void)
+{
+	init_pmu_power9();
+
+	mtspr(SPRN_MMCR3, 0);
+	mtspr(SPRN_MMCRA, MMCRA_BHRB_DISABLE);
+}
+
+static int __init feat_enable_pmu_power10(struct dt_cpu_feature *f)
+{
+	hfscr_pmu_enable();
+
+	init_pmu_power10();
+	init_pmu_registers = init_pmu_power10;
+
+	cur_cpu_spec->cpu_features |= CPU_FTR_MMCRA;
+	cur_cpu_spec->cpu_user_features |= PPC_FEATURE_PSERIES_PERFMON_COMPAT;
+
+	cur_cpu_spec->num_pmcs          = 6;
+	cur_cpu_spec->pmc_type          = PPC_PMC_IBM;
+	cur_cpu_spec->oprofile_cpu_type = "ppc64/power10";
+
+	return 1;
+}
+
+static int __init feat_enable_mce_power10(struct dt_cpu_feature *f)
+{
+	cur_cpu_spec->platform = "power10";
+	cur_cpu_spec->machine_check_early = __machine_check_early_realmode_p10;
+
+	return 1;
+}
+
 static int __init feat_enable_tm(struct dt_cpu_feature *f)
 {
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -587,6 +621,7 @@
 	{"little-endian", feat_enable_le, CPU_FTR_REAL_LE},
 	{"smt", feat_enable_smt, 0},
 	{"interrupt-facilities", feat_enable, 0},
+	{"system-call-vectored", feat_enable, 0},
 	{"timer-facilities", feat_enable, 0},
 	{"timer-facilities-v3", feat_enable, 0},
 	{"debug-facilities", feat_enable, 0},
@@ -622,7 +657,7 @@
 	{"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL},
 	{"processor-utilization-of-resources-register", feat_enable_purr, 0},
 	{"no-execute", feat_enable, 0},
-	{"strong-access-ordering", feat_enable, CPU_FTR_SAO},
+	/* strong-access-ordering is unused */
 	{"cache-inhibited-large-page", feat_enable_large_ci, 0},
 	{"coprocessor-icswx", feat_enable, 0},
 	{"hypervisor-virtualization-interrupt", feat_enable_hvi, 0},
@@ -638,7 +673,9 @@
 	{"group-start-register", feat_enable, 0},
 	{"pc-relative-addressing", feat_enable, 0},
 	{"machine-check-power9", feat_enable_mce_power9, 0},
+	{"machine-check-power10", feat_enable_mce_power10, 0},
 	{"performance-monitor-power9", feat_enable_pmu_power9, 0},
+	{"performance-monitor-power10", feat_enable_pmu_power10, 0},
 	{"event-based-branch-v3", feat_enable, 0},
 	{"random-number-generator", feat_enable, 0},
 	{"system-call-vectored", feat_disable, 0},
@@ -649,6 +686,7 @@
 	{"wait-v3", feat_enable, 0},
 	{"prefix-instructions", feat_enable, 0},
 	{"matrix-multiply-assist", feat_enable_mma, 0},
+	{"debug-facilities-v31", feat_enable, CPU_FTR_DAWR1},
 };
 
 static bool __initdata using_dt_cpu_ftrs;
@@ -674,12 +712,12 @@
 {
 	pr_info("setup for ISA %d\n", isa);
 
-	if (isa >= 3000) {
+	if (isa >= ISA_V3_0B) {
 		cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_300;
 		cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_00;
 	}
 
-	if (isa >= 3100) {
+	if (isa >= ISA_V3_1) {
 		cur_cpu_spec->cpu_features |= CPU_FTR_ARCH_31;
 		cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_ARCH_3_1;
 	}
@@ -776,12 +814,6 @@
 	}
 
 	update_tlbie_feature_flag(version);
-	/*
-	 * PKEY was not in the initial base or feature node
-	 * specification, but it should become optional in the next
-	 * cpu feature version sequence.
-	 */
-	cur_cpu_spec->cpu_features |= CPU_FTR_PKEY;
 }
 
 static void __init cpufeatures_setup_finished(void)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index d407981..9468238 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -167,39 +167,33 @@
  */
 static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
 {
-	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 	u32 cfg;
 	int cap, i;
 	int n = 0, l = 0;
 	char buffer[128];
 
-	if (!pdn) {
-		pr_warn("EEH: Note: No error log for absent device.\n");
-		return 0;
-	}
-
 	n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
-		       pdn->phb->global_number, pdn->busno,
-		       PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+			edev->pe->phb->global_number, edev->bdfn >> 8,
+			PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn));
 	pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
-		pdn->phb->global_number, pdn->busno,
-		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
+		edev->pe->phb->global_number, edev->bdfn >> 8,
+		PCI_SLOT(edev->bdfn), PCI_FUNC(edev->bdfn));
 
-	eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
+	eeh_ops->read_config(edev, PCI_VENDOR_ID, 4, &cfg);
 	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
 	pr_warn("EEH: PCI device/vendor: %08x\n", cfg);
 
-	eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg);
+	eeh_ops->read_config(edev, PCI_COMMAND, 4, &cfg);
 	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
 	pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);
 
 	/* Gather bridge-specific registers */
 	if (edev->mode & EEH_DEV_BRIDGE) {
-		eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
+		eeh_ops->read_config(edev, PCI_SEC_STATUS, 2, &cfg);
 		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
 		pr_warn("EEH: Bridge secondary status: %04x\n", cfg);
 
-		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
+		eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &cfg);
 		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
 		pr_warn("EEH: Bridge control: %04x\n", cfg);
 	}
@@ -207,11 +201,11 @@
 	/* Dump out the PCI-X command and status regs */
 	cap = edev->pcix_cap;
 	if (cap) {
-		eeh_ops->read_config(pdn, cap, 4, &cfg);
+		eeh_ops->read_config(edev, cap, 4, &cfg);
 		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
 		pr_warn("EEH: PCI-X cmd: %08x\n", cfg);
 
-		eeh_ops->read_config(pdn, cap+4, 4, &cfg);
+		eeh_ops->read_config(edev, cap+4, 4, &cfg);
 		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
 		pr_warn("EEH: PCI-X status: %08x\n", cfg);
 	}
@@ -223,7 +217,7 @@
 		pr_warn("EEH: PCI-E capabilities and status follow:\n");
 
 		for (i=0; i<=8; i++) {
-			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
+			eeh_ops->read_config(edev, cap+4*i, 4, &cfg);
 			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
 
 			if ((i % 4) == 0) {
@@ -250,7 +244,7 @@
 		pr_warn("EEH: PCI-E AER capability register set follows:\n");
 
 		for (i=0; i<=13; i++) {
-			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
+			eeh_ops->read_config(edev, cap+4*i, 4, &cfg);
 			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);
 
 			if ((i % 4) == 0) {
@@ -726,7 +720,6 @@
 
 static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
 {
-	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
 	struct pci_dev *dev = userdata;
 
@@ -734,73 +727,14 @@
 		return;
 
 	/* Apply customization from firmware */
-	if (pdn && eeh_ops->restore_config)
-		eeh_ops->restore_config(pdn);
+	if (eeh_ops->restore_config)
+		eeh_ops->restore_config(edev);
 
 	/* The caller should restore state for the specified device */
 	if (pdev != dev)
 		pci_restore_state(pdev);
 }
 
-int eeh_restore_vf_config(struct pci_dn *pdn)
-{
-	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
-	u32 devctl, cmd, cap2, aer_capctl;
-	int old_mps;
-
-	if (edev->pcie_cap) {
-		/* Restore MPS */
-		old_mps = (ffs(pdn->mps) - 8) << 5;
-		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
-				     2, &devctl);
-		devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
-		devctl |= old_mps;
-		eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
-				      2, devctl);
-
-		/* Disable Completion Timeout if possible */
-		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
-				     4, &cap2);
-		if (cap2 & PCI_EXP_DEVCAP2_COMP_TMOUT_DIS) {
-			eeh_ops->read_config(pdn,
-					     edev->pcie_cap + PCI_EXP_DEVCTL2,
-					     4, &cap2);
-			cap2 |= PCI_EXP_DEVCTL2_COMP_TMOUT_DIS;
-			eeh_ops->write_config(pdn,
-					      edev->pcie_cap + PCI_EXP_DEVCTL2,
-					      4, cap2);
-		}
-	}
-
-	/* Enable SERR and parity checking */
-	eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
-	cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
-	eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);
-
-	/* Enable report various errors */
-	if (edev->pcie_cap) {
-		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
-				     2, &devctl);
-		devctl &= ~PCI_EXP_DEVCTL_CERE;
-		devctl |= (PCI_EXP_DEVCTL_NFERE |
-			   PCI_EXP_DEVCTL_FERE |
-			   PCI_EXP_DEVCTL_URRE);
-		eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
-				      2, devctl);
-	}
-
-	/* Enable ECRC generation and check */
-	if (edev->pcie_cap && edev->aer_cap) {
-		eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
-				     4, &aer_capctl);
-		aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
-		eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
-				      4, aer_capctl);
-	}
-
-	return 0;
-}
-
 /**
  * pcibios_set_pcie_reset_state - Set PCI-E reset state
  * @dev: pci device struct
@@ -977,15 +911,13 @@
  */
 void eeh_save_bars(struct eeh_dev *edev)
 {
-	struct pci_dn *pdn;
 	int i;
 
-	pdn = eeh_dev_to_pdn(edev);
-	if (!pdn)
+	if (!edev)
 		return;
 
 	for (i = 0; i < 16; i++)
-		eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]);
+		eeh_ops->read_config(edev, i * 4, 4, &edev->config_space[i]);
 
 	/*
 	 * For PCI bridges including root port, we need enable bus
@@ -1096,7 +1028,7 @@
 
 	/* Initialize PHB PEs */
 	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
-		eeh_dev_phb_init_dynamic(hose);
+		eeh_phb_pe_create(hose);
 
 	eeh_addr_cache_init();
 
@@ -1175,7 +1107,7 @@
 	 * FIXME: HEY MA, LOOK AT ME, NO LOCKING!
 	 */
 	if (edev->pdev && edev->pdev != dev) {
-		eeh_rmv_from_parent_pe(edev);
+		eeh_pe_tree_remove(edev);
 		eeh_addr_cache_rmv_dev(edev->pdev);
 		eeh_sysfs_remove_device(edev->pdev);
 
@@ -1254,7 +1186,7 @@
 	edev->in_error = false;
 	dev->dev.archdata.edev = NULL;
 	if (!(edev->pe->state & EEH_PE_KEEP))
-		eeh_rmv_from_parent_pe(edev);
+		eeh_pe_tree_remove(edev);
 	else
 		edev->mode |= EEH_DEV_DISCONNECTED;
 }
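
The conversions above follow from the eeh_ops config accessors now
taking the eeh_dev directly. Sketched from the call sites (not a
verbatim quote of the ops structure):

	int (*read_config)(struct eeh_dev *edev, int where, int size, u32 *val);
	int (*write_config)(struct eeh_dev *edev, int where, int size, u32 val);
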
diff --git a/arch/powerpc/kernel/eeh_dev.c b/arch/powerpc/kernel/eeh_dev.c
deleted file mode 100644
index 7370185..0000000
--- a/arch/powerpc/kernel/eeh_dev.c
+++ /dev/null
@@ -1,67 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The file intends to implement dynamic creation of EEH device, which will
- * be bound with OF node and PCI device simutaneously. The EEH devices would
- * be foundamental information for EEH core components to work proerly. Besides,
- * We have to support multiple situations where dynamic creation of EEH device
- * is required:
- *
- * 1) Before PCI emunation starts, we need create EEH devices according to the
- *    PCI sensitive OF nodes.
- * 2) When PCI emunation is done, we need do the binding between PCI device and
- *    the associated EEH device.
- * 3) DR (Dynamic Reconfiguration) would create PCI sensitive OF node. EEH device
- *    will be created while PCI sensitive OF node is detected from DR.
- * 4) PCI hotplug needs redoing the binding between PCI device and EEH device. If
- *    PHB is newly inserted, we also need create EEH devices accordingly.
- *
- * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2012.
- */
-
-#include <linux/export.h>
-#include <linux/gfp.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/string.h>
-
-#include <asm/pci-bridge.h>
-#include <asm/ppc-pci.h>
-
-/**
- * eeh_dev_init - Create EEH device according to OF node
- * @pdn: PCI device node
- *
- * It will create EEH device according to the given OF node. The function
- * might be called by PCI emunation, DR, PHB hotplug.
- */
-struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
-{
-	struct eeh_dev *edev;
-
-	/* Allocate EEH device */
-	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
-	if (!edev)
-		return NULL;
-
-	/* Associate EEH device with OF node */
-	pdn->edev = edev;
-	edev->pdn = pdn;
-	edev->bdfn = (pdn->busno << 8) | pdn->devfn;
-	edev->controller = pdn->phb;
-
-	return edev;
-}
-
-/**
- * eeh_dev_phb_init_dynamic - Create EEH devices for devices included in PHB
- * @phb: PHB
- *
- * Scan the PHB OF node and its child association, then create the
- * EEH devices accordingly
- */
-void eeh_dev_phb_init_dynamic(struct pci_controller *phb)
-{
-	/* EEH PE for PHB */
-	eeh_phb_pe_create(phb);
-}
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 7b048ce..4197e45 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -425,8 +425,8 @@
 
 	pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
 #ifdef CONFIG_PCI_IOV
-	if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
-		eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
+	if (eeh_ops->notify_resume)
+		eeh_ops->notify_resume(edev);
 #endif
 	return PCI_ERS_RESULT_NONE;
 }
@@ -477,7 +477,7 @@
 	}
 
 #ifdef CONFIG_PCI_IOV
-	pci_iov_add_virtfn(edev->physfn, eeh_dev_to_pdn(edev)->vf_index);
+	pci_iov_add_virtfn(edev->physfn, edev->vf_index);
 #endif
 	return NULL;
 }
@@ -521,9 +521,7 @@
 
 	if (edev->physfn) {
 #ifdef CONFIG_PCI_IOV
-		struct pci_dn *pdn = eeh_dev_to_pdn(edev);
-
-		pci_iov_remove_virtfn(edev->physfn, pdn->vf_index);
+		pci_iov_remove_virtfn(edev->physfn, edev->vf_index);
 		edev->pdev = NULL;
 #endif
 		if (rmv_data)
@@ -544,7 +542,7 @@
 			continue;
 
 		edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
-		eeh_rmv_from_parent_pe(edev);
+		eeh_pe_tree_remove(edev);
 	}
 
 	return NULL;
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 177852e..d2aaaa7 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -319,56 +319,22 @@
 }
 
 /**
- * eeh_pe_get_parent - Retrieve the parent PE
+ * eeh_pe_tree_insert - Add EEH device to parent PE
  * @edev: EEH device
+ * @new_pe_parent: PE to create additional PEs under
  *
- * The whole PEs existing in the system are organized as hierarchy
- * tree. The function is used to retrieve the parent PE according
- * to the parent EEH device.
- */
-static struct eeh_pe *eeh_pe_get_parent(struct eeh_dev *edev)
-{
-	struct eeh_dev *parent;
-	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
-
-	/*
-	 * It might have the case for the indirect parent
-	 * EEH device already having associated PE, but
-	 * the direct parent EEH device doesn't have yet.
-	 */
-	if (edev->physfn)
-		pdn = pci_get_pdn(edev->physfn);
-	else
-		pdn = pdn ? pdn->parent : NULL;
-	while (pdn) {
-		/* We're poking out of PCI territory */
-		parent = pdn_to_eeh_dev(pdn);
-		if (!parent)
-			return NULL;
-
-		if (parent->pe)
-			return parent->pe;
-
-		pdn = pdn->parent;
-	}
-
-	return NULL;
-}
-
-/**
- * eeh_add_to_parent_pe - Add EEH device to parent PE
- * @edev: EEH device
+ * Add EEH device to the PE in edev->pe_config_addr. If a PE already
+ * exists with that address then @edev is added to that PE. Otherwise
+ * a new PE is created and inserted into the PE tree as a child of
+ * @new_pe_parent.
  *
- * Add EEH device to the parent PE. If the parent PE already
- * exists, the PE type will be changed to EEH_PE_BUS. Otherwise,
- * we have to create new PE to hold the EEH device and the new
- * PE will be linked to its parent PE as well.
+ * If @new_pe_parent is NULL then the new PE will be inserted
+ * directly under the PHB.
  */
-int eeh_add_to_parent_pe(struct eeh_dev *edev)
+int eeh_pe_tree_insert(struct eeh_dev *edev, struct eeh_pe *new_pe_parent)
 {
+	struct pci_controller *hose = edev->controller;
 	struct eeh_pe *pe, *parent;
-	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
-	int config_addr = (pdn->busno << 8) | (pdn->devfn);
 
 	/* Check if the PE number is valid */
 	if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
@@ -382,7 +348,7 @@
 	 * PE should be composed of PCI bus and its subordinate
 	 * components.
 	 */
-	pe = eeh_pe_get(pdn->phb, edev->pe_config_addr, config_addr);
+	pe = eeh_pe_get(hose, edev->pe_config_addr, edev->bdfn);
 	if (pe) {
 		if (pe->type & EEH_PE_INVALID) {
 			list_add_tail(&edev->entry, &pe->edevs);
@@ -399,8 +365,7 @@
 				parent = parent->parent;
 			}
 
-			eeh_edev_dbg(edev,
-				     "Added to device PE (parent: PE#%x)\n",
+			eeh_edev_dbg(edev, "Added to existing PE (parent: PE#%x)\n",
 				     pe->parent->addr);
 		} else {
 			/* Mark the PE as type of PCI bus */
@@ -416,15 +381,15 @@
 
 	/* Create a new EEH PE */
 	if (edev->physfn)
-		pe = eeh_pe_alloc(pdn->phb, EEH_PE_VF);
+		pe = eeh_pe_alloc(hose, EEH_PE_VF);
 	else
-		pe = eeh_pe_alloc(pdn->phb, EEH_PE_DEVICE);
+		pe = eeh_pe_alloc(hose, EEH_PE_DEVICE);
 	if (!pe) {
 		pr_err("%s: out of memory!\n", __func__);
 		return -ENOMEM;
 	}
 	pe->addr	= edev->pe_config_addr;
-	pe->config_addr	= config_addr;
+	pe->config_addr	= edev->bdfn;
 
 	/*
 	 * Put the new EEH PE into hierarchy tree. If the parent
@@ -432,34 +397,35 @@
 	 * to PHB directly. Otherwise, we have to associate the
 	 * PE with its parent.
 	 */
-	parent = eeh_pe_get_parent(edev);
-	if (!parent) {
-		parent = eeh_phb_pe_get(pdn->phb);
-		if (!parent) {
+	if (!new_pe_parent) {
+		new_pe_parent = eeh_phb_pe_get(hose);
+		if (!new_pe_parent) {
 			pr_err("%s: No PHB PE is found (PHB Domain=%d)\n",
-				__func__, pdn->phb->global_number);
+				__func__, hose->global_number);
 			edev->pe = NULL;
 			kfree(pe);
 			return -EEXIST;
 		}
 	}
-	pe->parent = parent;
+
+	/* link new PE into the tree */
+	pe->parent = new_pe_parent;
+	list_add_tail(&pe->child, &new_pe_parent->child_list);
 
 	/*
 	 * Put the newly created PE into the child list and
 	 * link the EEH device accordingly.
 	 */
-	list_add_tail(&pe->child, &parent->child_list);
 	list_add_tail(&edev->entry, &pe->edevs);
 	edev->pe = pe;
-	eeh_edev_dbg(edev, "Added to device PE (parent: PE#%x)\n",
-		     pe->parent->addr);
+	eeh_edev_dbg(edev, "Added to new (parent: PE#%x)\n",
+		     new_pe_parent->addr);
 
 	return 0;
 }
 
 /**
- * eeh_rmv_from_parent_pe - Remove one EEH device from the associated PE
+ * eeh_pe_tree_remove - Remove one EEH device from the associated PE
  * @edev: EEH device
  *
  * The PE hierarchy tree might be changed when doing PCI hotplug.
@@ -467,7 +433,7 @@
  * during EEH recovery. So we have to call the function remove the
  * corresponding PE accordingly if necessary.
  */
-int eeh_rmv_from_parent_pe(struct eeh_dev *edev)
+int eeh_pe_tree_remove(struct eeh_dev *edev)
 {
 	struct eeh_pe *pe, *parent, *child;
 	bool keep, recover;
@@ -698,7 +664,6 @@
  */
 static void eeh_bridge_check_link(struct eeh_dev *edev)
 {
-	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 	int cap;
 	uint32_t val;
 	int timeout = 0;
@@ -714,32 +679,32 @@
 
 	/* Check slot status */
 	cap = edev->pcie_cap;
-	eeh_ops->read_config(pdn, cap + PCI_EXP_SLTSTA, 2, &val);
+	eeh_ops->read_config(edev, cap + PCI_EXP_SLTSTA, 2, &val);
 	if (!(val & PCI_EXP_SLTSTA_PDS)) {
 		eeh_edev_dbg(edev, "No card in the slot (0x%04x) !\n", val);
 		return;
 	}
 
 	/* Check power status if we have the capability */
-	eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCAP, 2, &val);
+	eeh_ops->read_config(edev, cap + PCI_EXP_SLTCAP, 2, &val);
 	if (val & PCI_EXP_SLTCAP_PCP) {
-		eeh_ops->read_config(pdn, cap + PCI_EXP_SLTCTL, 2, &val);
+		eeh_ops->read_config(edev, cap + PCI_EXP_SLTCTL, 2, &val);
 		if (val & PCI_EXP_SLTCTL_PCC) {
 			eeh_edev_dbg(edev, "In power-off state, power it on ...\n");
 			val &= ~(PCI_EXP_SLTCTL_PCC | PCI_EXP_SLTCTL_PIC);
 			val |= (0x0100 & PCI_EXP_SLTCTL_PIC);
-			eeh_ops->write_config(pdn, cap + PCI_EXP_SLTCTL, 2, val);
+			eeh_ops->write_config(edev, cap + PCI_EXP_SLTCTL, 2, val);
 			msleep(2 * 1000);
 		}
 	}
 
 	/* Enable link */
-	eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCTL, 2, &val);
+	eeh_ops->read_config(edev, cap + PCI_EXP_LNKCTL, 2, &val);
 	val &= ~PCI_EXP_LNKCTL_LD;
-	eeh_ops->write_config(pdn, cap + PCI_EXP_LNKCTL, 2, val);
+	eeh_ops->write_config(edev, cap + PCI_EXP_LNKCTL, 2, val);
 
 	/* Check link */
-	eeh_ops->read_config(pdn, cap + PCI_EXP_LNKCAP, 4, &val);
+	eeh_ops->read_config(edev, cap + PCI_EXP_LNKCAP, 4, &val);
 	if (!(val & PCI_EXP_LNKCAP_DLLLARC)) {
 		eeh_edev_dbg(edev, "No link reporting capability (0x%08x) \n", val);
 		msleep(1000);
@@ -752,7 +717,7 @@
 		msleep(20);
 		timeout += 20;
 
-		eeh_ops->read_config(pdn, cap + PCI_EXP_LNKSTA, 2, &val);
+		eeh_ops->read_config(edev, cap + PCI_EXP_LNKSTA, 2, &val);
 		if (val & PCI_EXP_LNKSTA_DLLLA)
 			break;
 	}
@@ -769,7 +734,6 @@
 
 static void eeh_restore_bridge_bars(struct eeh_dev *edev)
 {
-	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 	int i;
 
 	/*
@@ -777,20 +741,20 @@
 	 * Bus numbers and windows: 0x18 - 0x30
 	 */
 	for (i = 4; i < 13; i++)
-		eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]);
+		eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]);
 	/* Rom: 0x38 */
-	eeh_ops->write_config(pdn, 14*4, 4, edev->config_space[14]);
+	eeh_ops->write_config(edev, 14*4, 4, edev->config_space[14]);
 
 	/* Cache line & Latency timer: 0xC 0xD */
-	eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1,
+	eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1,
                 SAVED_BYTE(PCI_CACHE_LINE_SIZE));
-        eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1,
-                SAVED_BYTE(PCI_LATENCY_TIMER));
+	eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1,
+		SAVED_BYTE(PCI_LATENCY_TIMER));
 	/* Max latency, min grant, interrupt ping and line: 0x3C */
-	eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]);
+	eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]);
 
 	/* PCI Command: 0x4 */
-	eeh_ops->write_config(pdn, PCI_COMMAND, 4, edev->config_space[1] |
+	eeh_ops->write_config(edev, PCI_COMMAND, 4, edev->config_space[1] |
 			      PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
 
 	/* Check the PCIe link is ready */
@@ -799,28 +763,27 @@
 
 static void eeh_restore_device_bars(struct eeh_dev *edev)
 {
-	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
 	int i;
 	u32 cmd;
 
 	for (i = 4; i < 10; i++)
-		eeh_ops->write_config(pdn, i*4, 4, edev->config_space[i]);
+		eeh_ops->write_config(edev, i*4, 4, edev->config_space[i]);
 	/* 12 == Expansion ROM Address */
-	eeh_ops->write_config(pdn, 12*4, 4, edev->config_space[12]);
+	eeh_ops->write_config(edev, 12*4, 4, edev->config_space[12]);
 
-	eeh_ops->write_config(pdn, PCI_CACHE_LINE_SIZE, 1,
+	eeh_ops->write_config(edev, PCI_CACHE_LINE_SIZE, 1,
 		SAVED_BYTE(PCI_CACHE_LINE_SIZE));
-	eeh_ops->write_config(pdn, PCI_LATENCY_TIMER, 1,
+	eeh_ops->write_config(edev, PCI_LATENCY_TIMER, 1,
 		SAVED_BYTE(PCI_LATENCY_TIMER));
 
 	/* max latency, min grant, interrupt pin and line */
-	eeh_ops->write_config(pdn, 15*4, 4, edev->config_space[15]);
+	eeh_ops->write_config(edev, 15*4, 4, edev->config_space[15]);
 
 	/*
 	 * Restore PERR & SERR bits, some devices require it,
 	 * don't touch the other command bits
 	 */
-	eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cmd);
+	eeh_ops->read_config(edev, PCI_COMMAND, 4, &cmd);
 	if (edev->config_space[1] & PCI_COMMAND_PARITY)
 		cmd |= PCI_COMMAND_PARITY;
 	else
@@ -829,7 +792,7 @@
 		cmd |= PCI_COMMAND_SERR;
 	else
 		cmd &= ~PCI_COMMAND_SERR;
-	eeh_ops->write_config(pdn, PCI_COMMAND, 4, cmd);
+	eeh_ops->write_config(edev, PCI_COMMAND, 4, cmd);
 }
 
 /**
@@ -843,16 +806,14 @@
  */
 static void eeh_restore_one_device_bars(struct eeh_dev *edev, void *flag)
 {
-	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
-
 	/* Do special restore for bridges */
 	if (edev->mode & EEH_DEV_BRIDGE)
 		eeh_restore_bridge_bars(edev);
 	else
 		eeh_restore_device_bars(edev);
 
-	if (eeh_ops->restore_config && pdn)
-		eeh_ops->restore_config(pdn);
+	if (eeh_ops->restore_config)
+		eeh_ops->restore_config(edev);
 }
 
 /**
diff --git a/arch/powerpc/kernel/eeh_sysfs.c b/arch/powerpc/kernel/eeh_sysfs.c
index 4fb0f1e..429620d 100644
--- a/arch/powerpc/kernel/eeh_sysfs.c
+++ b/arch/powerpc/kernel/eeh_sysfs.c
@@ -99,7 +99,7 @@
 	if (!edev || !edev->pe || !eeh_ops->notify_resume)
 		return -ENODEV;
 
-	if (eeh_ops->notify_resume(pci_get_pdn(pdev)))
+	if (eeh_ops->notify_resume(edev))
 		return -EIO;
 
 	return count;
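
The hunks above are part of converting the eeh_ops callbacks to take an
eeh_dev instead of a pci_dn. For orientation, the edev-keyed callbacks
implied by these call sites look roughly like the sketch below (a sketch
only; the real struct eeh_ops lives in arch/powerpc/include/asm/eeh.h
and carries more members):

	#include <linux/types.h>

	struct eeh_dev;	/* defined in arch/powerpc/include/asm/eeh.h */

	/* Sketch of the edev-based ops used by the call sites above. */
	struct eeh_ops_sketch {
		int (*read_config)(struct eeh_dev *edev, int where, int size, u32 *val);
		int (*write_config)(struct eeh_dev *edev, int where, int size, u32 val);
		int (*restore_config)(struct eeh_dev *edev);
		int (*notify_resume)(struct eeh_dev *edev);
	};
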
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 217ebdf..f4d0af8 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -36,6 +36,12 @@
 #include "head_32.h"
 
 /*
+ * powerpc relies on return from interrupt/syscall being context synchronising
+ * (which rfi is) to support ARCH_HAS_MEMBARRIER_SYNC_CORE without additional
+ * synchronisation instructions.
+ */
+
+/*
  * Align to 4k in order to ensure that all functions modifying srr0/srr1
  * fit into one page in order to not encounter a TLB miss between the
  * modification of srr0/srr1 and the associated rfi.
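
The new comment above is what lets powerpc select
ARCH_HAS_MEMBARRIER_SYNC_CORE. A minimal userspace sketch of what that
enables, assuming a kernel with the SYNC_CORE membarrier commands
(registration would normally happen once at process start):

	#include <linux/membarrier.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	static long membarrier(int cmd, unsigned int flags)
	{
		return syscall(__NR_membarrier, cmd, flags);
	}

	/* Publish freshly written code to all threads: after the barrier
	 * returns, no thread can keep executing stale instructions. */
	int jit_publish_code(void)
	{
		if (membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 0))
			return -1;	/* kernel lacks SYNC_CORE support */
		/* ... write the instructions, flush the icache ... */
		return membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 0);
	}
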
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 9d49338..33a42e4 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -64,15 +64,173 @@
 	.section	".text"
 	.align 7
 
-	.globl system_call_common
-system_call_common:
+#ifdef CONFIG_PPC_BOOK3S
+.macro system_call_vectored name trapnr
+	.globl system_call_vectored_\name
+system_call_vectored_\name:
+_ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 BEGIN_FTR_SECTION
 	extrdi.	r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
 	bne	.Ltabort_syscall
 END_FTR_SECTION_IFSET(CPU_FTR_TM)
 #endif
+	INTERRUPT_TO_KERNEL
+	mr	r10,r1
+	ld	r1,PACAKSAVE(r13)
+	std	r10,0(r1)
+	std	r11,_NIP(r1)
+	std	r12,_MSR(r1)
+	std	r0,GPR0(r1)
+	std	r10,GPR1(r1)
+	std	r2,GPR2(r1)
+	ld	r2,PACATOC(r13)
+	mfcr	r12
+	li	r11,0
+	/* Can we avoid saving r3-r8 in common case? */
+	std	r3,GPR3(r1)
+	std	r4,GPR4(r1)
+	std	r5,GPR5(r1)
+	std	r6,GPR6(r1)
+	std	r7,GPR7(r1)
+	std	r8,GPR8(r1)
+	/* Zero r9-r12, this should only be required when restoring all GPRs */
+	std	r11,GPR9(r1)
+	std	r11,GPR10(r1)
+	std	r11,GPR11(r1)
+	std	r11,GPR12(r1)
+	std	r9,GPR13(r1)
+	SAVE_NVGPRS(r1)
+	std	r11,_XER(r1)
+	std	r11,_LINK(r1)
+	std	r11,_CTR(r1)
+
+	li	r11,\trapnr
+	std	r11,_TRAP(r1)
+	std	r12,_CCR(r1)
+	std	r3,ORIG_GPR3(r1)
+	addi	r10,r1,STACK_FRAME_OVERHEAD
+	ld	r11,exception_marker@toc(r2)
+	std	r11,-16(r10)		/* "regshere" marker */
+
+	/*
+	 * RECONCILE_IRQ_STATE without calling trace_hardirqs_off(), which
+	 * would clobber syscall parameters. Also we always enter with IRQs
+	 * enabled and nothing pending. system_call_exception() will call
+	 * trace_hardirqs_off().
+	 *
+	 * scv enters with MSR[EE]=1, so don't set PACA_IRQ_HARD_DIS. The
+	 * entry vector already sets PACAIRQSOFTMASK to IRQS_ALL_DISABLED.
+	 */
+
+	/* Calling convention has r9 = orig r0, r10 = regs */
+	mr	r9,r0
+	bl	system_call_exception
+
+.Lsyscall_vectored_\name\()_exit:
+	addi    r4,r1,STACK_FRAME_OVERHEAD
+	li	r5,1 /* scv */
+	bl	syscall_exit_prepare
+
+	ld	r2,_CCR(r1)
+	ld	r4,_NIP(r1)
+	ld	r5,_MSR(r1)
+
+BEGIN_FTR_SECTION
+	stdcx.	r0,0,r1			/* to clear the reservation */
+END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
+
+BEGIN_FTR_SECTION
+	HMT_MEDIUM_LOW
+END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
+
+	cmpdi	r3,0
+	bne	.Lsyscall_vectored_\name\()_restore_regs
+
+	/* rfscv returns with LR->NIA and CTR->MSR */
+	mtlr	r4
+	mtctr	r5
+
+	/* Could zero these as per ABI, but we may consider a stricter ABI
+	 * which preserves these if libc implementations can benefit, so
+	 * restore them for now until further measurement is done. */
+	ld	r0,GPR0(r1)
+	ld	r4,GPR4(r1)
+	ld	r5,GPR5(r1)
+	ld	r6,GPR6(r1)
+	ld	r7,GPR7(r1)
+	ld	r8,GPR8(r1)
+	/* Zero volatile regs that may contain sensitive kernel data */
+	li	r9,0
+	li	r10,0
+	li	r11,0
+	li	r12,0
+	mtspr	SPRN_XER,r0
+
+	/*
+	 * We don't need to restore AMR on the way back to userspace for KUAP.
+	 * The value of AMR only matters while we're in the kernel.
+	 */
+	mtcr	r2
+	ld	r2,GPR2(r1)
+	ld	r3,GPR3(r1)
+	ld	r13,GPR13(r1)
+	ld	r1,GPR1(r1)
+	RFSCV_TO_USER
+	b	.	/* prevent speculative execution */
+
+.Lsyscall_vectored_\name\()_restore_regs:
+	li	r3,0
+	mtmsrd	r3,1
+	mtspr	SPRN_SRR0,r4
+	mtspr	SPRN_SRR1,r5
+
+	ld	r3,_CTR(r1)
+	ld	r4,_LINK(r1)
+	ld	r5,_XER(r1)
+
+	REST_NVGPRS(r1)
+	ld	r0,GPR0(r1)
+	mtcr	r2
+	mtctr	r3
+	mtlr	r4
+	mtspr	SPRN_XER,r5
+	REST_10GPRS(2, r1)
+	REST_2GPRS(12, r1)
+	ld	r1,GPR1(r1)
+	RFI_TO_USER
+.endm
+
+system_call_vectored common 0x3000
+/*
+ * We instantiate another entry copy for the SIGILL variant, with TRAP=0x7ff0
+ * which is tested by system_call_exception when r0 is -1 (as set by vector
+ * entry code).
+ */
+system_call_vectored sigill 0x7ff0
+
+
+/*
+ * Entered via kernel return set up by kernel/sstep.c, must match entry regs
+ */
+	.globl system_call_vectored_emulate
+system_call_vectored_emulate:
+_ASM_NOKPROBE_SYMBOL(system_call_vectored_emulate)
+	li	r10,IRQS_ALL_DISABLED
+	stb	r10,PACAIRQSOFTMASK(r13)
+	b	system_call_vectored_common
+#endif
+
+	.balign IFETCH_ALIGN_BYTES
+	.globl system_call_common
+system_call_common:
 _ASM_NOKPROBE_SYMBOL(system_call_common)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+BEGIN_FTR_SECTION
+	extrdi.	r10, r12, 1, (63-MSR_TS_T_LG) /* transaction active? */
+	bne	.Ltabort_syscall
+END_FTR_SECTION_IFSET(CPU_FTR_TM)
+#endif
 	mr	r10,r1
 	ld	r1,PACAKSAVE(r13)
 	std	r10,0(r1)
@@ -138,6 +296,7 @@
 
 .Lsyscall_exit:
 	addi    r4,r1,STACK_FRAME_OVERHEAD
+	li	r5,0 /* !scv */
 	bl	syscall_exit_prepare
 
 	ld	r2,_CCR(r1)
@@ -224,21 +383,29 @@
 	b	.	/* prevent speculative execution */
 #endif
 
+#ifdef CONFIG_PPC_BOOK3S
+_GLOBAL(ret_from_fork_scv)
+	bl	schedule_tail
+	REST_NVGPRS(r1)
+	li	r3,0	/* fork() return value */
+	b	.Lsyscall_vectored_common_exit
+#endif
+
 _GLOBAL(ret_from_fork)
 	bl	schedule_tail
 	REST_NVGPRS(r1)
-	li	r3,0
+	li	r3,0	/* fork() return value */
 	b	.Lsyscall_exit
 
 _GLOBAL(ret_from_kernel_thread)
 	bl	schedule_tail
 	REST_NVGPRS(r1)
-	mtlr	r14
+	mtctr	r14
 	mr	r3,r15
 #ifdef PPC64_ELF_ABI_v2
 	mr	r12,r14
 #endif
-	blrl
+	bctrl
 	li	r3,0
 	b	.Lsyscall_exit
 
@@ -259,10 +426,7 @@
 
 #define FLUSH_COUNT_CACHE	\
 1:	nop;			\
-	patch_site 1b, patch__call_flush_count_cache
-
-
-#define BCCTR_FLUSH	.long 0x4c400420
+	patch_site 1b, patch__call_flush_branch_caches
 
 .macro nops number
 	.rept \number
@@ -271,8 +435,8 @@
 .endm
 
 .balign 32
-.global flush_count_cache
-flush_count_cache:
+.global flush_branch_caches
+flush_branch_caches:
 	/* Save LR into r9 */
 	mflr	r9
 
@@ -294,7 +458,7 @@
 	li	r9,0x7fff
 	mtctr	r9
 
-	BCCTR_FLUSH
+	PPC_BCCTR_FLUSH
 
 2:	nop
 	patch_site 2b patch__flush_count_cache_return
@@ -303,7 +467,7 @@
 
 	.rept 278
 	.balign 32
-	BCCTR_FLUSH
+	PPC_BCCTR_FLUSH
 	nops	7
 	.endr
 
@@ -357,7 +521,7 @@
 	 * kernel/sched/core.c).
 	 *
 	 * Uncacheable stores in the case of involuntary preemption must
-	 * be taken care of. The smp_mb__before_spin_lock() in __schedule()
+	 * be taken care of. The smp_mb__after_spinlock() in __schedule()
 	 * is implemented as hwsync on powerpc, which orders MMIO too. So
 	 * long as there is an hwsync in the context switch path, it will
 	 * be executed on the source CPU after the task has performed
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 446e54c..f7d748b 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -508,8 +508,24 @@
 
 .macro __GEN_COMMON_BODY name
 	.if IMASK
+		.if ! ISTACK
+		.error "No support for masked interrupt to use custom stack"
+		.endif
+
+		/* If coming from user, skip soft-mask tests. */
+		andi.	r10,r12,MSR_PR
+		bne	2f
+
+		/* Kernel code running below __end_interrupts is implicitly
+		 * soft-masked */
+		LOAD_HANDLER(r10, __end_interrupts)
+		cmpld	r11,r10
+		li	r10,IMASK
+		blt-	1f
+
+		/* Test the soft mask state against our interrupt's bit */
 		lbz	r10,PACAIRQSOFTMASK(r13)
-		andi.	r10,r10,IMASK
+1:		andi.	r10,r10,IMASK
 		/* Associate vector numbers with bits in paca->irq_happened */
 		.if IVEC == 0x500 || IVEC == 0xea0
 		li	r10,PACA_IRQ_EE
@@ -540,7 +556,7 @@
 
 	.if ISTACK
 	andi.	r10,r12,MSR_PR		/* See if coming from user	*/
-	mr	r10,r1			/* Save r1			*/
+2:	mr	r10,r1			/* Save r1			*/
 	subi	r1,r1,INT_FRAME_SIZE	/* alloc frame on kernel stack	*/
 	beq-	100f
 	ld	r1,PACAKSAVE(r13)	/* kernel stack to use		*/
@@ -740,6 +756,9 @@
  * guarantee they will be delivered virtually. Some conditions (see the ISA)
  * cause exceptions to be delivered in real mode.
  *
+ * The scv instructions are a special case. They get a 0x3000 offset applied.
+ * scv exceptions have unique reentrancy properties, see below.
+ *
  * It's impossible to receive interrupts below 0x300 via AIL.
  *
  * KVM: None of the virtual exceptions are from the guest. Anything that
@@ -749,8 +768,8 @@
  * We layout physical memory as follows:
  * 0x0000 - 0x00ff : Secondary processor spin code
  * 0x0100 - 0x18ff : Real mode pSeries interrupt vectors
- * 0x1900 - 0x3fff : Real mode trampolines
- * 0x4000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors
+ * 0x1900 - 0x2fff : Real mode trampolines
+ * 0x3000 - 0x58ff : Relon (IR=1,DR=1) mode pSeries interrupt vectors
  * 0x5900 - 0x6fff : Relon mode trampolines
  * 0x7000 - 0x7fff : FWNMI data area
  * 0x8000 -   .... : Common interrupt handlers, remaining early
@@ -761,8 +780,8 @@
  * vectors there.
  */
 OPEN_FIXED_SECTION(real_vectors,        0x0100, 0x1900)
-OPEN_FIXED_SECTION(real_trampolines,    0x1900, 0x4000)
-OPEN_FIXED_SECTION(virt_vectors,        0x4000, 0x5900)
+OPEN_FIXED_SECTION(real_trampolines,    0x1900, 0x3000)
+OPEN_FIXED_SECTION(virt_vectors,        0x3000, 0x5900)
 OPEN_FIXED_SECTION(virt_trampolines,    0x5900, 0x7000)
 
 #ifdef CONFIG_PPC_POWERNV
@@ -798,6 +817,77 @@
 	.globl __start_interrupts
 __start_interrupts:
 
+/**
+ * Interrupt 0x3000 - System Call Vectored Interrupt (syscall).
+ * This is a synchronous interrupt invoked with the "scv" instruction. The
+ * system call does not alter the HV bit, so it is directed to the OS.
+ *
+ * Handling:
+ * scv instructions enter the kernel without changing EE, RI, ME, or HV.
+ * In particular, this means we can take a maskable interrupt at any point
+ * in the scv handler, which is unlike any other interrupt. This is solved
+ * by treating the instruction addresses below __end_interrupts as being
+ * soft-masked.
+ *
+ * AIL-0 mode scv exceptions go to 0x17000-0x17fff, but we set AIL-3 and
+ * ensure scv is never executed with relocation off, which means AIL-0
+ * should never happen.
+ *
+ * Before leaving the below __end_interrupts text, at least one of the following
+ * must be true:
+ * - MSR[PR]=1 (i.e., return to userspace)
+ * - MSR_EE|MSR_RI is set (no reentrant exceptions)
+ * - Standard kernel environment is set up (stack, paca, etc)
+ *
+ * Call convention:
+ *
+ * syscall register convention is in Documentation/powerpc/syscall64-abi.rst
+ */
+EXC_VIRT_BEGIN(system_call_vectored, 0x3000, 0x1000)
+	/* SCV 0 */
+	mr	r9,r13
+	GET_PACA(r13)
+	mflr	r11
+	mfctr	r12
+	li	r10,IRQS_ALL_DISABLED
+	stb	r10,PACAIRQSOFTMASK(r13)
+#ifdef CONFIG_RELOCATABLE
+	b	system_call_vectored_tramp
+#else
+	b	system_call_vectored_common
+#endif
+	nop
+
+	/* SCV 1 - 127 */
+	.rept	127
+	mr	r9,r13
+	GET_PACA(r13)
+	mflr	r11
+	mfctr	r12
+	li	r10,IRQS_ALL_DISABLED
+	stb	r10,PACAIRQSOFTMASK(r13)
+	li	r0,-1 /* cause failure */
+#ifdef CONFIG_RELOCATABLE
+	b	system_call_vectored_sigill_tramp
+#else
+	b	system_call_vectored_sigill
+#endif
+	.endr
+EXC_VIRT_END(system_call_vectored, 0x3000, 0x1000)
+
+#ifdef CONFIG_RELOCATABLE
+TRAMP_VIRT_BEGIN(system_call_vectored_tramp)
+	__LOAD_HANDLER(r10, system_call_vectored_common)
+	mtctr	r10
+	bctr
+
+TRAMP_VIRT_BEGIN(system_call_vectored_sigill_tramp)
+	__LOAD_HANDLER(r10, system_call_vectored_sigill)
+	mtctr	r10
+	bctr
+#endif
+
+
 /* No virt vectors corresponding with 0x0..0x100 */
 EXC_VIRT_NONE(0x4000, 0x100)
 
@@ -2838,7 +2928,8 @@
 	ld	r10,PACA_EXGEN+EX_R10(r13)
 	ld	r11,PACA_EXGEN+EX_R11(r13)
 	ld	r12,PACA_EXGEN+EX_R12(r13)
-	/* returns to kernel where r13 must be set up, so don't restore it */
+	ld	r13,PACA_EXGEN+EX_R13(r13)
+	/* May return to masked low address where r13 is not set up */
 	.if \hsrr
 	HRFI_TO_KERNEL
 	.else
@@ -2946,6 +3037,47 @@
 	GET_SCRATCH0(r13);
 	hrfid
 
+TRAMP_REAL_BEGIN(rfscv_flush_fallback)
+	/* system call volatile */
+	mr	r7,r13
+	GET_PACA(r13);
+	mr	r8,r1
+	ld	r1,PACAKSAVE(r13)
+	mfctr	r9
+	ld	r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13)
+	ld	r11,PACA_L1D_FLUSH_SIZE(r13)
+	srdi	r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */
+	mtctr	r11
+	DCBT_BOOK3S_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */
+
+	/* order ld/st prior to dcbt stop all streams with flushing */
+	sync
+
+	/*
+	 * The load addresses are at staggered offsets within cachelines,
+	 * which suits some pipelines better (on others it should not
+	 * hurt).
+	 */
+1:
+	ld	r11,(0x80 + 8)*0(r10)
+	ld	r11,(0x80 + 8)*1(r10)
+	ld	r11,(0x80 + 8)*2(r10)
+	ld	r11,(0x80 + 8)*3(r10)
+	ld	r11,(0x80 + 8)*4(r10)
+	ld	r11,(0x80 + 8)*5(r10)
+	ld	r11,(0x80 + 8)*6(r10)
+	ld	r11,(0x80 + 8)*7(r10)
+	addi	r10,r10,0x80*8
+	bdnz	1b
+
+	mtctr	r9
+	li	r9,0
+	li	r10,0
+	li	r11,0
+	mr	r1,r8
+	mr	r13,r7
+	RFSCV
+
 USE_TEXT_SECTION()
 	MASKED_INTERRUPT
 	MASKED_INTERRUPT hsrr=1
@@ -2997,6 +3129,10 @@
 
 USE_FIXED_SECTION(virt_trampolines)
 	/*
+	 * All code below __end_interrupts is treated as soft-masked. If
+	 * any code runs here with MSR[EE]=1, it must then cope with a pending
+	 * soft interrupt being raised (i.e., by ensuring it is replayed).
+	 *
 	 * The __end_interrupts marker must be past the out-of-line (OOL)
 	 * handlers, so that they are copied to real address 0x100 when running
 	 * a relocatable kernel. This ensures they can be reached from the short
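
The 0x3000 vectors above are the kernel half of the scv ABI documented
in Documentation/powerpc/syscall64-abi.rst. A hedged userspace sketch of
issuing "scv 0" follows; unlike "sc", errors come back as a negative
errno in r3 rather than via CR0.SO. The clobber list is deliberately
conservative, and real code should first check PPC_FEATURE2_SCV in
AT_HWCAP2:

	/* Sketch: one-argument system call via scv 0 (Power9 or later). */
	static long scv_syscall1(long nr, long arg1)
	{
		register long r0 asm("r0") = nr;
		register long r3 asm("r3") = arg1;

		asm volatile("scv 0"
			     : "+r" (r3)
			     : "r" (r0)
			     : "r4", "r5", "r6", "r7", "r8", "r9", "r10",
			       "r11", "r12", "lr", "ctr", "cr0", "cr1",
			       "cr5", "cr6", "cr7", "memory");
		return r3;	/* negative errno on failure */
	}
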
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 78ab9a6..10ebb4b 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -32,6 +32,14 @@
 #include <asm/fadump-internal.h>
 #include <asm/setup.h>
 
+/*
+ * The CPU that acquired the lock to trigger the fadump crash should
+ * wait for other CPUs to enter.
+ *
+ * The timeout is in milliseconds.
+ */
+#define CRASH_TIMEOUT		500
+
 static struct fw_dump fw_dump;
 
 static void __init fadump_reserve_crash_area(u64 base);
@@ -39,7 +47,10 @@
 struct kobject *fadump_kobj;
 
 #ifndef CONFIG_PRESERVE_FA_DUMP
+
+static atomic_t cpus_in_fadump;
 static DEFINE_MUTEX(fadump_mutex);
+
 struct fadump_mrange_info crash_mrange_info = { "crash", NULL, 0, 0, 0, false };
 
 #define RESERVED_RNGS_SZ	16384 /* 16K - 128 entries */
@@ -668,8 +679,11 @@
 
 void crash_fadump(struct pt_regs *regs, const char *str)
 {
+	unsigned int msecs;
 	struct fadump_crash_info_header *fdh = NULL;
 	int old_cpu, this_cpu;
+	/* Do not include first CPU */
+	unsigned int ncpus = num_online_cpus() - 1;
 
 	if (!should_fadump_crash())
 		return;
@@ -685,6 +699,8 @@
 	old_cpu = cmpxchg(&crashing_cpu, -1, this_cpu);
 
 	if (old_cpu != -1) {
+		atomic_inc(&cpus_in_fadump);
+
 		/*
 		 * We can't loop here indefinitely. Wait as long as fadump
 		 * is in force. If we race with fadump un-registration this
@@ -708,6 +724,16 @@
 
 	fdh->online_mask = *cpu_online_mask;
 
+	/*
+	 * If we came in via system reset, wait a while for the secondary
+	 * CPUs to enter.
+	 */
+	if (TRAP(&(fdh->regs)) == 0x100) {
+		msecs = CRASH_TIMEOUT;
+		while ((atomic_read(&cpus_in_fadump) < ncpus) && (--msecs > 0))
+			mdelay(1);
+	}
+
 	fw_dump.ops->fadump_trigger(fdh, str);
 }
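
The CRASH_TIMEOUT wait above is a bounded spin: secondaries get up to
500ms to check in before the dump is triggered regardless. The same
pattern in isolation (a sketch, not the code above):

	#include <linux/atomic.h>
	#include <linux/delay.h>

	/* Spin up to timeout_ms milliseconds for *count to reach target.
	 * Returns true if every expected CPU checked in before timeout. */
	static bool wait_for_cpus(atomic_t *count, int target,
				  unsigned int timeout_ms)
	{
		while (atomic_read(count) < target && timeout_ms-- > 0)
			mdelay(1);
		return atomic_read(count) >= target;
	}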
 
diff --git a/arch/powerpc/kernel/firmware.c b/arch/powerpc/kernel/firmware.c
index cc4a5e3f..fe48d31 100644
--- a/arch/powerpc/kernel/firmware.c
+++ b/arch/powerpc/kernel/firmware.c
@@ -11,8 +11,27 @@
 
 #include <linux/export.h>
 #include <linux/cache.h>
+#include <linux/of.h>
 
 #include <asm/firmware.h>
 
+#ifdef CONFIG_PPC64
 unsigned long powerpc_firmware_features __read_mostly;
 EXPORT_SYMBOL_GPL(powerpc_firmware_features);
+#endif
+
+#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_KVM_GUEST)
+bool is_kvm_guest(void)
+{
+	struct device_node *hyper_node;
+
+	hyper_node = of_find_node_by_path("/hypervisor");
+	if (!hyper_node)
+		return 0;
+
+	if (!of_device_is_compatible(hyper_node, "linux,kvm"))
+		return 0;
+
+	return 1;
+}
+#endif
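
One thing to note about the new is_kvm_guest(): of_find_node_by_path()
returns its node with a reference held. A leak-free variant would drop
that reference (a sketch, not the code merged above):

	#include <linux/of.h>

	bool is_kvm_guest_sketch(void)
	{
		struct device_node *hyper_node;
		bool ret;

		hyper_node = of_find_node_by_path("/hypervisor");
		if (!hyper_node)
			return false;

		ret = of_device_is_compatible(hyper_node, "linux,kvm");
		of_node_put(hyper_node);	/* drop the reference taken above */
		return ret;
	}
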
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index cac22cb..4ae39db 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -107,9 +107,7 @@
 	or	r12,r12,r4
 	std	r12,_MSR(r1)
 #endif
-	/* Don't care if r4 overflows, this is desired behaviour */
-	lbz	r4,THREAD_LOAD_FP(r5)
-	addi	r4,r4,1
+	li	r4,1
 	stb	r4,THREAD_LOAD_FP(r5)
 	addi	r10,r5,THREAD_FPSTATE
 	lfd	fr0,FPSTATE_FPSCR(r10)
diff --git a/arch/powerpc/kernel/head_32.S b/arch/powerpc/kernel/head_32.S
index 705c042..f3ab94d 100644
--- a/arch/powerpc/kernel/head_32.S
+++ b/arch/powerpc/kernel/head_32.S
@@ -474,7 +474,7 @@
 	/* Get PTE (linux-style) and check access */
 	mfspr	r3,SPRN_IMISS
 #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)
-	lis	r1,PAGE_OFFSET@h		/* check if kernel address */
+	lis	r1, TASK_SIZE@h		/* check if kernel address */
 	cmplw	0,r1,r3
 #endif
 	mfspr	r2, SPRN_SPRG_PGDIR
@@ -484,7 +484,7 @@
 	li	r1,_PAGE_PRESENT | _PAGE_EXEC
 #endif
 #if defined(CONFIG_MODULES) || defined(CONFIG_DEBUG_PAGEALLOC)
-	bge-	112f
+	bgt-	112f
 	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */
 	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
 #endif
@@ -541,7 +541,7 @@
  */
 	/* Get PTE (linux-style) and check access */
 	mfspr	r3,SPRN_DMISS
-	lis	r1,PAGE_OFFSET@h		/* check if kernel address */
+	lis	r1, TASK_SIZE@h		/* check if kernel address */
 	cmplw	0,r1,r3
 	mfspr	r2, SPRN_SPRG_PGDIR
 #ifdef CONFIG_SWAP
@@ -549,7 +549,7 @@
 #else
 	li	r1, _PAGE_PRESENT
 #endif
-	bge-	112f
+	bgt-	112f
 	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */
 	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
 112:	rlwimi	r2,r3,12,20,29		/* insert top 10 bits of address */
@@ -621,7 +621,7 @@
  */
 	/* Get PTE (linux-style) and check access */
 	mfspr	r3,SPRN_DMISS
-	lis	r1,PAGE_OFFSET@h		/* check if kernel address */
+	lis	r1, TASK_SIZE@h		/* check if kernel address */
 	cmplw	0,r1,r3
 	mfspr	r2, SPRN_SPRG_PGDIR
 #ifdef CONFIG_SWAP
@@ -629,7 +629,7 @@
 #else
 	li	r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT
 #endif
-	bge-	112f
+	bgt-	112f
 	lis	r2, (swapper_pg_dir - PAGE_OFFSET)@ha	/* if kernel address, use */
 	addi	r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l	/* kernel page table */
 112:	rlwimi	r2,r3,12,20,29		/* insert top 10 bits of address */
@@ -673,6 +673,10 @@
 #define altivec_assist_exception	unknown_exception
 #endif
 
+#ifndef CONFIG_TAU_INT
+#define TAUException	unknown_exception
+#endif
+
 	EXCEPTION(0x1300, Trap_13, instruction_breakpoint_exception, EXC_XFER_STD)
 	EXCEPTION(0x1400, SMI, SMIException, EXC_XFER_STD)
 	EXCEPTION(0x1500, Trap_15, unknown_exception, EXC_XFER_STD)
diff --git a/arch/powerpc/kernel/head_40x.S b/arch/powerpc/kernel/head_40x.S
index 926bfa7..5b282d9 100644
--- a/arch/powerpc/kernel/head_40x.S
+++ b/arch/powerpc/kernel/head_40x.S
@@ -620,7 +620,7 @@
 	ori	r6, r6, swapper_pg_dir@l
 	lis	r5, abatron_pteptrs@h
 	ori	r5, r5, abatron_pteptrs@l
-	stw	r5, 0xf0(r0)	/* Must match your Abatron config file */
+	stw	r5, 0xf0(0)	/* Must match your Abatron config file */
 	tophys(r5,r5)
 	stw	r6, 0(r5)
 
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 0000daf..1f4a1ef 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -418,8 +418,9 @@
 
 	if (dawr_enabled()) {
 		max_len = DAWR_MAX_LEN;
-		/* DAWR region can't cross 512 bytes boundary */
-		if (ALIGN(start_addr, SZ_512M) != ALIGN(end_addr - 1, SZ_512M))
+		/* DAWR region can't cross a 512-byte boundary on P10 predecessors */
+		if (!cpu_has_feature(CPU_FTR_ARCH_31) &&
+		    (ALIGN_DOWN(start_addr, SZ_512) != ALIGN_DOWN(end_addr - 1, SZ_512)))
 			return -EINVAL;
 	} else if (IS_ENABLED(CONFIG_PPC_8xx)) {
 		/* 8xx can setup a range without limitation */
@@ -498,11 +499,11 @@
 	return ((info->address <= dar) && (dar - info->address < info->len));
 }
 
-static bool dar_user_range_overlaps(unsigned long dar, int size,
-				    struct arch_hw_breakpoint *info)
+static bool ea_user_range_overlaps(unsigned long ea, int size,
+				   struct arch_hw_breakpoint *info)
 {
-	return ((dar < info->address + info->len) &&
-		(dar + size > info->address));
+	return ((ea < info->address + info->len) &&
+		(ea + size > info->address));
 }
 
 static bool dar_in_hw_range(unsigned long dar, struct arch_hw_breakpoint *info)
@@ -515,20 +516,22 @@
 	return ((hw_start_addr <= dar) && (hw_end_addr > dar));
 }
 
-static bool dar_hw_range_overlaps(unsigned long dar, int size,
-				  struct arch_hw_breakpoint *info)
+static bool ea_hw_range_overlaps(unsigned long ea, int size,
+				 struct arch_hw_breakpoint *info)
 {
 	unsigned long hw_start_addr, hw_end_addr;
 
 	hw_start_addr = ALIGN_DOWN(info->address, HW_BREAKPOINT_SIZE);
 	hw_end_addr = ALIGN(info->address + info->len, HW_BREAKPOINT_SIZE);
 
-	return ((dar < hw_end_addr) && (dar + size > hw_start_addr));
+	return ((ea < hw_end_addr) && (ea + size > hw_start_addr));
 }
 
 /*
  * If hw has multiple DAWR registers, we also need to check all
  * dawrx constraint bits to confirm this is _really_ a valid event.
+ * If type is UNKNOWN, but privilege level matches, consider it as
+ * a positive match.
  */
 static bool check_dawrx_constraints(struct pt_regs *regs, int type,
 				    struct arch_hw_breakpoint *info)
@@ -536,7 +539,12 @@
 	if (OP_IS_LOAD(type) && !(info->type & HW_BRK_TYPE_READ))
 		return false;
 
-	if (OP_IS_STORE(type) && !(info->type & HW_BRK_TYPE_WRITE))
+	/*
+	 * The Cache Management instructions other than dcbz never
+	 * cause a match, i.e. if type is CACHEOP, the instruction
+	 * is dcbz, and dcbz is treated as a store.
+	 */
+	if ((OP_IS_STORE(type) || type == CACHEOP) && !(info->type & HW_BRK_TYPE_WRITE))
 		return false;
 
 	if (is_kernel_addr(regs->nip) && !(info->type & HW_BRK_TYPE_KERNEL))
@@ -553,7 +561,8 @@
  * including extraneous exception. Otherwise return false.
  */
 static bool check_constraints(struct pt_regs *regs, struct ppc_inst instr,
-			      int type, int size, struct arch_hw_breakpoint *info)
+			      unsigned long ea, int type, int size,
+			      struct arch_hw_breakpoint *info)
 {
 	bool in_user_range = dar_in_user_range(regs->dar, info);
 	bool dawrx_constraints;
@@ -569,22 +578,27 @@
 	}
 
 	if (unlikely(ppc_inst_equal(instr, ppc_inst(0)))) {
-		if (in_user_range)
-			return true;
+		if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+		    !dar_in_hw_range(regs->dar, info))
+			return false;
 
-		if (dar_in_hw_range(regs->dar, info)) {
-			info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
-			return true;
-		}
-		return false;
+		return true;
 	}
 
 	dawrx_constraints = check_dawrx_constraints(regs, type, info);
 
-	if (dar_user_range_overlaps(regs->dar, size, info))
+	if (type == UNKNOWN) {
+		if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+		    !dar_in_hw_range(regs->dar, info))
+			return false;
+
+		return dawrx_constraints;
+	}
+
+	if (ea_user_range_overlaps(ea, size, info))
 		return dawrx_constraints;
 
-	if (dar_hw_range_overlaps(regs->dar, size, info)) {
+	if (ea_hw_range_overlaps(ea, size, info)) {
 		if (dawrx_constraints) {
 			info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
 			return true;
@@ -593,8 +607,17 @@
 	return false;
 }
 
+static int cache_op_size(void)
+{
+#ifdef __powerpc64__
+	return ppc64_caches.l1d.block_size;
+#else
+	return L1_CACHE_BYTES;
+#endif
+}
+
 static void get_instr_detail(struct pt_regs *regs, struct ppc_inst *instr,
-			     int *type, int *size, bool *larx_stcx)
+			     int *type, int *size, unsigned long *ea)
 {
 	struct instruction_op op;
 
@@ -602,16 +625,23 @@
 		return;
 
 	analyse_instr(&op, regs, *instr);
-
-	/*
-	 * Set size = 8 if analyse_instr() fails. If it's a userspace
-	 * watchpoint(valid or extraneous), we can notify user about it.
-	 * If it's a kernel watchpoint, instruction  emulation will fail
-	 * in stepping_handler() and watchpoint will be disabled.
-	 */
 	*type = GETTYPE(op.type);
-	*size = !(*type == UNKNOWN) ? GETSIZE(op.type) : 8;
-	*larx_stcx = (*type == LARX || *type == STCX);
+	*ea = op.ea;
+#ifdef __powerpc64__
+	if (!(regs->msr & MSR_64BIT))
+		*ea &= 0xffffffffUL;
+#endif
+
+	*size = GETSIZE(op.type);
+	if (*type == CACHEOP) {
+		*size = cache_op_size();
+		*ea &= ~(*size - 1);
+	}
+}
+
+static bool is_larx_stcx_instr(int type)
+{
+	return type == LARX || type == STCX;
 }
 
 /*
@@ -678,7 +708,7 @@
 	struct ppc_inst instr = ppc_inst(0);
 	int type = 0;
 	int size = 0;
-	bool larx_stcx = false;
+	unsigned long ea;
 
 	/* Disable breakpoints during exception handling */
 	hw_breakpoint_disable();
@@ -692,7 +722,7 @@
 	rcu_read_lock();
 
 	if (!IS_ENABLED(CONFIG_PPC_8xx))
-		get_instr_detail(regs, &instr, &type, &size, &larx_stcx);
+		get_instr_detail(regs, &instr, &type, &size, &ea);
 
 	for (i = 0; i < nr_wp_slots(); i++) {
 		bp[i] = __this_cpu_read(bp_per_reg[i]);
@@ -702,7 +732,7 @@
 		info[i] = counter_arch_bp(bp[i]);
 		info[i]->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
 
-		if (check_constraints(regs, instr, type, size, info[i])) {
+		if (check_constraints(regs, instr, ea, type, size, info[i])) {
 			if (!IS_ENABLED(CONFIG_PPC_8xx) &&
 			    ppc_inst_equal(instr, ppc_inst(0))) {
 				handler_error(bp[i], info[i]);
@@ -744,7 +774,7 @@
 	}
 
 	if (!IS_ENABLED(CONFIG_PPC_8xx)) {
-		if (larx_stcx) {
+		if (is_larx_stcx_instr(type)) {
 			for (i = 0; i < nr_wp_slots(); i++) {
 				if (!hit[i])
 					continue;
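
The renamed ea_user_range_overlaps()/ea_hw_range_overlaps() helpers are
both instances of the standard half-open interval test: [a, a+la) and
[b, b+lb) intersect iff a < b+lb and b < a+la. In isolation:

	#include <linux/types.h>

	/* Sketch: half-open interval overlap, the predicate behind the
	 * ea_*_range_overlaps() helpers above. */
	static bool ranges_overlap(unsigned long a, unsigned long la,
				   unsigned long b, unsigned long lb)
	{
		return (a < b + lb) && (b < a + la);
	}
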
diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 05b1cc0..bf21ebd 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -621,13 +621,14 @@
 		seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions);
 	seq_printf(p, "  Machine check exceptions\n");
 
+#ifdef CONFIG_PPC_BOOK3S_64
 	if (cpu_has_feature(CPU_FTR_HVMODE)) {
 		seq_printf(p, "%*s: ", prec, "HMI");
 		for_each_online_cpu(j)
-			seq_printf(p, "%10u ",
-					per_cpu(irq_stat, j).hmi_exceptions);
+			seq_printf(p, "%10u ", paca_ptrs[j]->hmi_irqs);
 		seq_printf(p, "  Hypervisor Maintenance Interrupts\n");
 	}
+#endif
 
 	seq_printf(p, "%*s: ", prec, "NMI");
 	for_each_online_cpu(j)
@@ -665,7 +666,9 @@
 	sum += per_cpu(irq_stat, cpu).mce_exceptions;
 	sum += per_cpu(irq_stat, cpu).spurious_irqs;
 	sum += per_cpu(irq_stat, cpu).timer_irqs_others;
-	sum += per_cpu(irq_stat, cpu).hmi_exceptions;
+#ifdef CONFIG_PPC_BOOK3S_64
+	sum += paca_ptrs[cpu]->hmi_irqs;
+#endif
 	sum += per_cpu(irq_stat, cpu).sreset_irqs;
 #ifdef CONFIG_PPC_WATCHDOG
 	sum += per_cpu(irq_stat, cpu).soft_nmi_irqs;
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index 9cc792a..6ab9b4d 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -244,7 +244,7 @@
 		 * So, we should never get here... but, its still
 		 * good to catch them, just in case...
 		 */
-		printk("Can't step on instruction %x\n", ppc_inst_val(insn));
+		printk("Can't step on instruction %s\n", ppc_inst_as_str(insn));
 		BUG();
 	} else {
 		/*
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index fd90c0e..ada59f6 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -49,6 +49,20 @@
 
 DECLARE_WORK(mce_ue_event_work, machine_process_ue_event);
 
+static BLOCKING_NOTIFIER_HEAD(mce_notifier_list);
+
+int mce_register_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_register(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(mce_register_notifier);
+
+int mce_unregister_notifier(struct notifier_block *nb)
+{
+	return blocking_notifier_chain_unregister(&mce_notifier_list, nb);
+}
+EXPORT_SYMBOL_GPL(mce_unregister_notifier);
+
 static void mce_set_error_info(struct machine_check_event *mce,
 			       struct mce_error_info *mce_err)
 {
@@ -278,6 +292,7 @@
 	while (__this_cpu_read(mce_ue_count) > 0) {
 		index = __this_cpu_read(mce_ue_count) - 1;
 		evt = this_cpu_ptr(&mce_ue_event_queue[index]);
+		blocking_notifier_call_chain(&mce_notifier_list, 0, evt);
 #ifdef CONFIG_MEMORY_FAILURE
 		/*
 		 * This should probably queued elsewhere, but
@@ -370,6 +385,7 @@
 	static const char *mc_user_types[] = {
 		"Indeterminate",
 		"tlbie(l) invalid",
+		"scv invalid",
 	};
 	static const char *mc_ra_types[] = {
 		"Indeterminate",
@@ -711,7 +727,7 @@
 {	
 	int ret;
 
-	__this_cpu_inc(irq_stat.hmi_exceptions);
+	local_paca->hmi_irqs++;
 
 	ret = hmi_handle_debugtrig(regs);
 	if (ret >= 0)
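
mce_register_notifier() gives other code a hook into UE event
processing; the void *data passed down the chain is the struct
machine_check_event being drained from the queue. A hypothetical client
(handler name and message are illustrative only):

	#include <linux/notifier.h>
	#include <linux/printk.h>
	#include <asm/mce.h>

	static int my_mce_notify(struct notifier_block *nb, unsigned long val,
				 void *data)
	{
		struct machine_check_event *evt = data;

		pr_err("UE machine check event, severity %d\n", evt->severity);
		return NOTIFY_OK;
	}

	static struct notifier_block my_mce_nb = { .notifier_call = my_mce_notify };

	/* mce_register_notifier(&my_mce_nb) at init time,
	 * mce_unregister_notifier(&my_mce_nb) on teardown. */
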
diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c
index c3b522b..b7e1737 100644
--- a/arch/powerpc/kernel/mce_power.c
+++ b/arch/powerpc/kernel/mce_power.c
@@ -243,6 +243,45 @@
   MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
 { 0, 0, 0, 0, 0, 0, 0 } };
 
+static const struct mce_ierror_table mce_p10_ierror_table[] = {
+{ 0x00000000081c0000, 0x0000000000040000, true,
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000000080000, true,
+  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000000c0000, true,
+  MCE_ERROR_TYPE_SLB, MCE_SLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000100000, true,
+  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000140000, true,
+  MCE_ERROR_TYPE_TLB, MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x0000000000180000, true,
+  MCE_ERROR_TYPE_UE,  MCE_UE_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x00000000001c0000, true,
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_IFETCH_FOREIGN, MCE_ECLASS_SOFTWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008080000, true,
+  MCE_ERROR_TYPE_USER,MCE_USER_ERROR_SCV, MCE_ECLASS_SOFTWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_WARNING, true },
+{ 0x00000000081c0000, 0x00000000080c0000, true,
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_IFETCH, MCE_ECLASS_SOFTWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008100000, true,
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH, MCE_ECLASS_SOFTWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0x00000000081c0000, 0x0000000008140000, false,
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_STORE, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_FATAL, false }, /* ASYNC is fatal */
+{ 0x00000000081c0000, 0x00000000081c0000, true,
+  MCE_ERROR_TYPE_RA,  MCE_RA_ERROR_PAGE_TABLE_WALK_IFETCH_FOREIGN,
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,  MCE_SEV_SEVERE, true },
+{ 0, 0, 0, 0, 0, 0, 0 } };
+
 struct mce_derror_table {
 	unsigned long dsisr_value;
 	bool dar_valid; /* dar is a valid indicator of faulting address */
@@ -361,6 +400,46 @@
   MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
 { 0, false, 0, 0, 0, 0, 0 } };
 
+static const struct mce_derror_table mce_p10_derror_table[] = {
+{ 0x00008000, false,
+  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_LOAD_STORE, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00004000, true,
+  MCE_ERROR_TYPE_UE,   MCE_UE_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000800, true,
+  MCE_ERROR_TYPE_ERAT, MCE_ERAT_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000400, true,
+  MCE_ERROR_TYPE_TLB,  MCE_TLB_ERROR_MULTIHIT, MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000200, false,
+  MCE_ERROR_TYPE_USER, MCE_USER_ERROR_TLBIE, MCE_ECLASS_SOFTWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000080, true,
+  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_MULTIHIT,	/* Before PARITY */
+  MCE_ECLASS_SOFT_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_WARNING, true },
+{ 0x00000100, true,
+  MCE_ERROR_TYPE_SLB,  MCE_SLB_ERROR_PARITY, MCE_ECLASS_HARD_INDETERMINATE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000040, true,
+  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_LOAD, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000020, false,
+  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE,
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000010, false,
+  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_PAGE_TABLE_WALK_LOAD_STORE_FOREIGN,
+  MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0x00000008, false,
+  MCE_ERROR_TYPE_RA,   MCE_RA_ERROR_LOAD_STORE_FOREIGN, MCE_ECLASS_HARDWARE,
+  MCE_INITIATOR_CPU,   MCE_SEV_SEVERE, true },
+{ 0, false, 0, 0, 0, 0, 0 } };
+
 static int mce_find_instr_ea_and_phys(struct pt_regs *regs, uint64_t *addr,
 					uint64_t *phys_addr)
 {
@@ -657,3 +736,8 @@
 
 	return mce_handle_error(regs, mce_p9_derror_table, mce_p9_ierror_table);
 }
+
+long __machine_check_early_realmode_p10(struct pt_regs *regs)
+{
+	return mce_handle_error(regs, mce_p10_derror_table, mce_p10_ierror_table);
+}
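
The new P10 tables are consumed by the same first-match walk as the
P7-P9 tables: mask SRR1 (or DSISR) with each entry's mask and take the
first entry whose value matches. A simplified sketch of the ierror case
(field names follow the struct mce_ierror_table definition in this
file; the real mce_handle_ierror() also extracts the faulting address
and fills in the event):

	static const struct mce_ierror_table *
	mce_match_ierror(const struct mce_ierror_table *table, unsigned long srr1)
	{
		int i;

		/* An all-zero entry terminates the table. */
		for (i = 0; table[i].srr1_mask; i++) {
			if ((srr1 & table[i].srr1_mask) == table[i].srr1_value)
				return &table[i];
		}
		return NULL;	/* unrecognised error */
	}
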
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 1864605..7bb46ad 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -413,20 +413,6 @@
 	li	r0,0
 	std	r0,16(r1)
 
-BEGIN_FTR_SECTION
-	/*
-	 * This is the best time to turn AMR/IAMR off.
-	 * key 0 is used in radix for supervisor<->user
-	 * protection, but on hash key 0 is reserved
-	 * ideally we want to enter with a clean state.
-	 * NOTE, we rely on r0 being 0 from above.
-	 */
-	mtspr	SPRN_IAMR,r0
-BEGIN_FTR_SECTION_NESTED(42)
-	mtspr	SPRN_AMOR,r0
-END_FTR_SECTION_NESTED_IFSET(CPU_FTR_HVMODE, 42)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
-
 	/* save regs for local vars on new stack.
 	 * yes, we won't go back, but ...
 	 */
diff --git a/arch/powerpc/kernel/module.c b/arch/powerpc/kernel/module.c
index df649ac..a211b02 100644
--- a/arch/powerpc/kernel/module.c
+++ b/arch/powerpc/kernel/module.c
@@ -86,3 +86,14 @@
 
 	return 0;
 }
+
+#ifdef MODULES_VADDR
+void *module_alloc(unsigned long size)
+{
+	BUILD_BUG_ON(TASK_SIZE > MODULES_VADDR);
+
+	return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL,
+				    PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+				    __builtin_return_address(0));
+}
+#endif
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index 71a3f97..f89376f 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -62,8 +62,8 @@
 	/* Init pci_dn data structures */
 	pci_devs_phb_init_dynamic(phb);
 
-	/* Create EEH PEs for the PHB */
-	eeh_dev_phb_init_dynamic(phb);
+	/* Create EEH PE for the PHB */
+	eeh_phb_pe_create(phb);
 
 	/* Scan the bus */
 	pcibios_scan_phb(phb);
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 74da65a..0ad1576 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -57,8 +57,8 @@
 
 #define LPPACA_SIZE 0x400
 
-static void *__init alloc_shared_lppaca(unsigned long size, unsigned long align,
-					unsigned long limit, int cpu)
+static void *__init alloc_shared_lppaca(unsigned long size, unsigned long limit,
+					int cpu)
 {
 	size_t shared_lppaca_total_size = PAGE_ALIGN(nr_cpu_ids * LPPACA_SIZE);
 	static unsigned long shared_lppaca_size;
@@ -68,6 +68,13 @@
 	if (!shared_lppaca) {
 		memblock_set_bottom_up(true);
 
+		/*
+		 * See Documentation/powerpc/ultravisor.rst for more details.
+		 *
+		 * UV/HV data sharing is in PAGE_SIZE granularity. In order to
+		 * minimize the number of pages shared, align the allocation to
+		 * PAGE_SIZE.
+		 */
 		shared_lppaca =
 			memblock_alloc_try_nid(shared_lppaca_total_size,
 					       PAGE_SIZE, MEMBLOCK_LOW_LIMIT,
@@ -122,7 +129,7 @@
 		return NULL;
 
 	if (is_secure_guest())
-		lp = alloc_shared_lppaca(LPPACA_SIZE, 0x400, limit, cpu);
+		lp = alloc_shared_lppaca(LPPACA_SIZE, limit, cpu);
 	else
 		lp = alloc_paca_data(LPPACA_SIZE, 0x400, limit, cpu);
 
diff --git a/arch/powerpc/kernel/pci_dn.c b/arch/powerpc/kernel/pci_dn.c
index 4e654df..e99b7c5 100644
--- a/arch/powerpc/kernel/pci_dn.c
+++ b/arch/powerpc/kernel/pci_dn.c
@@ -124,9 +124,28 @@
 	return NULL;
 }
 
+#ifdef CONFIG_EEH
+static struct eeh_dev *eeh_dev_init(struct pci_dn *pdn)
+{
+	struct eeh_dev *edev;
+
+	/* Allocate EEH device */
+	edev = kzalloc(sizeof(*edev), GFP_KERNEL);
+	if (!edev)
+		return NULL;
+
+	/* Associate EEH device with OF node */
+	pdn->edev = edev;
+	edev->pdn = pdn;
+	edev->bdfn = (pdn->busno << 8) | pdn->devfn;
+	edev->controller = pdn->phb;
+
+	return edev;
+}
+#endif /* CONFIG_EEH */
+
 #ifdef CONFIG_PCI_IOV
 static struct pci_dn *add_one_sriov_vf_pdn(struct pci_dn *parent,
-					   int vf_index,
 					   int busno, int devfn)
 {
 	struct pci_dn *pdn;
@@ -143,7 +162,6 @@
 	pdn->parent = parent;
 	pdn->busno = busno;
 	pdn->devfn = devfn;
-	pdn->vf_index = vf_index;
 	pdn->pe_number = IODA_INVALID_PE;
 	INIT_LIST_HEAD(&pdn->child_list);
 	INIT_LIST_HEAD(&pdn->list);
@@ -174,7 +192,7 @@
 	for (i = 0; i < pci_sriov_get_totalvfs(pdev); i++) {
 		struct eeh_dev *edev __maybe_unused;
 
-		pdn = add_one_sriov_vf_pdn(parent, i,
+		pdn = add_one_sriov_vf_pdn(parent,
 					   pci_iov_virtfn_bus(pdev, i),
 					   pci_iov_virtfn_devfn(pdev, i));
 		if (!pdn) {
@@ -187,7 +205,10 @@
 		/* Create the EEH device for the VF */
 		edev = eeh_dev_init(pdn);
 		BUG_ON(!edev);
+
+		/* FIXME: these should probably be populated by the EEH probe */
 		edev->physfn = pdev;
+		edev->vf_index = i;
 #endif /* CONFIG_EEH */
 	}
 	return pci_get_pdn(pdev);
@@ -242,7 +263,7 @@
 				 * have a configured PE.
 				 */
 				if (edev->pe)
-					eeh_rmv_from_parent_pe(edev);
+					eeh_pe_tree_remove(edev);
 
 				pdn->edev = NULL;
 				kfree(edev);
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 794b754..016bd83 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -471,49 +471,58 @@
 
 #ifdef CONFIG_PPC_BOOK3S_64
 #ifdef CONFIG_PPC_FPU
-static int restore_fp(struct task_struct *tsk)
+static bool should_restore_fp(void)
 {
-	if (tsk->thread.load_fp) {
-		load_fp_state(&current->thread.fp_state);
+	if (current->thread.load_fp) {
 		current->thread.load_fp++;
-		return 1;
+		return true;
 	}
-	return 0;
+	return false;
+}
+
+static void do_restore_fp(void)
+{
+	load_fp_state(&current->thread.fp_state);
 }
 #else
-static int restore_fp(struct task_struct *tsk) { return 0; }
+static bool should_restore_fp(void) { return false; }
+static void do_restore_fp(void) { }
 #endif /* CONFIG_PPC_FPU */
 
 #ifdef CONFIG_ALTIVEC
-#define loadvec(thr) ((thr).load_vec)
-static int restore_altivec(struct task_struct *tsk)
+static bool should_restore_altivec(void)
 {
-	if (cpu_has_feature(CPU_FTR_ALTIVEC) && (tsk->thread.load_vec)) {
-		load_vr_state(&tsk->thread.vr_state);
-		tsk->thread.used_vr = 1;
-		tsk->thread.load_vec++;
-
-		return 1;
+	if (cpu_has_feature(CPU_FTR_ALTIVEC) && (current->thread.load_vec)) {
+		current->thread.load_vec++;
+		return true;
 	}
-	return 0;
+	return false;
+}
+
+static void do_restore_altivec(void)
+{
+	load_vr_state(&current->thread.vr_state);
+	current->thread.used_vr = 1;
 }
 #else
-#define loadvec(thr) 0
-static inline int restore_altivec(struct task_struct *tsk) { return 0; }
+static bool should_restore_altivec(void) { return false; }
+static void do_restore_altivec(void) { }
 #endif /* CONFIG_ALTIVEC */
 
 #ifdef CONFIG_VSX
-static int restore_vsx(struct task_struct *tsk)
+static bool should_restore_vsx(void)
 {
-	if (cpu_has_feature(CPU_FTR_VSX)) {
-		tsk->thread.used_vsr = 1;
-		return 1;
-	}
-
-	return 0;
+	if (cpu_has_feature(CPU_FTR_VSX))
+		return true;
+	return false;
+}
+static void do_restore_vsx(void)
+{
+	current->thread.used_vsr = 1;
 }
 #else
-static inline int restore_vsx(struct task_struct *tsk) { return 0; }
+static bool should_restore_vsx(void) { return false; }
+static void do_restore_vsx(void) { }
 #endif /* CONFIG_VSX */
 
 /*
@@ -529,32 +538,42 @@
 void notrace restore_math(struct pt_regs *regs)
 {
 	unsigned long msr;
-
-	if (!MSR_TM_ACTIVE(regs->msr) &&
-		!current->thread.load_fp && !loadvec(current->thread))
-		return;
+	unsigned long new_msr = 0;
 
 	msr = regs->msr;
-	msr_check_and_set(msr_all_available);
 
 	/*
-	 * Only reload if the bit is not set in the user MSR, the bit BEING set
-	 * indicates that the registers are hot
+	 * new_msr tracks the facilities that are to be restored. Only reload
+	 * if the bit is not set in the user MSR (if it is set, the registers
+	 * are live for the user thread).
 	 */
-	if ((!(msr & MSR_FP)) && restore_fp(current))
-		msr |= MSR_FP | current->thread.fpexc_mode;
+	if ((!(msr & MSR_FP)) && should_restore_fp())
+		new_msr |= MSR_FP | current->thread.fpexc_mode;
 
-	if ((!(msr & MSR_VEC)) && restore_altivec(current))
-		msr |= MSR_VEC;
+	if ((!(msr & MSR_VEC)) && should_restore_altivec())
+		new_msr |= MSR_VEC;
 
-	if ((msr & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC) &&
-			restore_vsx(current)) {
-		msr |= MSR_VSX;
+	if ((!(msr & MSR_VSX)) && should_restore_vsx()) {
+		if (((msr | new_msr) & (MSR_FP | MSR_VEC)) == (MSR_FP | MSR_VEC))
+			new_msr |= MSR_VSX;
 	}
 
-	msr_check_and_clear(msr_all_available);
+	if (new_msr) {
+		msr_check_and_set(new_msr);
 
-	regs->msr = msr;
+		if (new_msr & MSR_FP)
+			do_restore_fp();
+
+		if (new_msr & MSR_VEC)
+			do_restore_altivec();
+
+		if (new_msr & MSR_VSX)
+			do_restore_vsx();
+
+		msr_check_and_clear(new_msr);
+
+		regs->msr |= new_msr;
+	}
 }
 #endif
 
@@ -1599,6 +1618,7 @@
 {
 	struct pt_regs *childregs, *kregs;
 	extern void ret_from_fork(void);
+	extern void ret_from_fork_scv(void);
 	extern void ret_from_kernel_thread(void);
 	void (*f)(void);
 	unsigned long sp = (unsigned long)task_stack_page(p) + THREAD_SIZE;
@@ -1635,7 +1655,9 @@
 		if (usp)
 			childregs->gpr[1] = usp;
 		p->thread.regs = childregs;
-		childregs->gpr[3] = 0;  /* Result from fork() */
+		/* 64s sets this in ret_from_fork */
+		if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64))
+			childregs->gpr[3] = 0;  /* Result from fork() */
 		if (clone_flags & CLONE_SETTLS) {
 			if (!is_32bit_task())
 				childregs->gpr[13] = tls;
@@ -1643,7 +1665,10 @@
 				childregs->gpr[2] = tls;
 		}
 
-		f = ret_from_fork;
+		if (trap_is_scv(regs))
+			f = ret_from_fork_scv;
+		else
+			f = ret_from_fork;
 	}
 	childregs->msr &= ~(MSR_FP|MSR_VEC|MSR_VSX);
 	sp -= STACK_FRAME_OVERHEAD;
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 9cc49f2..d8a2fb8 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -163,7 +163,7 @@
 	{ .pabyte = 0,  .pabit = 6, .cpu_features  = CPU_FTR_NOEXECUTE },
 	{ .pabyte = 1,  .pabit = 2, .mmu_features  = MMU_FTR_CI_LARGE_PAGE },
 #ifdef CONFIG_PPC_RADIX_MMU
-	{ .pabyte = 40, .pabit = 0, .mmu_features  = MMU_FTR_TYPE_RADIX },
+	{ .pabyte = 40, .pabit = 0, .mmu_features  = MMU_FTR_TYPE_RADIX | MMU_FTR_GTSE },
 #endif
 	{ .pabyte = 1,  .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN },
 	{ .pabyte = 5,  .pabit = 0, .cpu_features  = CPU_FTR_REAL_LE,
@@ -175,6 +175,8 @@
 	 */
 	{ .pabyte = 22, .pabit = 0, .cpu_features = CPU_FTR_TM_COMP,
 	  .cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP },
+
+	{ .pabyte = 64, .pabit = 0, .cpu_features = CPU_FTR_DAWR1 },
 };
 
 static void __init scan_features(unsigned long node, const unsigned char *ftrs,
@@ -468,8 +470,9 @@
  * This contains a list of memory blocks along with NUMA affinity
  * information.
  */
-static void __init early_init_drmem_lmb(struct drmem_lmb *lmb,
-					const __be32 **usm)
+static int  __init early_init_drmem_lmb(struct drmem_lmb *lmb,
+					const __be32 **usm,
+					void *data)
 {
 	u64 base, size;
 	int is_kexec_kdump = 0, rngs;
@@ -484,7 +487,7 @@
 	 */
 	if ((lmb->flags & DRCONF_MEM_RESERVED) ||
 	    !(lmb->flags & DRCONF_MEM_ASSIGNED))
-		return;
+		return 0;
 
 	if (*usm)
 		is_kexec_kdump = 1;
@@ -499,7 +502,7 @@
 		 */
 		rngs = dt_mem_next_cell(dt_root_size_cells, usm);
 		if (!rngs) /* there are no (base, size) tuples */
-			return;
+			return 0;
 	}
 
 	do {
@@ -524,6 +527,8 @@
 		if (lmb->flags & DRCONF_MEM_HOTREMOVABLE)
 			memblock_mark_hotplug(base, size);
 	} while (--rngs);
+
+	return 0;
 }
 #endif /* CONFIG_PPC_PSERIES */
 
@@ -534,7 +539,7 @@
 #ifdef CONFIG_PPC_PSERIES
 	if (depth == 1 &&
 	    strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
-		walk_drmem_lmbs_early(node, early_init_drmem_lmb);
+		walk_drmem_lmbs_early(node, NULL, early_init_drmem_lmb);
 		return 0;
 	}
 #endif
@@ -815,6 +820,11 @@
 	/* Now try to figure out if we are running on LPAR and so on */
 	pseries_probe_fw_features();
 
+	/*
+	 * Initialize pkey features and default AMR/IAMR values
+	 */
+	pkey_early_init_devtree();
+
 #ifdef CONFIG_PPC_PS3
 	/* Identify PS3 firmware */
 	if (of_flat_dt_is_compatible(of_get_flat_dt_root(), "sony,ps3"))
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 90c604d..ae7ec99 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -169,6 +169,7 @@
 
 #ifdef CONFIG_PPC_PSERIES
 static bool __prombss prom_radix_disable;
+static bool __prombss prom_radix_gtse_disable;
 static bool __prombss prom_xive_disable;
 #endif
 
@@ -823,6 +824,12 @@
 	if (prom_radix_disable)
 		prom_debug("Radix disabled from cmdline\n");
 
+	opt = prom_strstr(prom_cmd_line, "radix_hcall_invalidate=on");
+	if (opt) {
+		prom_radix_gtse_disable = true;
+		prom_debug("Radix GTSE disabled from cmdline\n");
+	}
+
 	opt = prom_strstr(prom_cmd_line, "xive=off");
 	if (opt) {
 		prom_xive_disable = true;
@@ -1285,10 +1292,8 @@
 		prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support);
 		break;
 	case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */
-		if (val & OV5_FEAT(OV5_RADIX_GTSE)) {
-			prom_debug("Radix - GTSE supported\n");
-			support->radix_gtse = true;
-		}
+		if (val & OV5_FEAT(OV5_RADIX_GTSE))
+			support->radix_gtse = !prom_radix_gtse_disable;
 		break;
 	case OV5_INDX(OV5_XIVE_SUPPORT): /* Interrupt mode */
 		prom_parse_xive_model(val & OV5_FEAT(OV5_XIVE_SUPPORT),
@@ -1336,12 +1341,15 @@
 		}
 	}
 
-	if (supported.radix_mmu && supported.radix_gtse &&
-	    IS_ENABLED(CONFIG_PPC_RADIX_MMU)) {
-		/* Radix preferred - but we require GTSE for now */
-		prom_debug("Asking for radix with GTSE\n");
+	if (supported.radix_mmu && IS_ENABLED(CONFIG_PPC_RADIX_MMU)) {
+		/* Radix preferred - Check if GTSE is also supported */
+		prom_debug("Asking for radix\n");
 		ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX);
-		ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE);
+		if (supported.radix_gtse)
+			ibm_architecture_vec.vec5.radix_ext =
+					OV5_FEAT(OV5_RADIX_GTSE);
+		else
+			prom_debug("Radix GTSE isn't supported\n");
 	} else if (supported.hash_mmu) {
 		/* Default to hash mmu (if we can) */
 		prom_debug("Asking for hash\n");
@@ -3262,7 +3270,7 @@
 /*
  * Call the Ultravisor to transfer us to secure memory if we have an ESM blob.
  */
-static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
+static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt)
 {
 	int ret;
 
@@ -3292,7 +3300,7 @@
 	}
 }
 #else
-static void setup_secure_guest(unsigned long kbase, unsigned long fdt)
+static void __init setup_secure_guest(unsigned long kbase, unsigned long fdt)
 {
 }
 #endif /* CONFIG_PPC_SVM */
diff --git a/arch/powerpc/kernel/ptrace/ptrace-view.c b/arch/powerpc/kernel/ptrace/ptrace-view.c
index 13208a9..19823a2 100644
--- a/arch/powerpc/kernel/ptrace/ptrace-view.c
+++ b/arch/powerpc/kernel/ptrace/ptrace-view.c
@@ -470,13 +470,15 @@
 static int pkey_get(struct task_struct *target, const struct user_regset *regset,
 		    struct membuf to)
 {
+	int ret;
+
 	BUILD_BUG_ON(TSO(amr) + sizeof(unsigned long) != TSO(iamr));
-	BUILD_BUG_ON(TSO(iamr) + sizeof(unsigned long) != TSO(uamor));
 
 	if (!arch_pkeys_enabled())
 		return -ENODEV;
 
-	return membuf_write(&to, &target->thread.amr, ELF_NPKEY * sizeof(unsigned long));
+	membuf_write(&to, &target->thread.amr, 2 * sizeof(unsigned long));
+	return membuf_store(&to, default_uamor);
 }
 
 static int pkey_set(struct task_struct *target, const struct user_regset *regset,
@@ -498,9 +500,17 @@
 	if (ret)
 		return ret;
 
-	/* UAMOR determines which bits of the AMR can be set from userspace. */
-	target->thread.amr = (new_amr & target->thread.uamor) |
-			     (target->thread.amr & ~target->thread.uamor);
+	/*
+	 * UAMOR determines which bits of the AMR can be set from userspace.
+	 * UAMOR value 0b11 indicates that the AMR value can be modified
+	 * from userspace. If the kernel is using a specific key, we prevent
+	 * userspace from modifying the AMR value for that key by masking it
+	 * via a UAMOR value of 0b00.
+	 *
+	 * Keep the AMR values for the keys that the kernel is using; these
+	 * are indicated by the ~default_uamor bits.
+	 */
+	target->thread.amr = (new_amr & default_uamor) | (target->thread.amr & ~default_uamor);
 
 	return 0;
 }
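
The AMR update in pkey_set() is the classic masked bit-merge: bits
permitted by the mask (here default_uamor) come from the new value and
everything else is preserved. In isolation:

	/* Sketch: merge newval into oldval under mask, as pkey_set() does
	 * with mask == default_uamor. */
	static unsigned long masked_merge(unsigned long oldval,
					  unsigned long newval,
					  unsigned long mask)
	{
		return (newval & mask) | (oldval & ~mask);
	}
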
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index a09eba0..806d554 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -843,96 +843,6 @@
 	__rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1);
 }
 
-enum rtas_cpu_state {
-	DOWN,
-	UP,
-};
-
-#ifndef CONFIG_SMP
-static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
-				cpumask_var_t cpus)
-{
-	if (!cpumask_empty(cpus)) {
-		cpumask_clear(cpus);
-		return -EINVAL;
-	} else
-		return 0;
-}
-#else
-/* On return cpumask will be altered to indicate CPUs changed.
- * CPUs with states changed will be set in the mask,
- * CPUs with status unchanged will be unset in the mask. */
-static int rtas_cpu_state_change_mask(enum rtas_cpu_state state,
-				cpumask_var_t cpus)
-{
-	int cpu;
-	int cpuret = 0;
-	int ret = 0;
-
-	if (cpumask_empty(cpus))
-		return 0;
-
-	for_each_cpu(cpu, cpus) {
-		struct device *dev = get_cpu_device(cpu);
-
-		switch (state) {
-		case DOWN:
-			cpuret = device_offline(dev);
-			break;
-		case UP:
-			cpuret = device_online(dev);
-			break;
-		}
-		if (cpuret < 0) {
-			pr_debug("%s: cpu_%s for cpu#%d returned %d.\n",
-					__func__,
-					((state == UP) ? "up" : "down"),
-					cpu, cpuret);
-			if (!ret)
-				ret = cpuret;
-			if (state == UP) {
-				/* clear bits for unchanged cpus, return */
-				cpumask_shift_right(cpus, cpus, cpu);
-				cpumask_shift_left(cpus, cpus, cpu);
-				break;
-			} else {
-				/* clear bit for unchanged cpu, continue */
-				cpumask_clear_cpu(cpu, cpus);
-			}
-		}
-		cond_resched();
-	}
-
-	return ret;
-}
-#endif
-
-int rtas_online_cpus_mask(cpumask_var_t cpus)
-{
-	int ret;
-
-	ret = rtas_cpu_state_change_mask(UP, cpus);
-
-	if (ret) {
-		cpumask_var_t tmp_mask;
-
-		if (!alloc_cpumask_var(&tmp_mask, GFP_KERNEL))
-			return ret;
-
-		/* Use tmp_mask to preserve cpus mask from first failure */
-		cpumask_copy(tmp_mask, cpus);
-		rtas_offline_cpus_mask(tmp_mask);
-		free_cpumask_var(tmp_mask);
-	}
-
-	return ret;
-}
-
-int rtas_offline_cpus_mask(cpumask_var_t cpus)
-{
-	return rtas_cpu_state_change_mask(DOWN, cpus);
-}
-
 int rtas_ibm_suspend_me(u64 handle)
 {
 	long state;
@@ -940,8 +850,6 @@
 	unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
 	struct rtas_suspend_me_data data;
 	DECLARE_COMPLETION_ONSTACK(done);
-	cpumask_var_t offline_mask;
-	int cpuret;
 
 	if (!rtas_service_present("ibm,suspend-me"))
 		return -ENOSYS;
@@ -962,9 +870,6 @@
 		return -EIO;
 	}
 
-	if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL))
-		return -ENOMEM;
-
 	atomic_set(&data.working, 0);
 	atomic_set(&data.done, 0);
 	atomic_set(&data.error, 0);
@@ -973,24 +878,8 @@
 
 	lock_device_hotplug();
 
-	/* All present CPUs must be online */
-	cpumask_andnot(offline_mask, cpu_present_mask, cpu_online_mask);
-	cpuret = rtas_online_cpus_mask(offline_mask);
-	if (cpuret) {
-		pr_err("%s: Could not bring present CPUs online.\n", __func__);
-		atomic_set(&data.error, cpuret);
-		goto out;
-	}
-
 	cpu_hotplug_disable();
 
-	/* Check if we raced with a CPU-Offline Operation */
-	if (!cpumask_equal(cpu_present_mask, cpu_online_mask)) {
-		pr_info("%s: Raced against a concurrent CPU-Offline\n", __func__);
-		atomic_set(&data.error, -EAGAIN);
-		goto out_hotplug_enable;
-	}
-
 	/* Call function on all CPUs.  One of us will make the
 	 * rtas call
 	 */
@@ -1001,18 +890,11 @@
 	if (atomic_read(&data.error) != 0)
 		printk(KERN_ERR "Error doing global join\n");
 
-out_hotplug_enable:
+
 	cpu_hotplug_enable();
 
-	/* Take down CPUs not online prior to suspend */
-	cpuret = rtas_offline_cpus_mask(offline_mask);
-	if (cpuret)
-		pr_warn("%s: Could not restore CPUs to offline state.\n",
-				__func__);
-
-out:
 	unlock_device_hotplug();
-	free_cpumask_var(offline_mask);
+
 	return atomic_read(&data.error);
 }
 
diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c
index 89b798f..8561dfb 100644
--- a/arch/powerpc/kernel/rtasd.c
+++ b/arch/powerpc/kernel/rtasd.c
@@ -273,37 +273,15 @@
 	}
 }
 
-#ifdef CONFIG_PPC_PSERIES
-static void handle_prrn_event(s32 scope)
-{
-	/*
-	 * For PRRN, we must pass the negative of the scope value in
-	 * the RTAS event.
-	 */
-	pseries_devicetree_update(-scope);
-	numa_update_cpu_topology(false);
-}
-
 static void handle_rtas_event(const struct rtas_error_log *log)
 {
-	if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled())
+	if (!machine_is(pseries))
 		return;
 
-	/* For PRRN Events the extended log length is used to denote
-	 * the scope for calling rtas update-nodes.
-	 */
-	handle_prrn_event(rtas_error_extended_log_length(log));
+	if (rtas_error_type(log) == RTAS_TYPE_PRRN)
+		pr_info_ratelimited("Platform resource reassignment ignored.\n");
 }
 
-#else
-
-static void handle_rtas_event(const struct rtas_error_log *log)
-{
-	return;
-}
-
-#endif
-
 static int rtas_log_open(struct inode * inode, struct file * file)
 {
 	return 0;
diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c
index 4b98232..f9af305 100644
--- a/arch/powerpc/kernel/secure_boot.c
+++ b/arch/powerpc/kernel/secure_boot.c
@@ -23,12 +23,19 @@
 {
 	struct device_node *node;
 	bool enabled = false;
+	u32 secureboot;
 
 	node = get_ppc_fw_sb_node();
 	enabled = of_property_read_bool(node, "os-secureboot-enforcing");
-
 	of_node_put(node);
 
+	if (enabled)
+		goto out;
+
+	if (!of_property_read_u32(of_root, "ibm,secure-boot", &secureboot))
+		enabled = (secureboot > 1);
+
+out:
 	pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled");
 
 	return enabled;
@@ -38,12 +45,19 @@
 {
 	struct device_node *node;
 	bool enabled = false;
+	u32 trustedboot;
 
 	node = get_ppc_fw_sb_node();
 	enabled = of_property_read_bool(node, "trusted-enabled");
-
 	of_node_put(node);
 
+	if (enabled)
+		goto out;
+
+	if (!of_property_read_u32(of_root, "ibm,trusted-boot", &trustedboot))
+		enabled = (trustedboot > 0);
+
+out:
 	pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled");
 
 	return enabled;
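
Both accessors now fall back to a top-level device tree property when the
firmware secure-boot node does not answer. A minimal sketch of that fallback
pattern, using a hypothetical helper name (the thresholds mirror the code
above: secure boot needs a value above 1, trusted boot a value above 0):

	#include <linux/of.h>

	/* Hypothetical helper, not kernel API: read an optional u32 property
	 * from the device tree root and compare it against a threshold. A
	 * missing property keeps the default (false), since
	 * of_property_read_u32() returns 0 only when the read succeeded.
	 */
	static bool fw_prop_above(const char *prop, u32 threshold)
	{
		u32 val;

		if (of_property_read_u32(of_root, prop, &val))
			return false;
		return val > threshold;
	}
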
diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c
index d86701c..c9876aa 100644
--- a/arch/powerpc/kernel/security.c
+++ b/arch/powerpc/kernel/security.c
@@ -21,13 +21,13 @@
 
 u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
 
-enum count_cache_flush_type {
-	COUNT_CACHE_FLUSH_NONE	= 0x1,
-	COUNT_CACHE_FLUSH_SW	= 0x2,
-	COUNT_CACHE_FLUSH_HW	= 0x4,
+enum branch_cache_flush_type {
+	BRANCH_CACHE_FLUSH_NONE	= 0x1,
+	BRANCH_CACHE_FLUSH_SW	= 0x2,
+	BRANCH_CACHE_FLUSH_HW	= 0x4,
 };
-static enum count_cache_flush_type count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
-static bool link_stack_flush_enabled;
+static enum branch_cache_flush_type count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE;
+static enum branch_cache_flush_type link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE;
 
 bool barrier_nospec_enabled;
 static bool no_nospec;
@@ -219,24 +219,25 @@
 		if (ccd)
 			seq_buf_printf(&s, "Indirect branch cache disabled");
 
-		if (link_stack_flush_enabled)
-			seq_buf_printf(&s, ", Software link stack flush");
-
-	} else if (count_cache_flush_type != COUNT_CACHE_FLUSH_NONE) {
+	} else if (count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) {
 		seq_buf_printf(&s, "Mitigation: Software count cache flush");
 
-		if (count_cache_flush_type == COUNT_CACHE_FLUSH_HW)
+		if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW)
 			seq_buf_printf(&s, " (hardware accelerated)");
 
-		if (link_stack_flush_enabled)
-			seq_buf_printf(&s, ", Software link stack flush");
-
 	} else if (btb_flush_enabled) {
 		seq_buf_printf(&s, "Mitigation: Branch predictor state flush");
 	} else {
 		seq_buf_printf(&s, "Vulnerable");
 	}
 
+	if (bcs || ccd || count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE) {
+		if (link_stack_flush_type != BRANCH_CACHE_FLUSH_NONE)
+			seq_buf_printf(&s, ", Software link stack flush");
+		if (link_stack_flush_type == BRANCH_CACHE_FLUSH_HW)
+			seq_buf_printf(&s, " (hardware accelerated)");
+	}
+
 	seq_buf_printf(&s, "\n");
 
 	return s.len;
@@ -427,61 +428,79 @@
 device_initcall(stf_barrier_debugfs_init);
 #endif /* CONFIG_DEBUG_FS */
 
-static void no_count_cache_flush(void)
+static void update_branch_cache_flush(void)
 {
-	count_cache_flush_type = COUNT_CACHE_FLUSH_NONE;
-	pr_info("count-cache-flush: software flush disabled.\n");
-}
-
-static void toggle_count_cache_flush(bool enable)
-{
-	if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE) &&
-	    !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK))
-		enable = false;
-
-	if (!enable) {
-		patch_instruction_site(&patch__call_flush_count_cache,
-				       ppc_inst(PPC_INST_NOP));
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+	// This controls the branch from guest_exit_cont to kvm_flush_link_stack
+	if (link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
 		patch_instruction_site(&patch__call_kvm_flush_link_stack,
 				       ppc_inst(PPC_INST_NOP));
-#endif
-		pr_info("link-stack-flush: software flush disabled.\n");
-		link_stack_flush_enabled = false;
-		no_count_cache_flush();
-		return;
+	} else {
+		// Could use HW flush, but that could also flush count cache
+		patch_branch_site(&patch__call_kvm_flush_link_stack,
+				  (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
 	}
-
-	// This enables the branch from _switch to flush_count_cache
-	patch_branch_site(&patch__call_flush_count_cache,
-			  (u64)&flush_count_cache, BRANCH_SET_LINK);
-
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-	// This enables the branch from guest_exit_cont to kvm_flush_link_stack
-	patch_branch_site(&patch__call_kvm_flush_link_stack,
-			  (u64)&kvm_flush_link_stack, BRANCH_SET_LINK);
 #endif
 
-	pr_info("link-stack-flush: software flush enabled.\n");
-	link_stack_flush_enabled = true;
+	// This controls the branch from _switch to flush_branch_caches
+	if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE &&
+	    link_stack_flush_type == BRANCH_CACHE_FLUSH_NONE) {
+		patch_instruction_site(&patch__call_flush_branch_caches,
+				       ppc_inst(PPC_INST_NOP));
+	} else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW &&
+		   link_stack_flush_type == BRANCH_CACHE_FLUSH_HW) {
+		patch_instruction_site(&patch__call_flush_branch_caches,
+				       ppc_inst(PPC_INST_BCCTR_FLUSH));
+	} else {
+		patch_branch_site(&patch__call_flush_branch_caches,
+				  (u64)&flush_branch_caches, BRANCH_SET_LINK);
 
-	// If we just need to flush the link stack, patch an early return
-	if (!security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
-		patch_instruction_site(&patch__flush_link_stack_return,
-				       ppc_inst(PPC_INST_BLR));
-		no_count_cache_flush();
-		return;
+		// If we just need to flush the link stack, early return
+		if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE) {
+			patch_instruction_site(&patch__flush_link_stack_return,
+					       ppc_inst(PPC_INST_BLR));
+
+		// If we have a hardware flush instruction, early return
+		} else if (count_cache_flush_type == BRANCH_CACHE_FLUSH_HW) {
+			patch_instruction_site(&patch__flush_count_cache_return,
+					       ppc_inst(PPC_INST_BLR));
+		}
+	}
+}
+
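
The net effect of the patching above, summarised per flush-type combination
(the call site is patch__call_flush_branch_caches, in _switch):

	/*
	 * count_cache flush   link_stack flush   patched instruction
	 * ------------------  -----------------  ---------------------------
	 * NONE                NONE               nop (no flush call at all)
	 * HW                  HW                 PPC_INST_BCCTR_FLUSH inline
	 * any other mix       any other mix      bl flush_branch_caches, with
	 *                                        an early blr patched in when
	 *                                        only part of the software
	 *                                        sequence is needed
	 */
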
+static void toggle_branch_cache_flush(bool enable)
+{
+	if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE)) {
+		if (count_cache_flush_type != BRANCH_CACHE_FLUSH_NONE)
+			count_cache_flush_type = BRANCH_CACHE_FLUSH_NONE;
+
+		pr_info("count-cache-flush: flush disabled.\n");
+	} else {
+		if (security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) {
+			count_cache_flush_type = BRANCH_CACHE_FLUSH_HW;
+			pr_info("count-cache-flush: hardware flush enabled.\n");
+		} else {
+			count_cache_flush_type = BRANCH_CACHE_FLUSH_SW;
+			pr_info("count-cache-flush: software flush enabled.\n");
+		}
 	}
 
-	if (!security_ftr_enabled(SEC_FTR_BCCTR_FLUSH_ASSIST)) {
-		count_cache_flush_type = COUNT_CACHE_FLUSH_SW;
-		pr_info("count-cache-flush: full software flush sequence enabled.\n");
-		return;
+	if (!enable || !security_ftr_enabled(SEC_FTR_FLUSH_LINK_STACK)) {
+		if (link_stack_flush_type != BRANCH_CACHE_FLUSH_NONE)
+			link_stack_flush_type = BRANCH_CACHE_FLUSH_NONE;
+
+		pr_info("link-stack-flush: flush disabled.\n");
+	} else {
+		if (security_ftr_enabled(SEC_FTR_BCCTR_LINK_FLUSH_ASSIST)) {
+			link_stack_flush_type = BRANCH_CACHE_FLUSH_HW;
+			pr_info("link-stack-flush: hardware flush enabled.\n");
+		} else {
+			link_stack_flush_type = BRANCH_CACHE_FLUSH_SW;
+			pr_info("link-stack-flush: software flush enabled.\n");
+		}
 	}
 
-	patch_instruction_site(&patch__flush_count_cache_return, ppc_inst(PPC_INST_BLR));
-	count_cache_flush_type = COUNT_CACHE_FLUSH_HW;
-	pr_info("count-cache-flush: hardware assisted flush sequence enabled\n");
+	update_branch_cache_flush();
 }
 
 void setup_count_cache_flush(void)
@@ -505,7 +524,7 @@
 	    security_ftr_enabled(SEC_FTR_FLUSH_COUNT_CACHE))
 		security_ftr_set(SEC_FTR_FLUSH_LINK_STACK);
 
-	toggle_count_cache_flush(enable);
+	toggle_branch_cache_flush(enable);
 }
 
 #ifdef CONFIG_DEBUG_FS
@@ -520,14 +539,14 @@
 	else
 		return -EINVAL;
 
-	toggle_count_cache_flush(enable);
+	toggle_branch_cache_flush(enable);
 
 	return 0;
 }
 
 static int count_cache_flush_get(void *data, u64 *val)
 {
-	if (count_cache_flush_type == COUNT_CACHE_FLUSH_NONE)
+	if (count_cache_flush_type == BRANCH_CACHE_FLUSH_NONE)
 		*val = 0;
 	else
 		*val = 1;
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 9d3faac..b198b0f 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -928,6 +928,9 @@
 	/* Reserve large chunks of memory for use by CMA for KVM. */
 	kvm_cma_reserve();
 
+	/* Reserve large chunks of memory for use by CMA for hugetlb. */
+	gigantic_hugetlb_cma_reserve();
+
 	klp_init_thread_info(&init_task);
 
 	init_mm.start_code = (unsigned long)_stext;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 0ba1ed7..6be4301 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -196,7 +196,10 @@
 	/* Under a PAPR hypervisor, we need hypercalls */
 	if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
 		/* Enable AIL if possible */
-		pseries_enable_reloc_on_exc();
+		if (!pseries_enable_reloc_on_exc()) {
+			init_task.thread.fscr &= ~FSCR_SCV;
+			cur_cpu_spec->cpu_user_features2 &= ~PPC_FEATURE2_SCV;
+		}
 
 		/*
 		 * Tell the hypervisor that we want our exceptions to
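
Clearing PPC_FEATURE2_SCV here is what userspace keys off: if AIL cannot be
enabled, scv is unusable, so the bit is withdrawn before being exported via
the auxiliary vector. A hedged userspace sketch of the corresponding check
(the fallback constant matches the 5.9-era uapi headers, but treat it as an
assumption and prefer the system-provided definition):

	#include <sys/auxv.h>

	#ifndef PPC_FEATURE2_SCV
	#define PPC_FEATURE2_SCV 0x00100000 /* assumed; see uapi asm/cputable.h */
	#endif

	/* Returns non-zero when the kernel advertises the scv syscall ABI. */
	static int have_scv(void)
	{
		return !!(getauxval(AT_HWCAP2) & PPC_FEATURE2_SCV);
	}
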
diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c
index b4143b6..d15a98c 100644
--- a/arch/powerpc/kernel/signal.c
+++ b/arch/powerpc/kernel/signal.c
@@ -205,8 +205,14 @@
 		return;
 
 	/* error signalled ? */
-	if (!(regs->ccr & 0x10000000))
+	if (trap_is_scv(regs)) {
+		/* 32-bit compat mode sign extend? */
+		if (!IS_ERR_VALUE(ret))
+			return;
+		ret = -ret;
+	} else if (!(regs->ccr & 0x10000000)) {
 		return;
+	}
 
 	switch (ret) {
 	case ERESTART_RESTARTBLOCK:
@@ -239,9 +245,14 @@
 		regs->nip -= 4;
 		regs->result = 0;
 	} else {
-		regs->result = -EINTR;
-		regs->gpr[3] = EINTR;
-		regs->ccr |= 0x10000000;
+		if (trap_is_scv(regs)) {
+			regs->result = -EINTR;
+			regs->gpr[3] = -EINTR;
+		} else {
+			regs->result = -EINTR;
+			regs->gpr[3] = EINTR;
+			regs->ccr |= 0x10000000;
+		}
 	}
 }
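
The split above exists because the two entry points report errors
differently: sc sets CR0.SO and leaves a positive errno in r3, while scv
returns a negative errno directly in r3 with no condition-register flag.
A sketch, with a hypothetical helper that normalises both to a negative
errno (zero meaning success):

	/* Hypothetical helper, not kernel API. */
	static long syscall_ret_to_err(struct pt_regs *regs)
	{
		if (trap_is_scv(regs))	/* scv: negative errno in r3 */
			return IS_ERR_VALUE(regs->gpr[3]) ? (long)regs->gpr[3] : 0;

		/* sc: CR0.SO flags the error, r3 holds the positive errno */
		return (regs->ccr & 0x10000000) ? -(long)regs->gpr[3] : 0;
	}
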
 
diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c
index 1415c16..96950f1 100644
--- a/arch/powerpc/kernel/signal_32.c
+++ b/arch/powerpc/kernel/signal_32.c
@@ -102,22 +102,18 @@
 		struct mcontext __user *frame)
 {
 	elf_greg_t64 *gregs = (elf_greg_t64 *)regs;
-	int i;
-	/* Force usr to alway see softe as 1 (interrupts enabled) */
-	elf_greg_t64 softe = 0x1;
+	int val, i;
 
 	WARN_ON(!FULL_REGS(regs));
 
 	for (i = 0; i <= PT_RESULT; i ++) {
-		if (i == 14 && !FULL_REGS(regs))
-			i = 32;
-		if ( i == PT_SOFTE) {
-			if(__put_user((unsigned int)softe, &frame->mc_gregs[i]))
-				return -EFAULT;
-			else
-				continue;
-		}
-		if (__put_user((unsigned int)gregs[i], &frame->mc_gregs[i]))
+		/* Force user to always see softe as 1 (interrupts enabled) */
+		if (i == PT_SOFTE)
+			val = 1;
+		else
+			val = gregs[i];
+
+		if (__put_user(val, &frame->mc_gregs[i]))
 			return -EFAULT;
 	}
 	return 0;
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 55e5f76..bfc9393 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -21,6 +21,7 @@
 #include <linux/ptrace.h>
 #include <linux/ratelimit.h>
 #include <linux/syscalls.h>
+#include <linux/pagemap.h>
 
 #include <asm/sigcontext.h>
 #include <asm/ucontext.h>
@@ -39,8 +40,8 @@
 #define GP_REGS_SIZE	min(sizeof(elf_gregset_t), sizeof(struct pt_regs))
 #define FP_REGS_SIZE	sizeof(elf_fpregset_t)
 
-#define TRAMP_TRACEBACK	3
-#define TRAMP_SIZE	6
+#define TRAMP_TRACEBACK	4
+#define TRAMP_SIZE	7
 
 /*
  * When we have signals to deliver, we set up on the user stack,
@@ -600,13 +601,15 @@
 	int i;
 	long err = 0;
 
+	/* bctrl # call the handler */
+	err |= __put_user(PPC_INST_BCTRL, &tramp[0]);
 	/* addi r1, r1, __SIGNAL_FRAMESIZE  # Pop the dummy stackframe */
 	err |= __put_user(PPC_INST_ADDI | __PPC_RT(R1) | __PPC_RA(R1) |
-			  (__SIGNAL_FRAMESIZE & 0xffff), &tramp[0]);
+			  (__SIGNAL_FRAMESIZE & 0xffff), &tramp[1]);
 	/* li r0, __NR_[rt_]sigreturn */
-	err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[1]);
+	err |= __put_user(PPC_INST_ADDI | (syscall & 0xffff), &tramp[2]);
 	/* sc */
-	err |= __put_user(PPC_INST_SC, &tramp[2]);
+	err |= __put_user(PPC_INST_SC, &tramp[3]);
 
 	/* Minimal traceback info */
 	for (i=TRAMP_TRACEBACK; i < TRAMP_SIZE ;i++)
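
Spelled out, the trampoline is now entered with a real call instead of being
installed as the handler's return address, so the handler's blr pairs with
the bctrl and the link stack (branch return predictor) stays balanced:

	/*
	 * tramp[0]: bctrl                            # call the signal handler
	 * tramp[1]: addi r1, r1, __SIGNAL_FRAMESIZE  # pop the dummy stack frame
	 * tramp[2]: li   r0, __NR_rt_sigreturn       # load the syscall number
	 * tramp[3]: sc                               # enter the kernel to sigreturn
	 */
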
@@ -632,7 +635,6 @@
 SYSCALL_DEFINE3(swapcontext, struct ucontext __user *, old_ctx,
 		struct ucontext __user *, new_ctx, long, ctx_size)
 {
-	unsigned char tmp;
 	sigset_t set;
 	unsigned long new_msr = 0;
 	int ctx_has_vsx_region = 0;
@@ -667,9 +669,8 @@
 	}
 	if (new_ctx == NULL)
 		return 0;
-	if (!access_ok(new_ctx, ctx_size)
-	    || __get_user(tmp, (u8 __user *) new_ctx)
-	    || __get_user(tmp, (u8 __user *) new_ctx + ctx_size - 1))
+	if (!access_ok(new_ctx, ctx_size) ||
+	    fault_in_pages_readable((u8 __user *)new_ctx, ctx_size))
 		return -EFAULT;
 
 	/*
@@ -864,12 +865,12 @@
 
 	/* Set up to return from userspace. */
 	if (vdso64_rt_sigtramp && tsk->mm->context.vdso_base) {
-		regs->link = tsk->mm->context.vdso_base + vdso64_rt_sigtramp;
+		regs->nip = tsk->mm->context.vdso_base + vdso64_rt_sigtramp;
 	} else {
 		err |= setup_trampoline(__NR_rt_sigreturn, &frame->tramp[0]);
 		if (err)
 			goto badframe;
-		regs->link = (unsigned long) &frame->tramp[0];
+		regs->nip = (unsigned long) &frame->tramp[0];
 	}
 
 	/* Allocate a dummy caller frame for the signal handler. */
@@ -878,8 +879,8 @@
 
 	/* Set up "regs" so we "return" to the signal handler. */
 	if (is_elf2_task()) {
-		regs->nip = (unsigned long) ksig->ka.sa.sa_handler;
-		regs->gpr[12] = regs->nip;
+		regs->ctr = (unsigned long) ksig->ka.sa.sa_handler;
+		regs->gpr[12] = regs->ctr;
 	} else {
 		/* Handler is *really* a pointer to the function descriptor for
 		 * the signal routine.  The first entry in the function
@@ -889,7 +890,7 @@
 		func_descr_t __user *funct_desc_ptr =
 			(func_descr_t __user *) ksig->ka.sa.sa_handler;
 
-		err |= get_user(regs->nip, &funct_desc_ptr->entry);
+		err |= get_user(regs->ctr, &funct_desc_ptr->entry);
 		err |= get_user(regs->gpr[2], &funct_desc_ptr->toc);
 	}
 
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 7319947..8261999 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -59,6 +59,7 @@
 #include <asm/asm-prototypes.h>
 #include <asm/cpu_has_feature.h>
 #include <asm/ftrace.h>
+#include <asm/kup.h>
 
 #ifdef DEBUG
 #include <asm/udbg.h>
diff --git a/arch/powerpc/kernel/syscall_64.c b/arch/powerpc/kernel/syscall_64.c
index 79edba3..8e50818 100644
--- a/arch/powerpc/kernel/syscall_64.c
+++ b/arch/powerpc/kernel/syscall_64.c
@@ -60,6 +60,11 @@
 	local_irq_enable();
 
 	if (unlikely(current_thread_info()->flags & _TIF_SYSCALL_DOTRACE)) {
+		if (unlikely(regs->trap == 0x7ff0)) {
+			/* Unsupported scv vector */
+			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+			return regs->gpr[3];
+		}
 		/*
 		 * We use the return value of do_syscall_trace_enter() as the
 		 * syscall number. If the syscall was rejected for any reason
@@ -78,6 +83,11 @@
 		r8 = regs->gpr[8];
 
 	} else if (unlikely(r0 >= NR_syscalls)) {
+		if (unlikely(regs->trap == 0x7ff0)) {
+			/* Unsupported scv vector */
+			_exception(SIGILL, regs, ILL_ILLOPC, regs->nip);
+			return regs->gpr[3];
+		}
 		return -ENOSYS;
 	}
 
@@ -105,16 +115,20 @@
  * local irqs must be disabled. Returns false if the caller must re-enable
  * them, check for new work, and try again.
  */
-static notrace inline bool prep_irq_for_enabled_exit(void)
+static notrace inline bool prep_irq_for_enabled_exit(bool clear_ri)
 {
 	/* This must be done with RI=1 because tracing may touch vmaps */
 	trace_hardirqs_on();
 
 	/* This pattern matches prep_irq_for_idle */
-	__hard_EE_RI_disable();
+	if (clear_ri)
+		__hard_EE_RI_disable();
+	else
+		__hard_irq_disable();
 	if (unlikely(lazy_irq_pending_nocheck())) {
 		/* Took an interrupt, may have more exit work to do. */
-		__hard_RI_enable();
+		if (clear_ri)
+			__hard_RI_enable();
 		trace_hardirqs_off();
 		local_paca->irq_happened |= PACA_IRQ_HARD_DIS;
 
@@ -136,7 +150,8 @@
  * because RI=0 and soft mask state is "unreconciled", so it is marked notrace.
  */
 notrace unsigned long syscall_exit_prepare(unsigned long r3,
-					   struct pt_regs *regs)
+					   struct pt_regs *regs,
+					   long scv)
 {
 	unsigned long *ti_flagsp = &current_thread_info()->flags;
 	unsigned long ti_flags;
@@ -151,7 +166,7 @@
 
 	ti_flags = *ti_flagsp;
 
-	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO)) {
+	if (unlikely(r3 >= (unsigned long)-MAX_ERRNO) && !scv) {
 		if (likely(!(ti_flags & (_TIF_NOERROR | _TIF_RESTOREALL)))) {
 			r3 = -r3;
 			regs->ccr |= 0x10000000; /* Set SO bit in CR */
@@ -206,12 +221,20 @@
 			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
 				mathflags |= MSR_VEC;
 
+			/*
+			 * If userspace MSR has all available FP bits set,
+			 * then they are live and need not be restored. If not,
+			 * it means the regs were given up and restore_math
+			 * may decide to restore them (to avoid taking an FP
+			 * fault).
+			 */
 			if ((regs->msr & mathflags) != mathflags)
 				restore_math(regs);
 		}
 	}
 
-	if (unlikely(!prep_irq_for_enabled_exit())) {
+	/* scv need not set RI=0 because SRRs are not used */
+	if (unlikely(!prep_irq_for_enabled_exit(!scv))) {
 		local_irq_enable();
 		goto again;
 	}
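
For reference, a hedged userspace sketch of a one-argument scv call under
the convention checked above (negative errno straight in r3, no CR0.SO
test). scv clobbers the same registers as a function call, so the clobber
list is much larger than for sc, and assembling the mnemonic needs a
binutils that knows the instruction:

	static long scv_syscall1(long nr, long arg)
	{
		register long r0 asm("r0") = nr;
		register long r3 asm("r3") = arg;

		asm volatile("scv 0"
			     : "+r"(r3), "+r"(r0)
			     :
			     : "r4", "r5", "r6", "r7", "r8", "r9", "r10",
			       "r11", "r12", "lr", "ctr", "xer",
			       "cr0", "cr1", "cr5", "cr6", "cr7", "memory");

		return r3;	/* negative errno on failure */
	}
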
@@ -277,12 +300,13 @@
 			else if (cpu_has_feature(CPU_FTR_ALTIVEC))
 				mathflags |= MSR_VEC;
 
+			/* See above restore_math comment */
 			if ((regs->msr & mathflags) != mathflags)
 				restore_math(regs);
 		}
 	}
 
-	if (unlikely(!prep_irq_for_enabled_exit())) {
+	if (unlikely(!prep_irq_for_enabled_exit(true))) {
 		local_irq_enable();
 		local_irq_disable();
 		goto again;
@@ -345,7 +369,7 @@
 			}
 		}
 
-		if (unlikely(!prep_irq_for_enabled_exit())) {
+		if (unlikely(!prep_irq_for_enabled_exit(true))) {
 			/*
 			 * Can't local_irq_restore to replay if we were in
 			 * interrupt context. Must replay directly.
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index 571b325..46b4ebc 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -622,8 +622,10 @@
 SYSFS_PMCSETUP(pmc8, SPRN_PMC8);
 
 SYSFS_PMCSETUP(mmcra, SPRN_MMCRA);
+SYSFS_PMCSETUP(mmcr3, SPRN_MMCR3);
 
 static DEVICE_ATTR(mmcra, 0600, show_mmcra, store_mmcra);
+static DEVICE_ATTR(mmcr3, 0600, show_mmcr3, store_mmcr3);
 #endif /* HAS_PPC_PMC56 */
 
 
@@ -886,6 +888,9 @@
 #ifdef	CONFIG_PMU_SYSFS
 	if (cpu_has_feature(CPU_FTR_MMCRA))
 		device_create_file(s, &dev_attr_mmcra);
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		device_create_file(s, &dev_attr_mmcr3);
 #endif /* CONFIG_PMU_SYSFS */
 
 	if (cpu_has_feature(CPU_FTR_PURR)) {
@@ -980,6 +985,9 @@
 #ifdef CONFIG_PMU_SYSFS
 	if (cpu_has_feature(CPU_FTR_MMCRA))
 		device_remove_file(s, &dev_attr_mmcra);
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		device_remove_file(s, &dev_attr_mmcr3);
 #endif /* CONFIG_PMU_SYSFS */
 
 	if (cpu_has_feature(CPU_FTR_PURR)) {
diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c
index c1fede6..42761eb 100644
--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -73,8 +73,8 @@
 
 	/* Make sure it is what we expect it to be */
 	if (!ppc_inst_equal(replaced, old)) {
-		pr_err("%p: replaced (%#x) != old (%#x)",
-		(void *)ip, ppc_inst_val(replaced), ppc_inst_val(old));
+		pr_err("%p: replaced (%s) != old (%s)",
+		(void *)ip, ppc_inst_as_str(replaced), ppc_inst_as_str(old));
 		return -EINVAL;
 	}
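
The switch from %x to %s throughout this file is needed because a ppc_inst
may now be a 64-bit prefixed instruction (ISA v3.1), which a single 32-bit
print would mangle. ppc_inst_as_str() comes from this series; a simplified
model of its output (a sketch, not the exact implementation):

	char str[32];

	/* Prefixed instructions print as two words, others as one. */
	if (ppc_inst_prefixed(inst))
		snprintf(str, sizeof(str), "%08x %08x",
			 ppc_inst_val(inst), ppc_inst_suffix(inst));
	else
		snprintf(str, sizeof(str), "%08x", ppc_inst_val(inst));
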
 
@@ -137,7 +137,7 @@
 
 	/* Make sure that this is still a 24bit jump */
 	if (!is_bl_op(op)) {
-		pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op));
+		pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 
@@ -172,8 +172,8 @@
 	/* We expect either a mflr r0, or a std r0, LRSAVE(r1) */
 	if (!ppc_inst_equal(op, ppc_inst(PPC_INST_MFLR)) &&
 	    !ppc_inst_equal(op, ppc_inst(PPC_INST_STD_LR))) {
-		pr_err("Unexpected instruction %08x around bl _mcount\n",
-		       ppc_inst_val(op));
+		pr_err("Unexpected instruction %s around bl _mcount\n",
+		       ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 #else
@@ -203,7 +203,7 @@
 	}
 
 	if (!ppc_inst_equal(op,  ppc_inst(PPC_INST_LD_TOC))) {
-		pr_err("Expected %08x found %08x\n", PPC_INST_LD_TOC, ppc_inst_val(op));
+		pr_err("Expected %08x found %s\n", PPC_INST_LD_TOC, ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 #endif /* CONFIG_MPROFILE_KERNEL */
@@ -231,7 +231,7 @@
 
 	/* Make sure that this is still a 24bit jump */
 	if (!is_bl_op(op)) {
-		pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op));
+		pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 
@@ -406,7 +406,7 @@
 
 	/* Make sure that this is still a 24bit jump */
 	if (!is_bl_op(op)) {
-		pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op));
+		pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 
@@ -533,8 +533,8 @@
 		return -EFAULT;
 
 	if (!expected_nop_sequence(ip, op[0], op[1])) {
-		pr_err("Unexpected call sequence at %p: %x %x\n",
-		ip, ppc_inst_val(op[0]), ppc_inst_val(op[1]));
+		pr_err("Unexpected call sequence at %p: %s %s\n",
+		ip, ppc_inst_as_str(op[0]), ppc_inst_as_str(op[1]));
 		return -EINVAL;
 	}
 
@@ -597,7 +597,7 @@
 
 	/* It should be pointing to a nop */
 	if (!ppc_inst_equal(op,  ppc_inst(PPC_INST_NOP))) {
-		pr_err("Expected NOP but have %x\n", ppc_inst_val(op));
+		pr_err("Expected NOP but have %s\n", ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 
@@ -654,7 +654,7 @@
 	}
 
 	if (!ppc_inst_equal(op, ppc_inst(PPC_INST_NOP))) {
-		pr_err("Unexpected call sequence at %p: %x\n", ip, ppc_inst_val(op));
+		pr_err("Unexpected call sequence at %p: %s\n", ip, ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 
@@ -733,7 +733,7 @@
 
 	/* Make sure that this is still a 24bit jump */
 	if (!is_bl_op(op)) {
-		pr_err("Not expected bl: opcode is %x\n", ppc_inst_val(op));
+		pr_err("Not expected bl: opcode is %s\n", ppc_inst_as_str(op));
 		return -EINVAL;
 	}
 
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 97413a3..d1ebe15 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -2060,14 +2060,6 @@
 NOKPROBE_SYMBOL(DebugException);
 #endif /* CONFIG_PPC_ADV_DEBUG_REGS */
 
-#if !defined(CONFIG_TAU_INT)
-void TAUException(struct pt_regs *regs)
-{
-	printk("TAU trap at PC: %lx, MSR: %lx, vector=%lx    %s\n",
-	       regs->nip, regs->msr, regs->trap, print_tainted());
-}
-#endif /* CONFIG_INT_TAU */
-
 #ifdef CONFIG_ALTIVEC
 void altivec_assist_exception(struct pt_regs *regs)
 {
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index e0f4ba4..8dad442 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -677,7 +677,7 @@
 	node = cpu_to_node(cpu);
 	WARN_ON_ONCE(node > 0xffff);
 
-	val = (cpu & 0xfff) | ((node & 0xffff) << 16);
+	val = (cpu & 0xffff) | ((node & 0xffff) << 16);
 	mtspr(SPRN_SPRG_VDSO_WRITE, val);
 	get_paca()->sprg_vdso = val;
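
The mask change widens the cpu field from 12 to 16 bits; with the old 0xfff
mask, CPU numbers above 4095 were silently truncated in the value the vDSO
getcpu reads back. The packing as a standalone sketch:

	/* 16 bits each: cpu in the low half, node in the high half. */
	static inline u32 pack_cpu_node(u32 cpu, u32 node)
	{
		return (cpu & 0xffff) | ((node & 0xffff) << 16);
	}

	static inline u32 unpacked_cpu(u32 val)  { return val & 0xffff; }
	static inline u32 unpacked_node(u32 val) { return (val >> 16) & 0xffff; }
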
 
diff --git a/arch/powerpc/kernel/vdso32/Makefile b/arch/powerpc/kernel/vdso32/Makefile
index e147bbd..87ab1152 100644
--- a/arch/powerpc/kernel/vdso32/Makefile
+++ b/arch/powerpc/kernel/vdso32/Makefile
@@ -50,7 +50,7 @@
 
 # actual build commands
 quiet_cmd_vdso32ld = VDSO32L $@
-      cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
+      cmd_vdso32ld = $(VDSOCC) $(c_flags) $(CC32FLAGS) -o $@ $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn) -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
 quiet_cmd_vdso32as = VDSO32A $@
       cmd_vdso32as = $(VDSOCC) $(a_flags) $(CC32FLAGS) -c -o $@ $<
 
diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S
index 5206c2e..4c98546 100644
--- a/arch/powerpc/kernel/vdso32/vdso32.lds.S
+++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S
@@ -111,6 +111,7 @@
 		*(.note.GNU-stack)
 		*(.data .data.* .gnu.linkonce.d.* .sdata*)
 		*(.bss .sbss .dynbss .dynsbss)
+		*(.glink .iplt .plt .rela*)
 	}
 }
 
diff --git a/arch/powerpc/kernel/vdso64/Makefile b/arch/powerpc/kernel/vdso64/Makefile
index 32ebb352..38c317f 100644
--- a/arch/powerpc/kernel/vdso64/Makefile
+++ b/arch/powerpc/kernel/vdso64/Makefile
@@ -34,7 +34,7 @@
 
 # actual build commands
 quiet_cmd_vdso64ld = VDSO64L $@
-      cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^)
+      cmd_vdso64ld = $(CC) $(c_flags) -o $@ -Wl,-T$(filter %.lds,$^) $(filter %.o,$^) $(call cc-ldoption, -Wl$(comma)--orphan-handling=warn)
 
 # install commands for the unstripped file
 quiet_cmd_vdso_install = INSTALL $@
diff --git a/arch/powerpc/kernel/vdso64/cacheflush.S b/arch/powerpc/kernel/vdso64/cacheflush.S
index 526f5ba..cab1432 100644
--- a/arch/powerpc/kernel/vdso64/cacheflush.S
+++ b/arch/powerpc/kernel/vdso64/cacheflush.S
@@ -8,6 +8,7 @@
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
 #include <asm/asm-offsets.h>
 
 	.text
@@ -24,14 +25,12 @@
   .cfi_startproc
 	mflr	r12
   .cfi_register lr,r12
-	mr	r11,r3
-	bl	V_LOCAL_FUNC(__get_datapage)
+	get_datapage	r10, r0
 	mtlr	r12
-	mr	r10,r3
 
 	lwz	r7,CFG_DCACHE_BLOCKSZ(r10)
 	addi	r5,r7,-1
-	andc	r6,r11,r5		/* round low to line bdy */
+	andc	r6,r3,r5		/* round low to line bdy */
 	subf	r8,r6,r4		/* compute length */
 	add	r8,r8,r5		/* ensure we get enough */
 	lwz	r9,CFG_DCACHE_LOGBLOCKSZ(r10)
@@ -48,7 +47,7 @@
 
 	lwz	r7,CFG_ICACHE_BLOCKSZ(r10)
 	addi	r5,r7,-1
-	andc	r6,r11,r5		/* round low to line bdy */
+	andc	r6,r3,r5		/* round low to line bdy */
 	subf	r8,r6,r4		/* compute length */
 	add	r8,r8,r5
 	lwz	r9,CFG_ICACHE_LOGBLOCKSZ(r10)
diff --git a/arch/powerpc/kernel/vdso64/datapage.S b/arch/powerpc/kernel/vdso64/datapage.S
index dc84f5a..067247d 100644
--- a/arch/powerpc/kernel/vdso64/datapage.S
+++ b/arch/powerpc/kernel/vdso64/datapage.S
@@ -10,35 +10,13 @@
 #include <asm/asm-offsets.h>
 #include <asm/unistd.h>
 #include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
 
 	.text
 .global	__kernel_datapage_offset;
 __kernel_datapage_offset:
 	.long	0
 
-V_FUNCTION_BEGIN(__get_datapage)
-  .cfi_startproc
-	/* We don't want that exposed or overridable as we want other objects
-	 * to be able to bl directly to here
-	 */
-	.protected __get_datapage
-	.hidden __get_datapage
-
-	mflr	r0
-  .cfi_register lr,r0
-
-	bcl	20,31,data_page_branch
-data_page_branch:
-	mflr	r3
-	mtlr	r0
-	addi	r3, r3, __kernel_datapage_offset-data_page_branch
-	lwz	r0,0(r3)
-  .cfi_restore lr
-	add	r3,r0,r3
-	blr
-  .cfi_endproc
-V_FUNCTION_END(__get_datapage)
-
 /*
  * void *__kernel_get_syscall_map(unsigned int *syscall_count) ;
  *
@@ -53,7 +31,7 @@
 	mflr	r12
   .cfi_register lr,r12
 	mr	r4,r3
-	bl	V_LOCAL_FUNC(__get_datapage)
+	get_datapage	r3, r0
 	mtlr	r12
 	addi	r3,r3,CFG_SYSCALL_MAP64
 	cmpldi	cr0,r4,0
@@ -75,7 +53,7 @@
   .cfi_startproc
 	mflr	r12
   .cfi_register lr,r12
-	bl	V_LOCAL_FUNC(__get_datapage)
+	get_datapage	r3, r0
 	ld	r3,CFG_TB_TICKS_PER_SEC(r3)
 	mtlr	r12
 	crclr	cr0*4+so
diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S
index 1c9a047..20f8be4 100644
--- a/arch/powerpc/kernel/vdso64/gettimeofday.S
+++ b/arch/powerpc/kernel/vdso64/gettimeofday.S
@@ -9,6 +9,7 @@
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/vdso.h>
+#include <asm/vdso_datapage.h>
 #include <asm/asm-offsets.h>
 #include <asm/unistd.h>
 
@@ -26,7 +27,7 @@
 
 	mr	r11,r3			/* r11 holds tv */
 	mr	r10,r4			/* r10 holds tz */
-	bl	V_LOCAL_FUNC(__get_datapage)	/* get data page */
+	get_datapage	r3, r0
 	cmpldi	r11,0			/* check if tv is NULL */
 	beq	2f
 	lis	r7,1000000@ha		/* load up USEC_PER_SEC */
@@ -71,7 +72,7 @@
 	mflr	r12			/* r12 saves lr */
   .cfi_register lr,r12
 	mr	r11,r4			/* r11 saves tp */
-	bl	V_LOCAL_FUNC(__get_datapage)	/* get data page */
+	get_datapage	r3, r0
 	lis	r7,NSEC_PER_SEC@h	/* want nanoseconds */
 	ori	r7,r7,NSEC_PER_SEC@l
 	beq	cr5,70f
@@ -188,7 +189,7 @@
 
 	mflr	r12
   .cfi_register lr,r12
-	bl	V_LOCAL_FUNC(__get_datapage)
+	get_datapage	r3, r0
 	lwz	r5, CLOCK_HRTIMER_RES(r3)
 	mtlr	r12
 	li	r3,0
@@ -221,7 +222,7 @@
   .cfi_register lr,r12
 
 	mr	r11,r3			/* r11 holds t */
-	bl	V_LOCAL_FUNC(__get_datapage)
+	get_datapage	r3, r0
 
 	ld	r4,STAMP_XTIME_SEC(r3)
 
diff --git a/arch/powerpc/kernel/vdso64/sigtramp.S b/arch/powerpc/kernel/vdso64/sigtramp.S
index a8cc040..bbf68cd 100644
--- a/arch/powerpc/kernel/vdso64/sigtramp.S
+++ b/arch/powerpc/kernel/vdso64/sigtramp.S
@@ -6,6 +6,7 @@
  * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), IBM Corp.
  * Copyright (C) 2004 Alan Modra (amodra@au.ibm.com)), IBM Corp.
  */
+#include <asm/cache.h>		/* IFETCH_ALIGN_BYTES */
 #include <asm/processor.h>
 #include <asm/ppc_asm.h>
 #include <asm/unistd.h>
@@ -14,21 +15,17 @@
 
 	.text
 
-/* The nop here is a hack.  The dwarf2 unwind routines subtract 1 from
-   the return address to get an address in the middle of the presumed
-   call instruction.  Since we don't have a call here, we artificially
-   extend the range covered by the unwind info by padding before the
-   real start.  */
-	nop
 	.balign 8
+	.balign IFETCH_ALIGN_BYTES
 V_FUNCTION_BEGIN(__kernel_sigtramp_rt64)
-.Lsigrt_start = . - 4
+.Lsigrt_start:
+	bctrl	/* call the handler */
 	addi	r1, r1, __SIGNAL_FRAMESIZE
 	li	r0,__NR_rt_sigreturn
 	sc
 .Lsigrt_end:
 V_FUNCTION_END(__kernel_sigtramp_rt64)
-/* The ".balign 8" above and the following zeros mimic the old stack
+/* The .balign 8 above and the following zeros mimic the old stack
    trampoline layout.  The last magic value is the ucontext pointer,
    chosen in such a way that older libgcc unwind code returns a zero
    for a sigcontext pointer.  */
diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S
index 256fb97..4e3a8d4 100644
--- a/arch/powerpc/kernel/vdso64/vdso64.lds.S
+++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S
@@ -30,7 +30,7 @@
 	. = ALIGN(16);
 	.text		: {
 		*(.text .stub .text.* .gnu.linkonce.t.* __ftr_alt_*)
-		*(.sfpr .glink)
+		*(.sfpr)
 	}						:text
 	PROVIDE(__etext = .);
 	PROVIDE(_etext = .);
@@ -111,6 +111,7 @@
 		*(.branch_lt)
 		*(.data .data.* .gnu.linkonce.d.* .sdata*)
 		*(.bss .sbss .dynbss .dynsbss)
+		*(.glink .iplt .plt .rela*)
 	}
 }
 
diff --git a/arch/powerpc/kernel/vector.S b/arch/powerpc/kernel/vector.S
index efc5b52..801dc28 100644
--- a/arch/powerpc/kernel/vector.S
+++ b/arch/powerpc/kernel/vector.S
@@ -76,9 +76,7 @@
 	oris	r12,r12,MSR_VEC@h
 	std	r12,_MSR(r1)
 #endif
-	/* Don't care if r4 overflows, this is desired behaviour */
-	lbz	r4,THREAD_LOAD_VEC(r5)
-	addi	r4,r4,1
+	li	r4,1
 	stb	r4,THREAD_LOAD_VEC(r5)
 	addi	r6,r5,THREAD_VRSTATE
 	li	r4,1
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
index 86380c6..4aff684 100644
--- a/arch/powerpc/kexec/Makefile
+++ b/arch/powerpc/kexec/Makefile
@@ -7,7 +7,7 @@
 
 obj-$(CONFIG_PPC32)		+= relocate_32.o
 
-obj-$(CONFIG_KEXEC_FILE)	+= file_load.o elf_$(BITS).o
+obj-$(CONFIG_KEXEC_FILE)	+= file_load.o ranges.o file_load_$(BITS).o elf_$(BITS).o
 
 ifdef CONFIG_HAVE_IMA_KEXEC
 ifdef CONFIG_IMA
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
index b41840921..8a449b2 100644
--- a/arch/powerpc/kexec/core_64.c
+++ b/arch/powerpc/kexec/core_64.c
@@ -152,6 +152,8 @@
 	if (ppc_md.kexec_cpu_down)
 		ppc_md.kexec_cpu_down(0, 1);
 
+	reset_sprs();
+
 	kexec_smp_wait();
 	/* NOTREACHED */
 }
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
index 3072fd6..d0e459b 100644
--- a/arch/powerpc/kexec/elf_64.c
+++ b/arch/powerpc/kexec/elf_64.c
@@ -35,6 +35,7 @@
 	void *fdt;
 	const void *slave_code;
 	struct elfhdr ehdr;
+	char *modified_cmdline = NULL;
 	struct kexec_elf_info elf_info;
 	struct kexec_buf kbuf = { .image = image, .buf_min = 0,
 				  .buf_max = ppc64_rma_size };
@@ -46,6 +47,14 @@
 	if (ret)
 		goto out;
 
+	if (image->type == KEXEC_TYPE_CRASH) {
+		/* min & max buffer values for kdump case */
+		kbuf.buf_min = pbuf.buf_min = crashk_res.start;
+		kbuf.buf_max = pbuf.buf_max =
+				((crashk_res.end < ppc64_rma_size) ?
+				 crashk_res.end : (ppc64_rma_size - 1));
+	}
+
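
Equivalently, the bounds for every kdump segment collapse to a clamp against
the crashkernel reservation and the RMA (a sketch of the same assignment):

	kbuf.buf_min = pbuf.buf_min = crashk_res.start;
	kbuf.buf_max = pbuf.buf_max = min_t(u64, crashk_res.end,
					    ppc64_rma_size - 1);
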
 	ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr);
 	if (ret)
 		goto out;
@@ -60,6 +69,25 @@
 
 	pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem);
 
+	/* Load additional segments needed for panic kernel */
+	if (image->type == KEXEC_TYPE_CRASH) {
+		ret = load_crashdump_segments_ppc64(image, &kbuf);
+		if (ret) {
+			pr_err("Failed to load kdump kernel segments\n");
+			goto out;
+		}
+
+		/* Setup cmdline for kdump kernel case */
+		modified_cmdline = setup_kdump_cmdline(image, cmdline,
+						       cmdline_len);
+		if (!modified_cmdline) {
+			pr_err("Setting up cmdline for kdump kernel failed\n");
+			ret = -EINVAL;
+			goto out;
+		}
+		cmdline = modified_cmdline;
+	}
+
 	if (initrd != NULL) {
 		kbuf.buffer = initrd;
 		kbuf.bufsz = kbuf.memsz = initrd_len;
@@ -88,7 +116,8 @@
 		goto out;
 	}
 
-	ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
+	ret = setup_new_fdt_ppc64(image, fdt, initrd_load_addr,
+				  initrd_len, cmdline);
 	if (ret)
 		goto out;
 
@@ -107,12 +136,13 @@
 	pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
 
 	slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;
-	ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
-			      fdt_load_addr);
+	ret = setup_purgatory_ppc64(image, slave_code, fdt, kernel_load_addr,
+				    fdt_load_addr);
 	if (ret)
 		pr_err("Error setting up the purgatory.\n");
 
 out:
+	kfree(modified_cmdline);
 	kexec_free_elf_info(&elf_info);
 
 	/* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c
index 143c917..9a232bc 100644
--- a/arch/powerpc/kexec/file_load.c
+++ b/arch/powerpc/kexec/file_load.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * ppc64 code to implement the kexec_file_load syscall
+ * powerpc code to implement the kexec_file_load syscall
  *
  * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
  * Copyright (C) 2004  IBM Corp.
@@ -18,23 +18,45 @@
 #include <linux/kexec.h>
 #include <linux/of_fdt.h>
 #include <linux/libfdt.h>
+#include <asm/setup.h>
 #include <asm/ima.h>
 
-#define SLAVE_CODE_SIZE		256
+#define SLAVE_CODE_SIZE		256	/* First 0x100 bytes */
 
-const struct kexec_file_ops * const kexec_file_loaders[] = {
-	&kexec_elf64_ops,
-	NULL
-};
-
-int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
-				  unsigned long buf_len)
+/**
+ * setup_kdump_cmdline - Prepend "elfcorehdr=<addr> " to command line
+ *                       of kdump kernel for exporting the core.
+ * @image:               Kexec image
+ * @cmdline:             Command line parameters to update.
+ * @cmdline_len:         Length of the cmdline parameters.
+ *
+ * kdump segment must be set up before calling this function.
+ *
+ * Returns new cmdline buffer for kdump kernel on success, NULL otherwise.
+ */
+char *setup_kdump_cmdline(struct kimage *image, char *cmdline,
+			  unsigned long cmdline_len)
 {
-	/* We don't support crash kernels yet. */
-	if (image->type == KEXEC_TYPE_CRASH)
-		return -EOPNOTSUPP;
+	int elfcorehdr_strlen;
+	char *cmdline_ptr;
 
-	return kexec_image_probe_default(image, buf, buf_len);
+	cmdline_ptr = kzalloc(COMMAND_LINE_SIZE, GFP_KERNEL);
+	if (!cmdline_ptr)
+		return NULL;
+
+	elfcorehdr_strlen = sprintf(cmdline_ptr, "elfcorehdr=0x%lx ",
+				    image->arch.elfcorehdr_addr);
+
+	if (elfcorehdr_strlen + cmdline_len > COMMAND_LINE_SIZE) {
+		pr_err("Appending elfcorehdr=<addr> exceeds cmdline size\n");
+		kfree(cmdline_ptr);
+		return NULL;
+	}
+
+	memcpy(cmdline_ptr + elfcorehdr_strlen, cmdline, cmdline_len);
+	// Ensure it's nul terminated
+	cmdline_ptr[COMMAND_LINE_SIZE - 1] = '\0';
+	return cmdline_ptr;
 }
 
 /**
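
A worked example of the result (addresses illustrative): with
image->arch.elfcorehdr_addr at 0x68000000 and an incoming cmdline of
"root=/dev/sda2 ro", the returned buffer reads

	"elfcorehdr=0x68000000 root=/dev/sda2 ro"

so the generic elfcorehdr= parameter tells the kdump kernel where the
vmcore ELF header lives.
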
@@ -236,6 +258,20 @@
 		}
 	}
 
+	if (image->type == KEXEC_TYPE_CRASH) {
+		/*
+		 * Avoid elfcorehdr from being stomped on in kdump kernel by
+		 * setting up memory reserve map.
+		 */
+		ret = fdt_add_mem_rsv(fdt, image->arch.elfcorehdr_addr,
+				      image->arch.elf_headers_sz);
+		if (ret) {
+			pr_err("Error reserving elfcorehdr memory: %s\n",
+			       fdt_strerror(ret));
+			goto err;
+		}
+	}
+
 	ret = setup_ima_buffer(image, fdt, chosen_node);
 	if (ret) {
 		pr_err("Error setting up the new device tree.\n");
diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c
new file mode 100644
index 0000000..53bb71e
--- /dev/null
+++ b/arch/powerpc/kexec/file_load_64.c
@@ -0,0 +1,1119 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ppc64 code to implement the kexec_file_load syscall
+ *
+ * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004  IBM Corp.
+ * Copyright (C) 2004,2005  Milton D Miller II, IBM Corporation
+ * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2020  IBM Corporation
+ *
+ * Based on kexec-tools' kexec-ppc64.c, kexec-elf-rel-ppc64.c, fs2dt.c.
+ * Heavily modified for the kernel by
+ * Hari Bathini, IBM Corporation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <linux/of_device.h>
+#include <linux/memblock.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <asm/drmem.h>
+#include <asm/kexec_ranges.h>
+#include <asm/crashdump-ppc64.h>
+
+struct umem_info {
+	u64 *buf;		/* data buffer for usable-memory property */
+	u32 size;		/* size allocated for the data buffer */
+	u32 max_entries;	/* maximum no. of entries */
+	u32 idx;		/* index of current entry */
+
+	/* usable memory ranges to look up */
+	unsigned int nr_ranges;
+	const struct crash_mem_range *ranges;
+};
+
+const struct kexec_file_ops * const kexec_file_loaders[] = {
+	&kexec_elf64_ops,
+	NULL
+};
+
+/**
+ * get_exclude_memory_ranges - Get exclude memory ranges. This list includes
+ *                             regions like opal/rtas, tce-table, initrd,
+ *                             kernel, htab which should be avoided while
+ *                             setting up kexec load segments.
+ * @mem_ranges:                Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int get_exclude_memory_ranges(struct crash_mem **mem_ranges)
+{
+	int ret;
+
+	ret = add_tce_mem_ranges(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_initrd_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_htab_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_kernel_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_rtas_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_opal_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_reserved_mem_ranges(mem_ranges);
+	if (ret)
+		goto out;
+
+	/* exclude memory ranges should be sorted for easy lookup */
+	sort_memory_ranges(*mem_ranges, true);
+out:
+	if (ret)
+		pr_err("Failed to setup exclude memory ranges\n");
+	return ret;
+}
+
+/**
+ * get_usable_memory_ranges - Get usable memory ranges. This list includes
+ *                            regions like crashkernel, opal/rtas & tce-table,
+ *                            that the kdump kernel could use.
+ * @mem_ranges:               Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int get_usable_memory_ranges(struct crash_mem **mem_ranges)
+{
+	int ret;
+
+	/*
+	 * Early boot failure has been observed on guests when low memory (the
+	 * first memory block?) is not added to usable memory. So, add
+	 * [0, crashk_res.end] instead of [crashk_res.start, crashk_res.end]
+	 * to work around it. Also, the crashed kernel's memory must be added
+	 * to the reserve map to prevent the kdump kernel from using it.
+	 */
+	ret = add_mem_range(mem_ranges, 0, crashk_res.end + 1);
+	if (ret)
+		goto out;
+
+	ret = add_rtas_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_opal_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_tce_mem_ranges(mem_ranges);
+out:
+	if (ret)
+		pr_err("Failed to setup usable memory ranges\n");
+	return ret;
+}
+
+/**
+ * get_crash_memory_ranges - Get crash memory ranges. This list includes
+ *                           first/crashing kernel's memory regions that
+ *                           would be exported via an elfcore.
+ * @mem_ranges:              Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int get_crash_memory_ranges(struct crash_mem **mem_ranges)
+{
+	struct memblock_region *reg;
+	struct crash_mem *tmem;
+	int ret;
+
+	for_each_memblock(memory, reg) {
+		u64 base, size;
+
+		base = (u64)reg->base;
+		size = (u64)reg->size;
+
+		/* Skip backup memory region, which needs a separate entry */
+		if (base == BACKUP_SRC_START) {
+			if (size > BACKUP_SRC_SIZE) {
+				base = BACKUP_SRC_END + 1;
+				size -= BACKUP_SRC_SIZE;
+			} else
+				continue;
+		}
+
+		ret = add_mem_range(mem_ranges, base, size);
+		if (ret)
+			goto out;
+
+		/* Try merging adjacent ranges before reallocation attempt */
+		if ((*mem_ranges)->nr_ranges == (*mem_ranges)->max_nr_ranges)
+			sort_memory_ranges(*mem_ranges, true);
+	}
+
+	/* Reallocate memory ranges if there is no space to split ranges */
+	tmem = *mem_ranges;
+	if (tmem && (tmem->nr_ranges == tmem->max_nr_ranges)) {
+		tmem = realloc_mem_ranges(mem_ranges);
+		if (!tmem)
+			goto out;
+	}
+
+	/* Exclude crashkernel region */
+	ret = crash_exclude_mem_range(tmem, crashk_res.start, crashk_res.end);
+	if (ret)
+		goto out;
+
+	/*
+	 * FIXME: For now, stay in parity with kexec-tools but if RTAS/OPAL
+	 *        regions are exported to save their context at the time of
+	 *        crash, they should actually be backed up just like the
+	 *        first 64K bytes of memory.
+	 */
+	ret = add_rtas_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_opal_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	/* create a separate program header for the backup region */
+	ret = add_mem_range(mem_ranges, BACKUP_SRC_START, BACKUP_SRC_SIZE);
+	if (ret)
+		goto out;
+
+	sort_memory_ranges(*mem_ranges, false);
+out:
+	if (ret)
+		pr_err("Failed to setup crash memory ranges\n");
+	return ret;
+}
+
+/**
+ * get_reserved_memory_ranges - Get reserved memory ranges. This list includes
+ *                              memory regions that should be added to the
+ *                              memory reserve map to ensure the region is
+ *                              protected from any mischief.
+ * @mem_ranges:                 Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int get_reserved_memory_ranges(struct crash_mem **mem_ranges)
+{
+	int ret;
+
+	ret = add_rtas_mem_range(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_tce_mem_ranges(mem_ranges);
+	if (ret)
+		goto out;
+
+	ret = add_reserved_mem_ranges(mem_ranges);
+out:
+	if (ret)
+		pr_err("Failed to setup reserved memory ranges\n");
+	return ret;
+}
+
+/**
+ * __locate_mem_hole_top_down - Looks top down for a large enough memory hole
+ *                              in the memory regions between buf_min & buf_max
+ *                              for the buffer. If found, sets kbuf->mem.
+ * @kbuf:                       Buffer contents and memory parameters.
+ * @buf_min:                    Minimum address for the buffer.
+ * @buf_max:                    Maximum address for the buffer.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int __locate_mem_hole_top_down(struct kexec_buf *kbuf,
+				      u64 buf_min, u64 buf_max)
+{
+	int ret = -EADDRNOTAVAIL;
+	phys_addr_t start, end;
+	u64 i;
+
+	for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE,
+			       MEMBLOCK_NONE, &start, &end, NULL) {
+		/*
+		 * memblock uses [start, end) convention while it is
+		 * [start, end] here. Fix the off-by-one to have the
+		 * same convention.
+		 */
+		end -= 1;
+
+		if (start > buf_max)
+			continue;
+
+		/* Memory hole not found */
+		if (end < buf_min)
+			break;
+
+		/* Adjust memory region based on the given range */
+		if (start < buf_min)
+			start = buf_min;
+		if (end > buf_max)
+			end = buf_max;
+
+		start = ALIGN(start, kbuf->buf_align);
+		if (start < end && (end - start + 1) >= kbuf->memsz) {
+			/* Suitable memory range found. Set kbuf->mem */
+			kbuf->mem = ALIGN_DOWN(end - kbuf->memsz + 1,
+					       kbuf->buf_align);
+			ret = 0;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * locate_mem_hole_top_down_ppc64 - Skip special memory regions to find a
+ *                                  suitable buffer with top down approach.
+ * @kbuf:                           Buffer contents and memory parameters.
+ * @buf_min:                        Minimum address for the buffer.
+ * @buf_max:                        Maximum address for the buffer.
+ * @emem:                           Exclude memory ranges.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int locate_mem_hole_top_down_ppc64(struct kexec_buf *kbuf,
+					  u64 buf_min, u64 buf_max,
+					  const struct crash_mem *emem)
+{
+	int i, ret = 0, err = -EADDRNOTAVAIL;
+	u64 start, end, tmin, tmax;
+
+	tmax = buf_max;
+	for (i = (emem->nr_ranges - 1); i >= 0; i--) {
+		start = emem->ranges[i].start;
+		end = emem->ranges[i].end;
+
+		if (start > tmax)
+			continue;
+
+		if (end < tmax) {
+			tmin = (end < buf_min ? buf_min : end + 1);
+			ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
+			if (!ret)
+				return 0;
+		}
+
+		tmax = start - 1;
+
+		if (tmax < buf_min) {
+			ret = err;
+			break;
+		}
+		ret = 0;
+	}
+
+	if (!ret) {
+		tmin = buf_min;
+		ret = __locate_mem_hole_top_down(kbuf, tmin, tmax);
+	}
+	return ret;
+}
+
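
A worked example of the walk (numbers illustrative): with buf_min = 0x1000,
buf_max = 0x9000 and sorted exclude ranges [0x3000-0x3fff] and
[0x6000-0x6fff], the windows probed top-down are:

	/*
	 * [0x7000, 0x9000]   above the highest exclude range
	 * [0x4000, 0x5fff]   between the two exclude ranges
	 * [0x1000, 0x2fff]   final window below the lowest range
	 */

The first window that can fit kbuf->memsz wins, and
__locate_mem_hole_top_down() places the buffer as high as alignment allows
within it.
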
+/**
+ * __locate_mem_hole_bottom_up - Looks bottom up for a large enough memory hole
+ *                               in the memory regions between buf_min & buf_max
+ *                               for the buffer. If found, sets kbuf->mem.
+ * @kbuf:                        Buffer contents and memory parameters.
+ * @buf_min:                     Minimum address for the buffer.
+ * @buf_max:                     Maximum address for the buffer.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int __locate_mem_hole_bottom_up(struct kexec_buf *kbuf,
+				       u64 buf_min, u64 buf_max)
+{
+	int ret = -EADDRNOTAVAIL;
+	phys_addr_t start, end;
+	u64 i;
+
+	for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE,
+			   MEMBLOCK_NONE, &start, &end, NULL) {
+		/*
+		 * memblock uses [start, end) convention while it is
+		 * [start, end] here. Fix the off-by-one to have the
+		 * same convention.
+		 */
+		end -= 1;
+
+		if (end < buf_min)
+			continue;
+
+		/* Memory hole not found */
+		if (start > buf_max)
+			break;
+
+		/* Adjust memory region based on the given range */
+		if (start < buf_min)
+			start = buf_min;
+		if (end > buf_max)
+			end = buf_max;
+
+		start = ALIGN(start, kbuf->buf_align);
+		if (start < end && (end - start + 1) >= kbuf->memsz) {
+			/* Suitable memory range found. Set kbuf->mem */
+			kbuf->mem = start;
+			ret = 0;
+			break;
+		}
+	}
+
+	return ret;
+}
+
+/**
+ * locate_mem_hole_bottom_up_ppc64 - Skip special memory regions to find a
+ *                                   suitable buffer with bottom up approach.
+ * @kbuf:                            Buffer contents and memory parameters.
+ * @buf_min:                         Minimum address for the buffer.
+ * @buf_max:                         Maximum address for the buffer.
+ * @emem:                            Exclude memory ranges.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int locate_mem_hole_bottom_up_ppc64(struct kexec_buf *kbuf,
+					   u64 buf_min, u64 buf_max,
+					   const struct crash_mem *emem)
+{
+	int i, ret = 0, err = -EADDRNOTAVAIL;
+	u64 start, end, tmin, tmax;
+
+	tmin = buf_min;
+	for (i = 0; i < emem->nr_ranges; i++) {
+		start = emem->ranges[i].start;
+		end = emem->ranges[i].end;
+
+		if (end < tmin)
+			continue;
+
+		if (start > tmin) {
+			tmax = (start > buf_max ? buf_max : start - 1);
+			ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax);
+			if (!ret)
+				return 0;
+		}
+
+		tmin = end + 1;
+
+		if (tmin > buf_max) {
+			ret = err;
+			break;
+		}
+		ret = 0;
+	}
+
+	if (!ret) {
+		tmax = buf_max;
+		ret = __locate_mem_hole_bottom_up(kbuf, tmin, tmax);
+	}
+	return ret;
+}
+
+/**
+ * check_realloc_usable_mem - Reallocate buffer if it can't accommodate entries
+ * @um_info:                  Usable memory buffer and ranges info.
+ * @cnt:                      No. of entries to accommodate.
+ *
+ * Frees up the old buffer if memory reallocation fails.
+ *
+ * Returns buffer on success, NULL on error.
+ */
+static u64 *check_realloc_usable_mem(struct umem_info *um_info, int cnt)
+{
+	u32 new_size;
+	u64 *tbuf;
+
+	if ((um_info->idx + cnt) <= um_info->max_entries)
+		return um_info->buf;
+
+	new_size = um_info->size + MEM_RANGE_CHUNK_SZ;
+	tbuf = krealloc(um_info->buf, new_size, GFP_KERNEL);
+	if (tbuf) {
+		um_info->buf = tbuf;
+		um_info->size = new_size;
+		um_info->max_entries = (um_info->size / sizeof(u64));
+	}
+
+	return tbuf;
+}
+
+/**
+ * add_usable_mem - Add the usable memory ranges within the given memory range
+ *                  to the buffer
+ * @um_info:        Usable memory buffer and ranges info.
+ * @base:           Base address of memory range to look for.
+ * @end:            End address of memory range to look for.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_usable_mem(struct umem_info *um_info, u64 base, u64 end)
+{
+	u64 loc_base, loc_end;
+	bool add;
+	int i;
+
+	for (i = 0; i < um_info->nr_ranges; i++) {
+		add = false;
+		loc_base = um_info->ranges[i].start;
+		loc_end = um_info->ranges[i].end;
+		if (loc_base >= base && loc_end <= end)
+			add = true;
+		else if (base < loc_end && end > loc_base) {
+			if (loc_base < base)
+				loc_base = base;
+			if (loc_end > end)
+				loc_end = end;
+			add = true;
+		}
+
+		if (add) {
+			if (!check_realloc_usable_mem(um_info, 2))
+				return -ENOMEM;
+
+			um_info->buf[um_info->idx++] = cpu_to_be64(loc_base);
+			um_info->buf[um_info->idx++] =
+					cpu_to_be64(loc_end - loc_base + 1);
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * kdump_setup_usable_lmb - This is a callback function that gets called by
+ *                          walk_drmem_lmbs for every LMB to set its
+ *                          usable memory ranges.
+ * @lmb:                    LMB info.
+ * @usm:                    linux,drconf-usable-memory property value.
+ * @data:                   Pointer to usable memory buffer and ranges info.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int kdump_setup_usable_lmb(struct drmem_lmb *lmb, const __be32 **usm,
+				  void *data)
+{
+	struct umem_info *um_info;
+	int tmp_idx, ret;
+	u64 base, end;
+
+	/*
+	 * kdump load isn't supported on kernels already booted with
+	 * linux,drconf-usable-memory property.
+	 */
+	if (*usm) {
+		pr_err("linux,drconf-usable-memory property already exists!");
+		return -EINVAL;
+	}
+
+	um_info = data;
+	tmp_idx = um_info->idx;
+	if (!check_realloc_usable_mem(um_info, 1))
+		return -ENOMEM;
+
+	um_info->idx++;
+	base = lmb->base_addr;
+	end = base + drmem_lmb_size() - 1;
+	ret = add_usable_mem(um_info, base, end);
+	if (!ret) {
+		/*
+		 * Update the no. of ranges added. Two entries (base & size)
+		 * for every range added.
+		 */
+		um_info->buf[tmp_idx] =
+				cpu_to_be64((um_info->idx - tmp_idx - 1) / 2);
+	}
+
+	return ret;
+}
+
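
The buffer layout this callback produces, worked through for one LMB that
intersects two usable ranges (all cells big-endian u64, values illustrative):

	/*
	 * buf[tmp_idx + 0] = 2      number of (base, size) pairs for this LMB
	 * buf[tmp_idx + 1] = base0  first usable base within the LMB
	 * buf[tmp_idx + 2] = size0
	 * buf[tmp_idx + 3] = base1
	 * buf[tmp_idx + 4] = size1
	 *
	 * idx advanced by 5, so (idx - tmp_idx - 1) / 2 == 2 as computed above.
	 */
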
+#define NODE_PATH_LEN		256
+/**
+ * add_usable_mem_property - Add usable memory property for the given
+ *                           memory node.
+ * @fdt:                     Flattened device tree for the kdump kernel.
+ * @dn:                      Memory node.
+ * @um_info:                 Usable memory buffer and ranges info.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_usable_mem_property(void *fdt, struct device_node *dn,
+				   struct umem_info *um_info)
+{
+	int n_mem_addr_cells, n_mem_size_cells, node;
+	char path[NODE_PATH_LEN];
+	int i, len, ranges, ret;
+	const __be32 *prop;
+	u64 base, end;
+
+	of_node_get(dn);
+
+	if (snprintf(path, NODE_PATH_LEN, "%pOF", dn) > (NODE_PATH_LEN - 1)) {
+		pr_err("Buffer (%d) too small for memory node: %pOF\n",
+		       NODE_PATH_LEN, dn);
+		return -EOVERFLOW;
+	}
+	pr_debug("Memory node path: %s\n", path);
+
+	/* Now that we know the path, find its offset in kdump kernel's fdt */
+	node = fdt_path_offset(fdt, path);
+	if (node < 0) {
+		pr_err("Malformed device tree: error reading %s\n", path);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* Get the address & size cells */
+	n_mem_addr_cells = of_n_addr_cells(dn);
+	n_mem_size_cells = of_n_size_cells(dn);
+	pr_debug("address cells: %d, size cells: %d\n", n_mem_addr_cells,
+		 n_mem_size_cells);
+
+	um_info->idx  = 0;
+	if (!check_realloc_usable_mem(um_info, 2)) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	prop = of_get_property(dn, "reg", &len);
+	if (!prop || len <= 0) {
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * "reg" property represents sequence of (addr,size) tuples
+	 * each representing a memory range.
+	 */
+	ranges = (len >> 2) / (n_mem_addr_cells + n_mem_size_cells);
+
+	for (i = 0; i < ranges; i++) {
+		base = of_read_number(prop, n_mem_addr_cells);
+		prop += n_mem_addr_cells;
+		end = base + of_read_number(prop, n_mem_size_cells) - 1;
+		prop += n_mem_size_cells;
+
+		ret = add_usable_mem(um_info, base, end);
+		if (ret)
+			goto out;
+	}
+
+	/*
+	 * No kdump kernel usable memory found in this memory node.
+	 * Write a (0,0) tuple in the linux,usable-memory property so
+	 * that this region is ignored.
+	 */
+	if (um_info->idx == 0) {
+		um_info->buf[0] = 0;
+		um_info->buf[1] = 0;
+		um_info->idx = 2;
+	}
+
+	ret = fdt_setprop(fdt, node, "linux,usable-memory", um_info->buf,
+			  (um_info->idx * sizeof(u64)));
+
+out:
+	of_node_put(dn);
+	return ret;
+}
+
+
+/**
+ * update_usable_mem_fdt - Updates kdump kernel's fdt with linux,usable-memory
+ *                         and linux,drconf-usable-memory DT properties as
+ *                         appropriate to restrict its memory usage.
+ * @fdt:                   Flattened device tree for the kdump kernel.
+ * @usable_mem:            Usable memory ranges for kdump kernel.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int update_usable_mem_fdt(void *fdt, struct crash_mem *usable_mem)
+{
+	struct umem_info um_info;
+	struct device_node *dn;
+	int node, ret = 0;
+
+	if (!usable_mem) {
+		pr_err("Usable memory ranges for kdump kernel not found\n");
+		return -ENOENT;
+	}
+
+	node = fdt_path_offset(fdt, "/ibm,dynamic-reconfiguration-memory");
+	if (node == -FDT_ERR_NOTFOUND)
+		pr_debug("No dynamic reconfiguration memory found\n");
+	else if (node < 0) {
+		pr_err("Malformed device tree: error reading /ibm,dynamic-reconfiguration-memory.\n");
+		return -EINVAL;
+	}
+
+	um_info.buf  = NULL;
+	um_info.size = 0;
+	um_info.max_entries = 0;
+	um_info.idx  = 0;
+	/* Memory ranges to look up */
+	um_info.ranges = &(usable_mem->ranges[0]);
+	um_info.nr_ranges = usable_mem->nr_ranges;
+
+	dn = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
+	if (dn) {
+		ret = walk_drmem_lmbs(dn, &um_info, kdump_setup_usable_lmb);
+		of_node_put(dn);
+
+		if (ret) {
+			pr_err("Could not setup linux,drconf-usable-memory property for kdump\n");
+			goto out;
+		}
+
+		ret = fdt_setprop(fdt, node, "linux,drconf-usable-memory",
+				  um_info.buf, (um_info.idx * sizeof(u64)));
+		if (ret) {
+			pr_err("Failed to update fdt with linux,drconf-usable-memory property");
+			goto out;
+		}
+	}
+
+	/*
+	 * Walk through each memory node and set linux,usable-memory property
+	 * for the corresponding node in kdump kernel's fdt.
+	 */
+	for_each_node_by_type(dn, "memory") {
+		ret = add_usable_mem_property(fdt, dn, &um_info);
+		if (ret) {
+			pr_err("Failed to set linux,usable-memory property for %s node",
+			       dn->full_name);
+			goto out;
+		}
+	}
+
+out:
+	kfree(um_info.buf);
+	return ret;
+}
+
+/**
+ * load_backup_segment - Locate a memory hole to place the backup region.
+ * @image:               Kexec image.
+ * @kbuf:                Buffer contents and memory parameters.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int load_backup_segment(struct kimage *image, struct kexec_buf *kbuf)
+{
+	void *buf;
+	int ret;
+
+	/*
+	 * Setup a source buffer for backup segment.
+	 *
+	 * A source buffer has no meaning for backup region as data will
+	 * be copied from backup source, after crash, in the purgatory.
+	 * But as the load segment code doesn't recognize such segments,
+	 * set up a dummy source buffer to keep it happy for now.
+	 */
+	buf = vzalloc(BACKUP_SRC_SIZE);
+	if (!buf)
+		return -ENOMEM;
+
+	kbuf->buffer = buf;
+	kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
+	kbuf->bufsz = kbuf->memsz = BACKUP_SRC_SIZE;
+	kbuf->top_down = false;
+
+	ret = kexec_add_buffer(kbuf);
+	if (ret) {
+		vfree(buf);
+		return ret;
+	}
+
+	image->arch.backup_buf = buf;
+	image->arch.backup_start = kbuf->mem;
+	return 0;
+}
+
+/**
+ * update_backup_region_phdr - Update backup region's offset for the core to
+ *                             export the region appropriately.
+ * @image:                     Kexec image.
+ * @ehdr:                      ELF core header.
+ *
+ * Assumes an exclusive program header is set up for the backup region
+ * in the ELF headers.
+ *
+ * Returns nothing.
+ */
+static void update_backup_region_phdr(struct kimage *image, Elf64_Ehdr *ehdr)
+{
+	Elf64_Phdr *phdr;
+	unsigned int i;
+
+	phdr = (Elf64_Phdr *)(ehdr + 1);
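+	/*
+	 * Per the assumption above, exactly one program header covers the
+	 * backup region; find it by its physical address and point its file
+	 * offset at the backed-up copy.
+	 */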
+	for (i = 0; i < ehdr->e_phnum; i++, phdr++) {
+		if (phdr->p_paddr == BACKUP_SRC_START) {
+			phdr->p_offset = image->arch.backup_start;
+			pr_debug("Backup region offset updated to 0x%lx\n",
+				 image->arch.backup_start);
+			return;
+		}
+	}
+}
+
+/**
+ * load_elfcorehdr_segment - Setup crash memory ranges and initialize elfcorehdr
+ *                           segment needed to load kdump kernel.
+ * @image:                   Kexec image.
+ * @kbuf:                    Buffer contents and memory parameters.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int load_elfcorehdr_segment(struct kimage *image, struct kexec_buf *kbuf)
+{
+	struct crash_mem *cmem = NULL;
+	unsigned long headers_sz;
+	void *headers = NULL;
+	int ret;
+
+	ret = get_crash_memory_ranges(&cmem);
+	if (ret)
+		goto out;
+
+	/* Setup elfcorehdr segment */
+	ret = crash_prepare_elf64_headers(cmem, false, &headers, &headers_sz);
+	if (ret) {
+		pr_err("Failed to prepare elf headers for the core\n");
+		goto out;
+	}
+
+	/* Fix the offset for backup region in the ELF header */
+	update_backup_region_phdr(image, headers);
+
+	kbuf->buffer = headers;
+	kbuf->mem = KEXEC_BUF_MEM_UNKNOWN;
+	kbuf->bufsz = kbuf->memsz = headers_sz;
+	kbuf->top_down = false;
+
+	ret = kexec_add_buffer(kbuf);
+	if (ret) {
+		vfree(headers);
+		goto out;
+	}
+
+	image->arch.elfcorehdr_addr = kbuf->mem;
+	image->arch.elf_headers_sz = headers_sz;
+	image->arch.elf_headers = headers;
+out:
+	kfree(cmem);
+	return ret;
+}
+
+/**
+ * load_crashdump_segments_ppc64 - Initialize the additional segments needed
+ *                                 to load kdump kernel.
+ * @image:                         Kexec image.
+ * @kbuf:                          Buffer contents and memory parameters.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int load_crashdump_segments_ppc64(struct kimage *image,
+				  struct kexec_buf *kbuf)
+{
+	int ret;
+
+	/* Load backup segment - first 64K bytes of the crashing kernel */
+	ret = load_backup_segment(image, kbuf);
+	if (ret) {
+		pr_err("Failed to load backup segment\n");
+		return ret;
+	}
+	pr_debug("Loaded the backup region at 0x%lx\n", kbuf->mem);
+
+	/* Load elfcorehdr segment - to export crashing kernel's vmcore */
+	ret = load_elfcorehdr_segment(image, kbuf);
+	if (ret) {
+		pr_err("Failed to load elfcorehdr segment\n");
+		return ret;
+	}
+	pr_debug("Loaded elf core header at 0x%lx, bufsz=0x%lx memsz=0x%lx\n",
+		 image->arch.elfcorehdr_addr, kbuf->bufsz, kbuf->memsz);
+
+	return 0;
+}
+
+/**
+ * setup_purgatory_ppc64 - initialize PPC64 specific purgatory's global
+ *                         variables and call setup_purgatory() to initialize
+ *                         common global variable.
+ * @image:                 kexec image.
+ * @slave_code:            Slave code for the purgatory.
+ * @fdt:                   Flattened device tree for the next kernel.
+ * @kernel_load_addr:      Address where the kernel is loaded.
+ * @fdt_load_addr:         Address where the flattened device tree is loaded.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
+			  const void *fdt, unsigned long kernel_load_addr,
+			  unsigned long fdt_load_addr)
+{
+	struct device_node *dn = NULL;
+	int ret;
+
+	ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
+			      fdt_load_addr);
+	if (ret)
+		goto out;
+
+	if (image->type == KEXEC_TYPE_CRASH) {
+		u32 my_run_at_load = 1;
+
+		/*
+		 * Tell relocatable kernel to run at load address
+		 * via the word meant for that at 0x5c.
+		 */
+		ret = kexec_purgatory_get_set_symbol(image, "run_at_load",
+						     &my_run_at_load,
+						     sizeof(my_run_at_load),
+						     false);
+		if (ret)
+			goto out;
+	}
+
+	/* Tell purgatory where to look for backup region */
+	ret = kexec_purgatory_get_set_symbol(image, "backup_start",
+					     &image->arch.backup_start,
+					     sizeof(image->arch.backup_start),
+					     false);
+	if (ret)
+		goto out;
+
+	/* Setup OPAL base & entry values */
+	dn = of_find_node_by_path("/ibm,opal");
+	if (dn) {
+		u64 val;
+
+		of_property_read_u64(dn, "opal-base-address", &val);
+		ret = kexec_purgatory_get_set_symbol(image, "opal_base", &val,
+						     sizeof(val), false);
+		if (ret)
+			goto out;
+
+		of_property_read_u64(dn, "opal-entry-address", &val);
+		ret = kexec_purgatory_get_set_symbol(image, "opal_entry", &val,
+						     sizeof(val), false);
+	}
+out:
+	if (ret)
+		pr_err("Failed to setup purgatory symbols");
+	of_node_put(dn);
+	return ret;
+}
+
+/**
+ * setup_new_fdt_ppc64 - Update the flattened device tree of the kernel
+ *                       being loaded.
+ * @image:               kexec image being loaded.
+ * @fdt:                 Flattened device tree for the next kernel.
+ * @initrd_load_addr:    Address where the next initrd will be loaded.
+ * @initrd_len:          Size of the next initrd, or 0 if there will be none.
+ * @cmdline:             Command line for the next kernel, or NULL if there will
+ *                       be none.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int setup_new_fdt_ppc64(const struct kimage *image, void *fdt,
+			unsigned long initrd_load_addr,
+			unsigned long initrd_len, const char *cmdline)
+{
+	struct crash_mem *umem = NULL, *rmem = NULL;
+	int i, nr_ranges, ret;
+
+	ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
+	if (ret)
+		goto out;
+
+	/*
+	 * Restrict memory usage for kdump kernel by setting up
+	 * usable memory ranges and memory reserve map.
+	 */
+	if (image->type == KEXEC_TYPE_CRASH) {
+		ret = get_usable_memory_ranges(&umem);
+		if (ret)
+			goto out;
+
+		ret = update_usable_mem_fdt(fdt, umem);
+		if (ret) {
+			pr_err("Error setting up usable-memory property for kdump kernel\n");
+			goto out;
+		}
+
+		/*
+		 * Ensure we don't touch the crashed kernel's memory except
+		 * the first 64K of RAM, which will be backed up. The backup
+		 * region starts at address 0, so the reservation runs from
+		 * BACKUP_SRC_END + 1 up to crashk_res.start.
+		 */
+		ret = fdt_add_mem_rsv(fdt, BACKUP_SRC_END + 1,
+				      crashk_res.start - BACKUP_SRC_SIZE);
+		if (ret) {
+			pr_err("Error reserving crash memory: %s\n",
+			       fdt_strerror(ret));
+			goto out;
+		}
+
+		/* Ensure backup region is not used by kdump/capture kernel */
+		ret = fdt_add_mem_rsv(fdt, image->arch.backup_start,
+				      BACKUP_SRC_SIZE);
+		if (ret) {
+			pr_err("Error reserving memory for backup: %s\n",
+			       fdt_strerror(ret));
+			goto out;
+		}
+	}
+
+	/* Update memory reserve map */
+	ret = get_reserved_memory_ranges(&rmem);
+	if (ret)
+		goto out;
+
+	nr_ranges = rmem ? rmem->nr_ranges : 0;
+	for (i = 0; i < nr_ranges; i++) {
+		u64 base, size;
+
+		base = rmem->ranges[i].start;
+		size = rmem->ranges[i].end - base + 1;
+		ret = fdt_add_mem_rsv(fdt, base, size);
+		if (ret) {
+			pr_err("Error updating memory reserve map: %s\n",
+			       fdt_strerror(ret));
+			goto out;
+		}
+	}
+
+out:
+	kfree(rmem);
+	kfree(umem);
+	return ret;
+}
+
+/**
+ * arch_kexec_locate_mem_hole - Skip special memory regions like rtas, opal,
+ *                              tce-table, reserved-ranges & such (exclude
+ *                              memory ranges) as they can't be used for kexec
+ *                              segment buffer. Sets kbuf->mem when a suitable
+ *                              memory hole is found.
+ * @kbuf:                       Buffer contents and memory parameters.
+ *
+ * Assumes minimum of PAGE_SIZE alignment for kbuf->memsz & kbuf->buf_align.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
+{
+	struct crash_mem **emem;
+	u64 buf_min, buf_max;
+	int ret;
+
+	/* Look up the exclude ranges list while locating the memory hole */
+	emem = &(kbuf->image->arch.exclude_ranges);
+	if (!(*emem) || ((*emem)->nr_ranges == 0)) {
+		pr_warn("No exclude range list. Using the default locate mem hole method\n");
+		return kexec_locate_mem_hole(kbuf);
+	}
+
+	buf_min = kbuf->buf_min;
+	buf_max = kbuf->buf_max;
+	/* Segments for kdump kernel should be within crashkernel region */
+	if (kbuf->image->type == KEXEC_TYPE_CRASH) {
+		buf_min = (buf_min < crashk_res.start ?
+			   crashk_res.start : buf_min);
+		buf_max = (buf_max > crashk_res.end ?
+			   crashk_res.end : buf_max);
+	}
+
+	if (buf_min > buf_max) {
+		pr_err("Invalid buffer min and/or max values\n");
+		return -EINVAL;
+	}
+
+	if (kbuf->top_down)
+		ret = locate_mem_hole_top_down_ppc64(kbuf, buf_min, buf_max,
+						     *emem);
+	else
+		ret = locate_mem_hole_bottom_up_ppc64(kbuf, buf_min, buf_max,
+						      *emem);
+
+	/* Add the buffer allocated to the exclude list for the next lookup */
+	if (!ret) {
+		add_mem_range(emem, kbuf->mem, kbuf->memsz);
+		sort_memory_ranges(*emem, true);
+	} else {
+		pr_err("Failed to locate memory buffer of size %lu\n",
+		       kbuf->memsz);
+	}
+	return ret;
+}
+
+/**
+ * arch_kexec_kernel_image_probe - Does the additional handling needed to
+ *                                 set up kexec segments.
+ * @image:                         kexec image being loaded.
+ * @buf:                           Buffer pointing to elf data.
+ * @buf_len:                       Length of the buffer.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+				  unsigned long buf_len)
+{
+	int ret;
+
+	/* Get exclude memory ranges needed for setting up kexec segments */
+	ret = get_exclude_memory_ranges(&(image->arch.exclude_ranges));
+	if (ret) {
+		pr_err("Failed to setup exclude memory ranges for buffer lookup\n");
+		return ret;
+	}
+
+	return kexec_image_probe_default(image, buf, buf_len);
+}
+
+/**
+ * arch_kimage_file_post_load_cleanup - Frees up all the allocations done
+ *                                      while loading the image.
+ * @image:                              kexec image being loaded.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+	kfree(image->arch.exclude_ranges);
+	image->arch.exclude_ranges = NULL;
+
+	vfree(image->arch.backup_buf);
+	image->arch.backup_buf = NULL;
+
+	vfree(image->arch.elf_headers);
+	image->arch.elf_headers = NULL;
+	image->arch.elf_headers_sz = 0;
+
+	return kexec_image_post_load_cleanup_default(image);
+}
diff --git a/arch/powerpc/kexec/ranges.c b/arch/powerpc/kexec/ranges.c
new file mode 100644
index 0000000..6b81c85
--- /dev/null
+++ b/arch/powerpc/kexec/ranges.c
@@ -0,0 +1,412 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * powerpc code to implement the kexec_file_load syscall
+ *
+ * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004  IBM Corp.
+ * Copyright (C) 2004,2005  Milton D Miller II, IBM Corporation
+ * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2020  IBM Corporation
+ *
+ * Based on kexec-tools' kexec-ppc64.c, fs2dt.c.
+ * Heavily modified for the kernel by
+ * Hari Bathini, IBM Corporation.
+ */
+
+#define pr_fmt(fmt) "kexec ranges: " fmt
+
+#include <linux/sort.h>
+#include <linux/kexec.h>
+#include <linux/of_device.h>
+#include <linux/slab.h>
+#include <asm/sections.h>
+#include <asm/kexec_ranges.h>
+
+/**
+ * get_max_nr_ranges - Get the max no. of ranges the crash_mem structure
+ *                     could hold, given the size allocated for it.
+ * @size:              Allocation size of crash_mem structure.
+ *
+ * Returns the maximum no. of ranges.
+ */
+static inline unsigned int get_max_nr_ranges(size_t size)
+{
+	return ((size - sizeof(struct crash_mem)) /
+		sizeof(struct crash_mem_range));
+}
+
+/**
+ * get_mem_rngs_size - Get the allocated size of mem_rngs based on
+ *                     max_nr_ranges and chunk size.
+ * @mem_rngs:          Memory ranges.
+ *
+ * Returns the maximum size of @mem_rngs.
+ */
+static inline size_t get_mem_rngs_size(struct crash_mem *mem_rngs)
+{
+	size_t size;
+
+	if (!mem_rngs)
+		return 0;
+
+	size = (sizeof(struct crash_mem) +
+		(mem_rngs->max_nr_ranges * sizeof(struct crash_mem_range)));
+
+	/*
+	 * Memory is allocated in size multiple of MEM_RANGE_CHUNK_SZ.
+	 * So, align to get the actual length.
+	 */
+	return ALIGN(size, MEM_RANGE_CHUNK_SZ);
+}
+
+/**
+ * __add_mem_range - Add a memory range to the memory ranges list.
+ * @mem_ranges:      Range list to add the memory range to.
+ * @base:            Base address of the range to add.
+ * @size:            Size of the memory range to add.
+ *
+ * (Re)allocates memory, if needed.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int __add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
+{
+	struct crash_mem *mem_rngs = *mem_ranges;
+
+	if (!mem_rngs || (mem_rngs->nr_ranges == mem_rngs->max_nr_ranges)) {
+		mem_rngs = realloc_mem_ranges(mem_ranges);
+		if (!mem_rngs)
+			return -ENOMEM;
+	}
+
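+	/* Ranges are stored as inclusive [start, end], hence base + size - 1 */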
+	mem_rngs->ranges[mem_rngs->nr_ranges].start = base;
+	mem_rngs->ranges[mem_rngs->nr_ranges].end = base + size - 1;
+	pr_debug("Added memory range [%#016llx - %#016llx] at index %d\n",
+		 base, base + size - 1, mem_rngs->nr_ranges);
+	mem_rngs->nr_ranges++;
+	return 0;
+}
+
+/**
+ * __merge_memory_ranges - Merges the given memory ranges list.
+ * @mem_rngs:              Range list to merge.
+ *
+ * Assumes a sorted range list.
+ *
+ * Returns nothing.
+ */
+static void __merge_memory_ranges(struct crash_mem *mem_rngs)
+{
+	struct crash_mem_range *ranges;
+	int i, idx;
+
+	if (!mem_rngs)
+		return;
+
+	idx = 0;
+	ranges = &(mem_rngs->ranges[0]);
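+	/*
+	 * With the list sorted by start address, a range beginning at or
+	 * before (previous range's end + 1) is adjacent or overlapping and
+	 * extends the merged range at @idx; e.g. [0x0 - 0xfff] and
+	 * [0x1000 - 0x1fff] collapse into [0x0 - 0x1fff]. A disjoint range
+	 * is compacted into slot idx + 1 instead.
+	 */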
+	for (i = 1; i < mem_rngs->nr_ranges; i++) {
+		if (ranges[i].start <= (ranges[i-1].end + 1))
+			ranges[idx].end = ranges[i].end;
+		else {
+			idx++;
+			if (i == idx)
+				continue;
+
+			ranges[idx] = ranges[i];
+		}
+	}
+	mem_rngs->nr_ranges = idx + 1;
+}
+
+/* cmp_func_t callback to sort ranges with sort() */
+static int rngcmp(const void *_x, const void *_y)
+{
+	const struct crash_mem_range *x = _x, *y = _y;
+
+	if (x->start > y->start)
+		return 1;
+	if (x->start < y->start)
+		return -1;
+	return 0;
+}
+
+/**
+ * sort_memory_ranges - Sorts the given memory ranges list.
+ * @mem_rngs:           Range list to sort.
+ * @merge:              If true, merge the list after sorting.
+ *
+ * Returns nothing.
+ */
+void sort_memory_ranges(struct crash_mem *mem_rngs, bool merge)
+{
+	int i;
+
+	if (!mem_rngs)
+		return;
+
+	/* Sort the ranges in-place */
+	sort(&(mem_rngs->ranges[0]), mem_rngs->nr_ranges,
+	     sizeof(mem_rngs->ranges[0]), rngcmp, NULL);
+
+	if (merge)
+		__merge_memory_ranges(mem_rngs);
+
+	/* For debugging purpose */
+	pr_debug("Memory ranges:\n");
+	for (i = 0; i < mem_rngs->nr_ranges; i++) {
+		pr_debug("\t[%03d][%#016llx - %#016llx]\n", i,
+			 mem_rngs->ranges[i].start,
+			 mem_rngs->ranges[i].end);
+	}
+}
+
+/**
+ * realloc_mem_ranges - reallocate mem_ranges with size incremented
+ *                      by MEM_RANGE_CHUNK_SZ. Frees the old memory
+ *                      if the allocation fails.
+ * @mem_ranges:         Memory ranges to reallocate.
+ *
+ * Returns pointer to reallocated memory on success, NULL otherwise.
+ */
+struct crash_mem *realloc_mem_ranges(struct crash_mem **mem_ranges)
+{
+	struct crash_mem *mem_rngs = *mem_ranges;
+	unsigned int nr_ranges;
+	size_t size;
+
+	size = get_mem_rngs_size(mem_rngs);
+	nr_ranges = mem_rngs ? mem_rngs->nr_ranges : 0;
+
+	size += MEM_RANGE_CHUNK_SZ;
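+	/*
+	 * Grow by one MEM_RANGE_CHUNK_SZ chunk. Note that krealloc() leaves
+	 * the old buffer allocated on failure, so it must be freed explicitly
+	 * to avoid a leak.
+	 */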
+	mem_rngs = krealloc(*mem_ranges, size, GFP_KERNEL);
+	if (!mem_rngs) {
+		kfree(*mem_ranges);
+		*mem_ranges = NULL;
+		return NULL;
+	}
+
+	mem_rngs->nr_ranges = nr_ranges;
+	mem_rngs->max_nr_ranges = get_max_nr_ranges(size);
+	*mem_ranges = mem_rngs;
+
+	return mem_rngs;
+}
+
+/**
+ * add_mem_range - Updates an existing memory range, if there is an overlap.
+ *                 Else, adds a new memory range.
+ * @mem_ranges:    Range list to add the memory range to.
+ * @base:          Base address of the range to add.
+ * @size:          Size of the memory range to add.
+ *
+ * (Re)allocates memory, if needed.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_mem_range(struct crash_mem **mem_ranges, u64 base, u64 size)
+{
+	struct crash_mem *mem_rngs = *mem_ranges;
+	u64 mstart, mend, end;
+	unsigned int i;
+
+	if (!size)
+		return 0;
+
+	end = base + size - 1;
+
+	if (!mem_rngs || !(mem_rngs->nr_ranges))
+		return __add_mem_range(mem_ranges, base, size);
+
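+	/*
+	 * If the new range overlaps an existing one, grow that entry in
+	 * place rather than adding a duplicate; otherwise fall through and
+	 * append a fresh range.
+	 */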
+	for (i = 0; i < mem_rngs->nr_ranges; i++) {
+		mstart = mem_rngs->ranges[i].start;
+		mend = mem_rngs->ranges[i].end;
+		if (base < mend && end > mstart) {
+			if (base < mstart)
+				mem_rngs->ranges[i].start = base;
+			if (end > mend)
+				mem_rngs->ranges[i].end = end;
+			return 0;
+		}
+	}
+
+	return __add_mem_range(mem_ranges, base, size);
+}
+
+/**
+ * add_tce_mem_ranges - Adds tce-table range to the given memory ranges list.
+ * @mem_ranges:         Range list to add the memory range(s) to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_tce_mem_ranges(struct crash_mem **mem_ranges)
+{
+	struct device_node *dn = NULL;
+	int ret = 0;
+
+	for_each_node_by_type(dn, "pci") {
+		u64 base;
+		u32 size;
+
+		ret = of_property_read_u64(dn, "linux,tce-base", &base);
+		ret |= of_property_read_u32(dn, "linux,tce-size", &size);
+		if (ret) {
+			/*
+			 * It is ok to have PCI nodes without TCEs. So, ignore
+			 * the "property does not exist" error.
+			 */
+			if (ret == -EINVAL) {
+				ret = 0;
+				continue;
+			}
+			break;
+		}
+
+		ret = add_mem_range(mem_ranges, base, size);
+		if (ret)
+			break;
+	}
+
+	of_node_put(dn);
+	return ret;
+}
+
+/**
+ * add_initrd_mem_range - Adds initrd range to the given memory ranges list,
+ *                        if the initrd was retained.
+ * @mem_ranges:           Range list to add the memory range to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_initrd_mem_range(struct crash_mem **mem_ranges)
+{
+	u64 base, end;
+	int ret;
+
+	/* This range means something only if the initrd was retained */
+	if (!strstr(saved_command_line, "retain_initrd"))
+		return 0;
+
+	ret = of_property_read_u64(of_chosen, "linux,initrd-start", &base);
+	ret |= of_property_read_u64(of_chosen, "linux,initrd-end", &end);
+	if (!ret)
+		ret = add_mem_range(mem_ranges, base, end - base + 1);
+
+	return ret;
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/**
+ * add_htab_mem_range - Adds htab range to the given memory ranges list,
+ *                      if it exists.
+ * @mem_ranges:         Range list to add the memory range to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_htab_mem_range(struct crash_mem **mem_ranges)
+{
+	if (!htab_address)
+		return 0;
+
+	return add_mem_range(mem_ranges, __pa(htab_address), htab_size_bytes);
+}
+#endif
+
+/**
+ * add_kernel_mem_range - Adds kernel text region to the given
+ *                        memory ranges list.
+ * @mem_ranges:           Range list to add the memory range to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_kernel_mem_range(struct crash_mem **mem_ranges)
+{
+	return add_mem_range(mem_ranges, 0, __pa(_end));
+}
+
+/**
+ * add_rtas_mem_range - Adds RTAS region to the given memory ranges list.
+ * @mem_ranges:         Range list to add the memory range to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_rtas_mem_range(struct crash_mem **mem_ranges)
+{
+	struct device_node *dn;
+	u32 base, size;
+	int ret = 0;
+
+	dn = of_find_node_by_path("/rtas");
+	if (!dn)
+		return 0;
+
+	ret = of_property_read_u32(dn, "linux,rtas-base", &base);
+	ret |= of_property_read_u32(dn, "rtas-size", &size);
+	if (!ret)
+		ret = add_mem_range(mem_ranges, base, size);
+
+	of_node_put(dn);
+	return ret;
+}
+
+/**
+ * add_opal_mem_range - Adds OPAL region to the given memory ranges list.
+ * @mem_ranges:         Range list to add the memory range to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_opal_mem_range(struct crash_mem **mem_ranges)
+{
+	struct device_node *dn;
+	u64 base, size;
+	int ret;
+
+	dn = of_find_node_by_path("/ibm,opal");
+	if (!dn)
+		return 0;
+
+	ret = of_property_read_u64(dn, "opal-base-address", &base);
+	ret |= of_property_read_u64(dn, "opal-runtime-size", &size);
+	if (!ret)
+		ret = add_mem_range(mem_ranges, base, size);
+
+	of_node_put(dn);
+	return ret;
+}
+
+/**
+ * add_reserved_mem_ranges - Adds "/reserved-ranges" regions exported by f/w
+ *                           to the given memory ranges list.
+ * @mem_ranges:              Range list to add the memory ranges to.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+int add_reserved_mem_ranges(struct crash_mem **mem_ranges)
+{
+	int n_mem_addr_cells, n_mem_size_cells, i, len, cells, ret = 0;
+	const __be32 *prop;
+
+	prop = of_get_property(of_root, "reserved-ranges", &len);
+	if (!prop)
+		return 0;
+
+	n_mem_addr_cells = of_n_addr_cells(of_root);
+	n_mem_size_cells = of_n_size_cells(of_root);
+	cells = n_mem_addr_cells + n_mem_size_cells;
+
+	/* Each reserved range is an (address,size) pair */
+	for (i = 0; i < (len / (sizeof(u32) * cells)); i++) {
+		u64 base, size;
+
+		base = of_read_number(prop + (i * cells), n_mem_addr_cells);
+		size = of_read_number(prop + (i * cells) + n_mem_addr_cells,
+				      n_mem_size_cells);
+
+		ret = add_mem_range(mem_ranges, base, size);
+		if (ret)
+			break;
+	}
+
+	return ret;
+}
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index ebb04f3..0f83f39 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -765,7 +765,7 @@
 			return H_P3;
 		vcpu->arch.ciabr  = value1;
 		return H_SUCCESS;
-	case H_SET_MODE_RESOURCE_SET_DAWR:
+	case H_SET_MODE_RESOURCE_SET_DAWR0:
 		if (!kvmppc_power8_compatible(vcpu))
 			return H_P2;
 		if (!ppc_breakpoint_available())
@@ -1680,10 +1680,22 @@
 	case KVM_REG_PPC_UAMOR:
 		*val = get_reg_val(id, vcpu->arch.uamor);
 		break;
-	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
+	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1:
 		i = id - KVM_REG_PPC_MMCR0;
 		*val = get_reg_val(id, vcpu->arch.mmcr[i]);
 		break;
+	case KVM_REG_PPC_MMCR2:
+		*val = get_reg_val(id, vcpu->arch.mmcr[2]);
+		break;
+	case KVM_REG_PPC_MMCRA:
+		*val = get_reg_val(id, vcpu->arch.mmcra);
+		break;
+	case KVM_REG_PPC_MMCRS:
+		*val = get_reg_val(id, vcpu->arch.mmcrs);
+		break;
+	case KVM_REG_PPC_MMCR3:
+		*val = get_reg_val(id, vcpu->arch.mmcr[3]);
+		break;
 	case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
 		i = id - KVM_REG_PPC_PMC1;
 		*val = get_reg_val(id, vcpu->arch.pmc[i]);
@@ -1699,7 +1711,13 @@
 		*val = get_reg_val(id, vcpu->arch.sdar);
 		break;
 	case KVM_REG_PPC_SIER:
-		*val = get_reg_val(id, vcpu->arch.sier);
+		*val = get_reg_val(id, vcpu->arch.sier[0]);
+		break;
+	case KVM_REG_PPC_SIER2:
+		*val = get_reg_val(id, vcpu->arch.sier[1]);
+		break;
+	case KVM_REG_PPC_SIER3:
+		*val = get_reg_val(id, vcpu->arch.sier[2]);
 		break;
 	case KVM_REG_PPC_IAMR:
 		*val = get_reg_val(id, vcpu->arch.iamr);
@@ -1901,10 +1919,22 @@
 	case KVM_REG_PPC_UAMOR:
 		vcpu->arch.uamor = set_reg_val(id, *val);
 		break;
-	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCRS:
+	case KVM_REG_PPC_MMCR0 ... KVM_REG_PPC_MMCR1:
 		i = id - KVM_REG_PPC_MMCR0;
 		vcpu->arch.mmcr[i] = set_reg_val(id, *val);
 		break;
+	case KVM_REG_PPC_MMCR2:
+		vcpu->arch.mmcr[2] = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MMCRA:
+		vcpu->arch.mmcra = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MMCRS:
+		vcpu->arch.mmcrs = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_MMCR3:
+		vcpu->arch.mmcr[3] = set_reg_val(id, *val);
+		break;
 	case KVM_REG_PPC_PMC1 ... KVM_REG_PPC_PMC8:
 		i = id - KVM_REG_PPC_PMC1;
 		vcpu->arch.pmc[i] = set_reg_val(id, *val);
@@ -1920,7 +1950,13 @@
 		vcpu->arch.sdar = set_reg_val(id, *val);
 		break;
 	case KVM_REG_PPC_SIER:
-		vcpu->arch.sier = set_reg_val(id, *val);
+		vcpu->arch.sier[0] = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_SIER2:
+		vcpu->arch.sier[1] = set_reg_val(id, *val);
+		break;
+	case KVM_REG_PPC_SIER3:
+		vcpu->arch.sier[2] = set_reg_val(id, *val);
 		break;
 	case KVM_REG_PPC_IAMR:
 		vcpu->arch.iamr = set_reg_val(id, *val);
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 7cd3cf3d..073617c 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -113,7 +113,7 @@
 
 	selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT;
 	if (selected_size) {
-		pr_debug("%s: reserving %ld MiB for global area\n", __func__,
+		pr_info("%s: reserving %ld MiB for global area\n", __func__,
 			 (unsigned long)selected_size / SZ_1M);
 		align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
 		cma_declare_contiguous(0, selected_size, 0, align_size,
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 63fd81f..59822cb 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -140,6 +140,14 @@
 	std	r8, HSTATE_MMCR2(r13)
 	std	r9, HSTATE_SIER(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
+	mfspr	r5, SPRN_MMCR3
+	mfspr	r6, SPRN_SIER2
+	mfspr	r7, SPRN_SIER3
+	std	r5, HSTATE_MMCR3(r13)
+	std	r6, HSTATE_SIER2(r13)
+	std	r7, HSTATE_SIER3(r13)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 	mfspr	r3, SPRN_PMC1
 	mfspr	r5, SPRN_PMC2
 	mfspr	r6, SPRN_PMC3
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 79f7d07..6028628 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -244,7 +244,7 @@
 {
 	bool resync_req;
 
-	__this_cpu_inc(irq_stat.hmi_exceptions);
+	local_paca->hmi_irqs++;
 
 	if (hmi_handle_debugtrig(NULL) >= 0)
 		return 1;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 7194389..799d6d0 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -3428,7 +3428,7 @@
 	mtspr	SPRN_PMC6, r9
 	ld	r3, VCPU_MMCR(r4)
 	ld	r5, VCPU_MMCR + 8(r4)
-	ld	r6, VCPU_MMCR + 16(r4)
+	ld	r6, VCPU_MMCRA(r4)
 	ld	r7, VCPU_SIAR(r4)
 	ld	r8, VCPU_SDAR(r4)
 	mtspr	SPRN_MMCR1, r5
@@ -3436,14 +3436,22 @@
 	mtspr	SPRN_SIAR, r7
 	mtspr	SPRN_SDAR, r8
 BEGIN_FTR_SECTION
-	ld	r5, VCPU_MMCR + 24(r4)
+	ld	r5, VCPU_MMCR + 24(r4)
+	ld	r6, VCPU_SIER + 8(r4)
+	ld	r7, VCPU_SIER + 16(r4)
+	mtspr	SPRN_MMCR3, r5
+	mtspr	SPRN_SIER2, r6
+	mtspr	SPRN_SIER3, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
+BEGIN_FTR_SECTION
+	ld	r5, VCPU_MMCR + 16(r4)
 	ld	r6, VCPU_SIER(r4)
 	mtspr	SPRN_MMCR2, r5
 	mtspr	SPRN_SIER, r6
 BEGIN_FTR_SECTION_NESTED(96)
 	lwz	r7, VCPU_PMC + 24(r4)
 	lwz	r8, VCPU_PMC + 28(r4)
-	ld	r9, VCPU_MMCR + 32(r4)
+	ld	r9, VCPU_MMCRS(r4)
 	mtspr	SPRN_SPMC1, r7
 	mtspr	SPRN_SPMC2, r8
 	mtspr	SPRN_MMCRS, r9
@@ -3496,6 +3504,14 @@
 	mtspr	SPRN_MMCR2, r8
 	mtspr	SPRN_SIER, r9
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
+	ld	r5, HSTATE_MMCR3(r13)
+	ld	r6, HSTATE_SIER2(r13)
+	ld	r7, HSTATE_SIER3(r13)
+	mtspr	SPRN_MMCR3, r5
+	mtspr	SPRN_SIER2, r6
+	mtspr	SPRN_SIER3, r7
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 	mtspr	SPRN_MMCR0, r3
 	isync
 	mtlr	r0
@@ -3551,10 +3567,18 @@
 	mfspr	r8, SPRN_SDAR
 	std	r4, VCPU_MMCR(r9)
 	std	r5, VCPU_MMCR + 8(r9)
-	std	r6, VCPU_MMCR + 16(r9)
+	std	r6, VCPU_MMCRA(r9)
 BEGIN_FTR_SECTION
-	std	r10, VCPU_MMCR + 24(r9)
+	std	r10, VCPU_MMCR + 16(r9)
 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+BEGIN_FTR_SECTION
+	mfspr	r5, SPRN_MMCR3
+	mfspr	r6, SPRN_SIER2
+	mfspr	r7, SPRN_SIER3
+	std	r5, VCPU_MMCR + 24(r9)
+	std	r6, VCPU_SIER + 8(r9)
+	std	r7, VCPU_SIER + 16(r9)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_31)
 	std	r7, VCPU_SIAR(r9)
 	std	r8, VCPU_SDAR(r9)
 	mfspr	r3, SPRN_PMC1
@@ -3578,7 +3602,7 @@
 	mfspr	r8, SPRN_MMCRS
 	stw	r6, VCPU_PMC + 24(r9)
 	stw	r7, VCPU_PMC + 28(r9)
-	std	r8, VCPU_MMCR + 32(r9)
+	std	r8, VCPU_MMCRS(r9)
 	lis	r4, 0x8000
 	mtspr	SPRN_MMCRS, r4
 END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S
index f7ad99d..607a9b9 100644
--- a/arch/powerpc/kvm/book3s_interrupts.S
+++ b/arch/powerpc/kvm/book3s_interrupts.S
@@ -26,7 +26,7 @@
 #define FUNC(name)		name
 #define GET_SHADOW_VCPU(reg)	lwz     reg, (THREAD + THREAD_KVM_SVCPU)(r2)
 
-#endif /* CONFIG_PPC_BOOK3S_XX */
+#endif /* CONFIG_PPC_BOOK3S_64 */
 
 #define VCPU_LOAD_NVGPRS(vcpu) \
 	PPC_LL	r14, VCPU_GPR(R14)(vcpu); \
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
index 4a61a97..830a126 100644
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -89,7 +89,7 @@
 	{H_CREATE_RPT,			"H_CREATE_RPT"}, \
 	{H_REMOVE_RPT,			"H_REMOVE_RPT"}, \
 	{H_REGISTER_RPAGES,		"H_REGISTER_RPAGES"}, \
-	{H_DISABLE_AND_GETC,		"H_DISABLE_AND_GETC"}, \
+	{H_DISABLE_AND_GET,		"H_DISABLE_AND_GET"}, \
 	{H_ERROR_DATA,			"H_ERROR_DATA"}, \
 	{H_GET_HCA_INFO,		"H_GET_HCA_INFO"}, \
 	{H_GET_PERF_COUNT,		"H_GET_PERF_COUNT"}, \
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 5e994cd..d66a645 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -41,7 +41,10 @@
 obj64-y	+= copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
 	   memcpy_64.o memcpy_mcsafe_64.o
 
+ifndef CONFIG_PPC_QUEUED_SPINLOCKS
 obj64-$(CONFIG_SMP)	+= locks.o
+endif
+
 obj64-$(CONFIG_ALTIVEC)	+= vmx-helper.o
 obj64-$(CONFIG_KPROBES_SANITY_TEST)	+= test_emulate_step.o \
 					   test_emulate_step_exec_instr.o
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 0a051df..8c3934e 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -93,7 +93,7 @@
 	unsigned long pfn;
 	int err;
 
-	if (is_vmalloc_addr(addr))
+	if (is_vmalloc_or_module_addr(addr))
 		pfn = vmalloc_to_pfn(addr);
 	else
 		pfn = __pa_symbol(addr) >> PAGE_SHIFT;
diff --git a/arch/powerpc/lib/locks.c b/arch/powerpc/lib/locks.c
index 6440d5943..04165b7 100644
--- a/arch/powerpc/lib/locks.c
+++ b/arch/powerpc/lib/locks.c
@@ -27,14 +27,14 @@
 		return;
 	holder_cpu = lock_value & 0xffff;
 	BUG_ON(holder_cpu >= NR_CPUS);
-	yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
+
+	yield_count = yield_count_of(holder_cpu);
 	if ((yield_count & 1) == 0)
 		return;		/* virtual cpu is currently running */
 	rmb();
 	if (lock->slock != lock_value)
 		return;		/* something has changed */
-	plpar_hcall_norets(H_CONFER,
-		get_hard_smp_processor_id(holder_cpu), yield_count);
+	yield_to_preempted(holder_cpu, yield_count);
 }
 EXPORT_SYMBOL_GPL(splpar_spin_yield);
 
@@ -53,13 +53,13 @@
 		return;		/* no write lock at present */
 	holder_cpu = lock_value & 0xffff;
 	BUG_ON(holder_cpu >= NR_CPUS);
-	yield_count = be32_to_cpu(lppaca_of(holder_cpu).yield_count);
+
+	yield_count = yield_count_of(holder_cpu);
 	if ((yield_count & 1) == 0)
 		return;		/* virtual cpu is currently running */
 	rmb();
 	if (rw->lock != lock_value)
 		return;		/* something has changed */
-	plpar_hcall_norets(H_CONFER,
-		get_hard_smp_processor_id(holder_cpu), yield_count);
+	yield_to_preempted(holder_cpu, yield_count);
 }
 #endif
diff --git a/arch/powerpc/lib/pmem.c b/arch/powerpc/lib/pmem.c
index 0666a8d2..1550e0d 100644
--- a/arch/powerpc/lib/pmem.c
+++ b/arch/powerpc/lib/pmem.c
@@ -9,20 +9,56 @@
 
 #include <asm/cacheflush.h>
 
+static inline void __clean_pmem_range(unsigned long start, unsigned long stop)
+{
+	unsigned long shift = l1_dcache_shift();
+	unsigned long bytes = l1_dcache_bytes();
+	void *addr = (void *)(start & ~(bytes - 1));
+	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+	unsigned long i;
+
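+	/*
+	 * addr is rounded down to a cache line and size padded up, so one
+	 * dcbstps is issued per line, covering the whole [start, stop)
+	 * range including partial lines at either end.
+	 */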
+	for (i = 0; i < size >> shift; i++, addr += bytes)
+		asm volatile(PPC_DCBSTPS(%0, %1): :"i"(0), "r"(addr): "memory");
+}
+
+static inline void __flush_pmem_range(unsigned long start, unsigned long stop)
+{
+	unsigned long shift = l1_dcache_shift();
+	unsigned long bytes = l1_dcache_bytes();
+	void *addr = (void *)(start & ~(bytes - 1));
+	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+	unsigned long i;
+
+	for (i = 0; i < size >> shift; i++, addr += bytes)
+		asm volatile(PPC_DCBFPS(%0, %1): :"i"(0), "r"(addr): "memory");
+}
+
+static inline void clean_pmem_range(unsigned long start, unsigned long stop)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		return __clean_pmem_range(start, stop);
+}
+
+static inline void flush_pmem_range(unsigned long start, unsigned long stop)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_207S))
+		return __flush_pmem_range(start, stop);
+}
+
 /*
  * CONFIG_ARCH_HAS_PMEM_API symbols
  */
 void arch_wb_cache_pmem(void *addr, size_t size)
 {
 	unsigned long start = (unsigned long) addr;
-	flush_dcache_range(start, start + size);
+	clean_pmem_range(start, start + size);
 }
 EXPORT_SYMBOL_GPL(arch_wb_cache_pmem);
 
 void arch_invalidate_pmem(void *addr, size_t size)
 {
 	unsigned long start = (unsigned long) addr;
-	flush_dcache_range(start, start + size);
+	flush_pmem_range(start, start + size);
 }
 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
 
@@ -35,19 +71,17 @@
 	unsigned long copied, start = (unsigned long) dest;
 
 	copied = __copy_from_user(dest, src, size);
-	flush_dcache_range(start, start + size);
+	clean_pmem_range(start, start + size);
 
 	return copied;
 }
 
-void *memcpy_flushcache(void *dest, const void *src, size_t size)
+void memcpy_flushcache(void *dest, const void *src, size_t size)
 {
 	unsigned long start = (unsigned long) dest;
 
 	memcpy(dest, src, size);
-	flush_dcache_range(start, start + size);
-
-	return dest;
+	clean_pmem_range(start, start + size);
 }
 EXPORT_SYMBOL(memcpy_flushcache);
 
diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c
index 5abe982..caee8cc 100644
--- a/arch/powerpc/lib/sstep.c
+++ b/arch/powerpc/lib/sstep.c
@@ -16,6 +16,7 @@
 #include <asm/disassemble.h>
 
 extern char system_call_common[];
+extern char system_call_vectored_emulate[];
 
 #ifdef CONFIG_PPC64
 /* Bits in SRR1 that are copied from MSR */
@@ -200,8 +201,8 @@
 	unsigned int  dd;
 	unsigned long ea, d0, d1, d;
 
-	prefix_r = instr & (1ul << 20);
-	ra = (suffix >> 16) & 0x1f;
+	prefix_r = GET_PREFIX_R(instr);
+	ra = GET_PREFIX_RA(suffix);
 
 	d0 = instr & 0x3ffff;
 	d1 = suffix & 0xffff;
@@ -1236,6 +1237,9 @@
 	case 17:	/* sc */
 		if ((word & 0xfe2) == 2)
 			op->type = SYSCALL;
+		else if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) &&
+				(word & 0xfe3) == 1)
+			op->type = SYSCALL_VECTORED_0;
 		else
 			op->type = UNKNOWN;
 		return 0;
@@ -1339,8 +1343,8 @@
 	switch (opcode) {
 #ifdef __powerpc64__
 	case 1:
-		prefix_r = word & (1ul << 20);
-		ra = (suffix >> 16) & 0x1f;
+		prefix_r = GET_PREFIX_R(word);
+		ra = GET_PREFIX_RA(suffix);
 		rd = (suffix >> 21) & 0x1f;
 		op->reg = rd;
 		op->val = regs->gpr[rd];
@@ -1802,7 +1806,18 @@
 			op->val = (int) regs->gpr[ra] /
 				(int) regs->gpr[rb];
 			goto arith_done;
-
+#ifdef __powerpc64__
+		case 425:	/* divde[.] */
+			asm volatile(PPC_DIVDE(%0, %1, %2) :
+				"=r" (op->val) : "r" (regs->gpr[ra]),
+				"r" (regs->gpr[rb]));
+			goto arith_done;
+		case 393:	/* divdeu[.] */
+			asm volatile(PPC_DIVDEU(%0, %1, %2) :
+				"=r" (op->val) : "r" (regs->gpr[ra]),
+				"r" (regs->gpr[rb]));
+			goto arith_done;
+#endif
 		case 755:	/* darn */
 			if (!cpu_has_feature(CPU_FTR_ARCH_300))
 				return -1;
@@ -2715,8 +2730,8 @@
 		}
 		break;
 	case 1: /* Prefixed instructions */
-		prefix_r = word & (1ul << 20);
-		ra = (suffix >> 16) & 0x1f;
+		prefix_r = GET_PREFIX_R(word);
+		ra = GET_PREFIX_RA(suffix);
 		op->update_reg = ra;
 		rd = (suffix >> 21) & 0x1f;
 		op->reg = rd;
@@ -3378,6 +3393,18 @@
 		regs->msr = MSR_KERNEL;
 		return 1;
 
+#ifdef CONFIG_PPC_BOOK3S_64
+	case SYSCALL_VECTORED_0:	/* scv 0 */
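+		/*
+		 * Mirror the "sc" emulation above: r9-r12 carry the old
+		 * r13, the kernel MSR, the return NIP and the masked MSR
+		 * for the emulation stub, r13 gets the PACA, and execution
+		 * continues in system_call_vectored_emulate with MSR_KERNEL.
+		 */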
+		regs->gpr[9] = regs->gpr[13];
+		regs->gpr[10] = MSR_KERNEL;
+		regs->gpr[11] = regs->nip + 4;
+		regs->gpr[12] = regs->msr & MSR_MASK;
+		regs->gpr[13] = (unsigned long) get_paca();
+		regs->nip = (unsigned long) &system_call_vectored_emulate;
+		regs->msr = MSR_KERNEL;
+		return 1;
+#endif
+
 	case RFI:
 		return -1;
 #endif
diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c
index 46af802..0a201b7 100644
--- a/arch/powerpc/lib/test_emulate_step.c
+++ b/arch/powerpc/lib/test_emulate_step.c
@@ -8,59 +8,51 @@
 #define pr_fmt(fmt) "emulate_step_test: " fmt
 
 #include <linux/ptrace.h>
+#include <asm/cpu_has_feature.h>
 #include <asm/sstep.h>
 #include <asm/ppc-opcode.h>
 #include <asm/code-patching.h>
 #include <asm/inst.h>
 
-#define IMM_L(i)		((uintptr_t)(i) & 0xffff)
-#define IMM_DS(i)		((uintptr_t)(i) & 0xfffc)
-
-/*
- * Defined with TEST_ prefix so it does not conflict with other
- * definitions.
- */
-#define TEST_LD(r, base, i)	ppc_inst(PPC_INST_LD | ___PPC_RT(r) |		\
-					___PPC_RA(base) | IMM_DS(i))
-#define TEST_LWZ(r, base, i)	ppc_inst(PPC_INST_LWZ | ___PPC_RT(r) |		\
-					___PPC_RA(base) | IMM_L(i))
-#define TEST_LWZX(t, a, b)	ppc_inst(PPC_INST_LWZX | ___PPC_RT(t) |		\
-					___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_STD(r, base, i)	ppc_inst(PPC_INST_STD | ___PPC_RS(r) |		\
-					___PPC_RA(base) | IMM_DS(i))
-#define TEST_LDARX(t, a, b, eh)	ppc_inst(PPC_INST_LDARX | ___PPC_RT(t) |	\
-					___PPC_RA(a) | ___PPC_RB(b) |	\
-					__PPC_EH(eh))
-#define TEST_STDCX(s, a, b)	ppc_inst(PPC_INST_STDCX | ___PPC_RS(s) |	\
-					___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_LFSX(t, a, b)	ppc_inst(PPC_INST_LFSX | ___PPC_RT(t) |		\
-					___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_STFSX(s, a, b)	ppc_inst(PPC_INST_STFSX | ___PPC_RS(s) |	\
-					___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_LFDX(t, a, b)	ppc_inst(PPC_INST_LFDX | ___PPC_RT(t) |		\
-					___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_STFDX(s, a, b)	ppc_inst(PPC_INST_STFDX | ___PPC_RS(s) |	\
-					___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_LVX(t, a, b)	ppc_inst(PPC_INST_LVX | ___PPC_RT(t) |		\
-					___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_STVX(s, a, b)	ppc_inst(PPC_INST_STVX | ___PPC_RS(s) |		\
-					___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_LXVD2X(s, a, b)	ppc_inst(PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b))
-#define TEST_STXVD2X(s, a, b)	ppc_inst(PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b))
-#define TEST_ADD(t, a, b)	ppc_inst(PPC_INST_ADD | ___PPC_RT(t) |		\
-					___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_ADD_DOT(t, a, b)	ppc_inst(PPC_INST_ADD | ___PPC_RT(t) |		\
-					___PPC_RA(a) | ___PPC_RB(b) | 0x1)
-#define TEST_ADDC(t, a, b)	ppc_inst(PPC_INST_ADDC | ___PPC_RT(t) |		\
-					___PPC_RA(a) | ___PPC_RB(b))
-#define TEST_ADDC_DOT(t, a, b)	ppc_inst(PPC_INST_ADDC | ___PPC_RT(t) |		\
-					___PPC_RA(a) | ___PPC_RB(b) | 0x1)
-
 #define MAX_SUBTESTS	16
 
 #define IGNORE_GPR(n)	(0x1UL << (n))
 #define IGNORE_XER	(0x1UL << 32)
 #define IGNORE_CCR	(0x1UL << 33)
+#define NEGATIVE_TEST	(0x1UL << 63)
+
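+/*
+ * Each prefixed test instruction below is a prefix:suffix pair: the
+ * prefix word (PPC_PREFIX_8LS/MLS) carries the R bit and the upper 18
+ * bits of the 34-bit immediate (IMM_H), while the suffix is the base
+ * word instruction carrying the low 16 bits (IMM_L).
+ */
+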
+#define TEST_PLD(r, base, i, pr) \
+	ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \
+			PPC_INST_PLD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PLWZ(r, base, i, pr) \
+	ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+			PPC_RAW_LWZ(r, base, i))
+
+#define TEST_PSTD(r, base, i, pr) \
+	ppc_inst_prefix(PPC_PREFIX_8LS | __PPC_PRFX_R(pr) | IMM_H(i), \
+			PPC_INST_PSTD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PLFS(r, base, i, pr) \
+	ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+			PPC_INST_LFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PSTFS(r, base, i, pr) \
+	ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+			PPC_INST_STFS | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PLFD(r, base, i, pr) \
+	ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+			PPC_INST_LFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PSTFD(r, base, i, pr) \
+	ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+			PPC_INST_STFD | ___PPC_RT(r) | ___PPC_RA(base) | IMM_L(i))
+
+#define TEST_PADDI(t, a, i, pr) \
+	ppc_inst_prefix(PPC_PREFIX_MLS | __PPC_PRFX_R(pr) | IMM_H(i), \
+			PPC_RAW_ADDI(t, a, i))
+
 static void __init init_pt_regs(struct pt_regs *regs)
 {
@@ -105,7 +97,7 @@
 	regs.gpr[3] = (unsigned long) &a;
 
 	/* ld r5, 0(r3) */
-	stepped = emulate_step(&regs, TEST_LD(5, 3, 0));
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LD(5, 3, 0)));
 
 	if (stepped == 1 && regs.gpr[5] == a)
 		show_result("ld", "PASS");
@@ -113,6 +105,29 @@
 		show_result("ld", "FAIL");
 }
 
+static void __init test_pld(void)
+{
+	struct pt_regs regs;
+	unsigned long a = 0x23;
+	int stepped = -1;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("pld", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&a;
+
+	/* pld r5, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PLD(5, 3, 0, 0));
+
+	if (stepped == 1 && regs.gpr[5] == a)
+		show_result("pld", "PASS");
+	else
+		show_result("pld", "FAIL");
+}
+
 static void __init test_lwz(void)
 {
 	struct pt_regs regs;
@@ -123,7 +138,7 @@
 	regs.gpr[3] = (unsigned long) &a;
 
 	/* lwz r5, 0(r3) */
-	stepped = emulate_step(&regs, TEST_LWZ(5, 3, 0));
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LWZ(5, 3, 0)));
 
 	if (stepped == 1 && regs.gpr[5] == a)
 		show_result("lwz", "PASS");
@@ -131,6 +146,30 @@
 		show_result("lwz", "FAIL");
 }
 
+static void __init test_plwz(void)
+{
+	struct pt_regs regs;
+	unsigned int a = 0x4545;
+	int stepped = -1;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("plwz", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&a;
+
+	/* plwz r5, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PLWZ(5, 3, 0, 0));
+
+	if (stepped == 1 && regs.gpr[5] == a)
+		show_result("plwz", "PASS");
+	else
+		show_result("plwz", "FAIL");
+}
+
 static void __init test_lwzx(void)
 {
 	struct pt_regs regs;
@@ -143,7 +182,7 @@
 	regs.gpr[5] = 0x8765;
 
 	/* lwzx r5, r3, r4 */
-	stepped = emulate_step(&regs, TEST_LWZX(5, 3, 4));
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LWZX(5, 3, 4)));
 	if (stepped == 1 && regs.gpr[5] == a[2])
 		show_result("lwzx", "PASS");
 	else
@@ -161,13 +200,36 @@
 	regs.gpr[5] = 0x5678;
 
 	/* std r5, 0(r3) */
-	stepped = emulate_step(&regs, TEST_STD(5, 3, 0));
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STD(5, 3, 0)));
 	if (stepped == 1 && regs.gpr[5] == a)
 		show_result("std", "PASS");
 	else
 		show_result("std", "FAIL");
 }
 
+static void __init test_pstd(void)
+{
+	struct pt_regs regs;
+	unsigned long a = 0x1234;
+	int stepped = -1;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("pstd", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+	regs.gpr[3] = (unsigned long)&a;
+	regs.gpr[5] = 0x5678;
+
+	/* pstd r5, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PSTD(5, 3, 0, 0));
+	if (stepped == 1 && regs.gpr[5] == a)
+		show_result("pstd", "PASS");
+	else
+		show_result("pstd", "FAIL");
+}
+
 static void __init test_ldarx_stdcx(void)
 {
 	struct pt_regs regs;
@@ -186,7 +248,7 @@
 	regs.gpr[5] = 0x5678;
 
 	/* ldarx r5, r3, r4, 0 */
-	stepped = emulate_step(&regs, TEST_LDARX(5, 3, 4, 0));
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LDARX(5, 3, 4, 0)));
 
 	/*
 	 * Don't touch 'a' here. Touching 'a' can do Load/store
@@ -204,7 +266,7 @@
 	regs.gpr[5] = 0x9ABC;
 
 	/* stdcx. r5, r3, r4 */
-	stepped = emulate_step(&regs, TEST_STDCX(5, 3, 4));
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STDCX(5, 3, 4)));
 
 	/*
 	 * Two possible scenarios that indicates successful emulation
@@ -244,7 +306,7 @@
 	regs.gpr[4] = 0;
 
 	/* lfsx frt10, r3, r4 */
-	stepped = emulate_step(&regs, TEST_LFSX(10, 3, 4));
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LFSX(10, 3, 4)));
 
 	if (stepped == 1)
 		show_result("lfsx", "PASS");
@@ -257,7 +319,7 @@
 	c.a = 678.91;
 
 	/* stfsx frs10, r3, r4 */
-	stepped = emulate_step(&regs, TEST_STFSX(10, 3, 4));
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STFSX(10, 3, 4)));
 
 	if (stepped == 1 && c.b == cached_b)
 		show_result("stfsx", "PASS");
@@ -265,6 +327,53 @@
 		show_result("stfsx", "FAIL");
 }
 
+static void __init test_plfs_pstfs(void)
+{
+	struct pt_regs regs;
+	union {
+		float a;
+		int b;
+	} c;
+	int cached_b;
+	int stepped = -1;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("pld", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+
+
+	/*** plfs ***/
+
+	c.a = 123.45;
+	cached_b = c.b;
+
+	regs.gpr[3] = (unsigned long)&c.a;
+
+	/* plfs frt10, 0(r3), 0  */
+	stepped = emulate_step(&regs, TEST_PLFS(10, 3, 0, 0));
+
+	if (stepped == 1)
+		show_result("plfs", "PASS");
+	else
+		show_result("plfs", "FAIL");
+
+
+	/*** pstfs ***/
+
+	c.a = 678.91;
+
+	/* pstfs frs10, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PSTFS(10, 3, 0, 0));
+
+	if (stepped == 1 && c.b == cached_b)
+		show_result("pstfs", "PASS");
+	else
+		show_result("pstfs", "FAIL");
+}
+
 static void __init test_lfdx_stfdx(void)
 {
 	struct pt_regs regs;
@@ -287,7 +396,7 @@
 	regs.gpr[4] = 0;
 
 	/* lfdx frt10, r3, r4 */
-	stepped = emulate_step(&regs, TEST_LFDX(10, 3, 4));
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LFDX(10, 3, 4)));
 
 	if (stepped == 1)
 		show_result("lfdx", "PASS");
@@ -300,13 +409,60 @@
 	c.a = 987654.32;
 
 	/* stfdx frs10, r3, r4 */
-	stepped = emulate_step(&regs, TEST_STFDX(10, 3, 4));
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STFDX(10, 3, 4)));
 
 	if (stepped == 1 && c.b == cached_b)
 		show_result("stfdx", "PASS");
 	else
 		show_result("stfdx", "FAIL");
 }
+
+static void __init test_plfd_pstfd(void)
+{
+	struct pt_regs regs;
+	union {
+		double a;
+		long b;
+	} c;
+	long cached_b;
+	int stepped = -1;
+
+	if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+		show_result("pld", "SKIP (!CPU_FTR_ARCH_31)");
+		return;
+	}
+
+	init_pt_regs(&regs);
+
+
+	/*** plfd ***/
+
+	c.a = 123456.78;
+	cached_b = c.b;
+
+	regs.gpr[3] = (unsigned long)&c.a;
+
+	/* plfd frt10, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PLFD(10, 3, 0, 0));
+
+	if (stepped == 1)
+		show_result("plfd", "PASS");
+	else
+		show_result("plfd", "FAIL");
+
+
+	/*** pstfd ***/
+
+	c.a = 987654.32;
+
+	/* pstfd frs10, 0(r3), 0 */
+	stepped = emulate_step(&regs, TEST_PSTFD(10, 3, 0, 0));
+
+	if (stepped == 1 && c.b == cached_b)
+		show_result("pstfd", "PASS");
+	else
+		show_result("pstfd", "FAIL");
+}
 #else
 static void __init test_lfsx_stfsx(void)
 {
@@ -314,11 +470,23 @@
 	show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)");
 }
 
+static void __init test_plfs_pstfs(void)
+{
+	show_result("plfs", "SKIP (CONFIG_PPC_FPU is not set)");
+	show_result("pstfs", "SKIP (CONFIG_PPC_FPU is not set)");
+}
+
 static void __init test_lfdx_stfdx(void)
 {
 	show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)");
 	show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)");
 }
+
+static void __init test_plfd_pstfd(void)
+{
+	show_result("plfd", "SKIP (CONFIG_PPC_FPU is not set)");
+	show_result("pstfd", "SKIP (CONFIG_PPC_FPU is not set)");
+}
 #endif /* CONFIG_PPC_FPU */
 
 #ifdef CONFIG_ALTIVEC
@@ -346,7 +514,7 @@
 	regs.gpr[4] = 0;
 
 	/* lvx vrt10, r3, r4 */
-	stepped = emulate_step(&regs, TEST_LVX(10, 3, 4));
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LVX(10, 3, 4)));
 
 	if (stepped == 1)
 		show_result("lvx", "PASS");
@@ -362,7 +530,7 @@
 	c.b[3] = 498532;
 
 	/* stvx vrs10, r3, r4 */
-	stepped = emulate_step(&regs, TEST_STVX(10, 3, 4));
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STVX(10, 3, 4)));
 
 	if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] &&
 	    cached_b[2] == c.b[2] && cached_b[3] == c.b[3])
@@ -403,7 +571,7 @@
 	regs.gpr[4] = 0;
 
 	/* lxvd2x vsr39, r3, r4 */
-	stepped = emulate_step(&regs, TEST_LXVD2X(39, 3, 4));
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_LXVD2X(39, R3, R4)));
 
 	if (stepped == 1 && cpu_has_feature(CPU_FTR_VSX)) {
 		show_result("lxvd2x", "PASS");
@@ -423,7 +591,7 @@
 	c.b[3] = 4;
 
 	/* stxvd2x vsr39, r3, r4 */
-	stepped = emulate_step(&regs, TEST_STXVD2X(39, 3, 4));
+	stepped = emulate_step(&regs, ppc_inst(PPC_RAW_STXVD2X(39, R3, R4)));
 
 	if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] &&
 	    cached_b[2] == c.b[2] && cached_b[3] == c.b[3] &&
@@ -447,18 +615,24 @@
 static void __init run_tests_load_store(void)
 {
 	test_ld();
+	test_pld();
 	test_lwz();
+	test_plwz();
 	test_lwzx();
 	test_std();
+	test_pstd();
 	test_ldarx_stdcx();
 	test_lfsx_stfsx();
+	test_plfs_pstfs();
 	test_lfdx_stfdx();
+	test_plfd_pstfd();
 	test_lvx_stvx();
 	test_lxvd2x_stxvd2x();
 }
 
 struct compute_test {
 	char *mnemonic;
+	unsigned long cpu_feature;
 	struct {
 		char *descr;
 		unsigned long flags;
@@ -467,6 +641,11 @@
 	} subtests[MAX_SUBTESTS + 1];
 };
 
+/* Extreme values for si0||si1 (the MLS:D-form 34-bit immediate field) */
+#define SI_MIN BIT(33)
+#define SI_MAX (BIT(33) - 1)
+#define SI_UMAX (BIT(34) - 1)
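+/*
+ * In a 34-bit field, BIT(33) is the sign bit: SI_MIN denotes the most
+ * negative immediate once sign-extended, while SI_MAX and SI_UMAX are
+ * the largest signed and unsigned values respectively.
+ */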
+
 static struct compute_test compute_tests[] = {
 	{
 		.mnemonic = "nop",
@@ -485,7 +664,7 @@
 		.subtests = {
 			{
 				.descr = "RA = LONG_MIN, RB = LONG_MIN",
-				.instr = TEST_ADD(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
 				.regs = {
 					.gpr[21] = LONG_MIN,
 					.gpr[22] = LONG_MIN,
@@ -493,7 +672,7 @@
 			},
 			{
 				.descr = "RA = LONG_MIN, RB = LONG_MAX",
-				.instr = TEST_ADD(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
 				.regs = {
 					.gpr[21] = LONG_MIN,
 					.gpr[22] = LONG_MAX,
@@ -501,7 +680,7 @@
 			},
 			{
 				.descr = "RA = LONG_MAX, RB = LONG_MAX",
-				.instr = TEST_ADD(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
 				.regs = {
 					.gpr[21] = LONG_MAX,
 					.gpr[22] = LONG_MAX,
@@ -509,7 +688,7 @@
 			},
 			{
 				.descr = "RA = ULONG_MAX, RB = ULONG_MAX",
-				.instr = TEST_ADD(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
 				.regs = {
 					.gpr[21] = ULONG_MAX,
 					.gpr[22] = ULONG_MAX,
@@ -517,7 +696,7 @@
 			},
 			{
 				.descr = "RA = ULONG_MAX, RB = 0x1",
-				.instr = TEST_ADD(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
 				.regs = {
 					.gpr[21] = ULONG_MAX,
 					.gpr[22] = 0x1,
@@ -525,7 +704,7 @@
 			},
 			{
 				.descr = "RA = INT_MIN, RB = INT_MIN",
-				.instr = TEST_ADD(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
 				.regs = {
 					.gpr[21] = INT_MIN,
 					.gpr[22] = INT_MIN,
@@ -533,7 +712,7 @@
 			},
 			{
 				.descr = "RA = INT_MIN, RB = INT_MAX",
-				.instr = TEST_ADD(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
 				.regs = {
 					.gpr[21] = INT_MIN,
 					.gpr[22] = INT_MAX,
@@ -541,7 +720,7 @@
 			},
 			{
 				.descr = "RA = INT_MAX, RB = INT_MAX",
-				.instr = TEST_ADD(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
 				.regs = {
 					.gpr[21] = INT_MAX,
 					.gpr[22] = INT_MAX,
@@ -549,7 +728,7 @@
 			},
 			{
 				.descr = "RA = UINT_MAX, RB = UINT_MAX",
-				.instr = TEST_ADD(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
 				.regs = {
 					.gpr[21] = UINT_MAX,
 					.gpr[22] = UINT_MAX,
@@ -557,7 +736,7 @@
 			},
 			{
 				.descr = "RA = UINT_MAX, RB = 0x1",
-				.instr = TEST_ADD(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD(20, 21, 22)),
 				.regs = {
 					.gpr[21] = UINT_MAX,
 					.gpr[22] = 0x1,
@@ -571,7 +750,7 @@
 			{
 				.descr = "RA = LONG_MIN, RB = LONG_MIN",
 				.flags = IGNORE_CCR,
-				.instr = TEST_ADD_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = LONG_MIN,
 					.gpr[22] = LONG_MIN,
@@ -579,7 +758,7 @@
 			},
 			{
 				.descr = "RA = LONG_MIN, RB = LONG_MAX",
-				.instr = TEST_ADD_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = LONG_MIN,
 					.gpr[22] = LONG_MAX,
@@ -588,7 +767,7 @@
 			{
 				.descr = "RA = LONG_MAX, RB = LONG_MAX",
 				.flags = IGNORE_CCR,
-				.instr = TEST_ADD_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = LONG_MAX,
 					.gpr[22] = LONG_MAX,
@@ -596,7 +775,7 @@
 			},
 			{
 				.descr = "RA = ULONG_MAX, RB = ULONG_MAX",
-				.instr = TEST_ADD_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = ULONG_MAX,
 					.gpr[22] = ULONG_MAX,
@@ -604,7 +783,7 @@
 			},
 			{
 				.descr = "RA = ULONG_MAX, RB = 0x1",
-				.instr = TEST_ADD_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = ULONG_MAX,
 					.gpr[22] = 0x1,
@@ -612,7 +791,7 @@
 			},
 			{
 				.descr = "RA = INT_MIN, RB = INT_MIN",
-				.instr = TEST_ADD_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = INT_MIN,
 					.gpr[22] = INT_MIN,
@@ -620,7 +799,7 @@
 			},
 			{
 				.descr = "RA = INT_MIN, RB = INT_MAX",
-				.instr = TEST_ADD_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = INT_MIN,
 					.gpr[22] = INT_MAX,
@@ -628,7 +807,7 @@
 			},
 			{
 				.descr = "RA = INT_MAX, RB = INT_MAX",
-				.instr = TEST_ADD_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = INT_MAX,
 					.gpr[22] = INT_MAX,
@@ -636,7 +815,7 @@
 			},
 			{
 				.descr = "RA = UINT_MAX, RB = UINT_MAX",
-				.instr = TEST_ADD_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = UINT_MAX,
 					.gpr[22] = UINT_MAX,
@@ -644,7 +823,7 @@
 			},
 			{
 				.descr = "RA = UINT_MAX, RB = 0x1",
-				.instr = TEST_ADD_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADD_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = UINT_MAX,
 					.gpr[22] = 0x1,
@@ -657,7 +836,7 @@
 		.subtests = {
 			{
 				.descr = "RA = LONG_MIN, RB = LONG_MIN",
-				.instr = TEST_ADDC(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
 				.regs = {
 					.gpr[21] = LONG_MIN,
 					.gpr[22] = LONG_MIN,
@@ -665,7 +844,7 @@
 			},
 			{
 				.descr = "RA = LONG_MIN, RB = LONG_MAX",
-				.instr = TEST_ADDC(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
 				.regs = {
 					.gpr[21] = LONG_MIN,
 					.gpr[22] = LONG_MAX,
@@ -673,7 +852,7 @@
 			},
 			{
 				.descr = "RA = LONG_MAX, RB = LONG_MAX",
-				.instr = TEST_ADDC(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
 				.regs = {
 					.gpr[21] = LONG_MAX,
 					.gpr[22] = LONG_MAX,
@@ -681,7 +860,7 @@
 			},
 			{
 				.descr = "RA = ULONG_MAX, RB = ULONG_MAX",
-				.instr = TEST_ADDC(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
 				.regs = {
 					.gpr[21] = ULONG_MAX,
 					.gpr[22] = ULONG_MAX,
@@ -689,7 +868,7 @@
 			},
 			{
 				.descr = "RA = ULONG_MAX, RB = 0x1",
-				.instr = TEST_ADDC(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
 				.regs = {
 					.gpr[21] = ULONG_MAX,
 					.gpr[22] = 0x1,
@@ -697,7 +876,7 @@
 			},
 			{
 				.descr = "RA = INT_MIN, RB = INT_MIN",
-				.instr = TEST_ADDC(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
 				.regs = {
 					.gpr[21] = INT_MIN,
 					.gpr[22] = INT_MIN,
@@ -705,7 +884,7 @@
 			},
 			{
 				.descr = "RA = INT_MIN, RB = INT_MAX",
-				.instr = TEST_ADDC(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
 				.regs = {
 					.gpr[21] = INT_MIN,
 					.gpr[22] = INT_MAX,
@@ -713,7 +892,7 @@
 			},
 			{
 				.descr = "RA = INT_MAX, RB = INT_MAX",
-				.instr = TEST_ADDC(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
 				.regs = {
 					.gpr[21] = INT_MAX,
 					.gpr[22] = INT_MAX,
@@ -721,7 +900,7 @@
 			},
 			{
 				.descr = "RA = UINT_MAX, RB = UINT_MAX",
-				.instr = TEST_ADDC(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
 				.regs = {
 					.gpr[21] = UINT_MAX,
 					.gpr[22] = UINT_MAX,
@@ -729,7 +908,7 @@
 			},
 			{
 				.descr = "RA = UINT_MAX, RB = 0x1",
-				.instr = TEST_ADDC(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
 				.regs = {
 					.gpr[21] = UINT_MAX,
 					.gpr[22] = 0x1,
@@ -737,7 +916,7 @@
 			},
 			{
 				.descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN",
-				.instr = TEST_ADDC(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC(20, 21, 22)),
 				.regs = {
 					.gpr[21] = LONG_MIN | (uint)INT_MIN,
 					.gpr[22] = LONG_MIN | (uint)INT_MIN,
@@ -751,7 +930,7 @@
 			{
 				.descr = "RA = LONG_MIN, RB = LONG_MIN",
 				.flags = IGNORE_CCR,
-				.instr = TEST_ADDC_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = LONG_MIN,
 					.gpr[22] = LONG_MIN,
@@ -759,7 +938,7 @@
 			},
 			{
 				.descr = "RA = LONG_MIN, RB = LONG_MAX",
-				.instr = TEST_ADDC_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = LONG_MIN,
 					.gpr[22] = LONG_MAX,
@@ -768,7 +947,7 @@
 			{
 				.descr = "RA = LONG_MAX, RB = LONG_MAX",
 				.flags = IGNORE_CCR,
-				.instr = TEST_ADDC_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = LONG_MAX,
 					.gpr[22] = LONG_MAX,
@@ -776,7 +955,7 @@
 			},
 			{
 				.descr = "RA = ULONG_MAX, RB = ULONG_MAX",
-				.instr = TEST_ADDC_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = ULONG_MAX,
 					.gpr[22] = ULONG_MAX,
@@ -784,7 +963,7 @@
 			},
 			{
 				.descr = "RA = ULONG_MAX, RB = 0x1",
-				.instr = TEST_ADDC_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = ULONG_MAX,
 					.gpr[22] = 0x1,
@@ -792,7 +971,7 @@
 			},
 			{
 				.descr = "RA = INT_MIN, RB = INT_MIN",
-				.instr = TEST_ADDC_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = INT_MIN,
 					.gpr[22] = INT_MIN,
@@ -800,7 +979,7 @@
 			},
 			{
 				.descr = "RA = INT_MIN, RB = INT_MAX",
-				.instr = TEST_ADDC_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = INT_MIN,
 					.gpr[22] = INT_MAX,
@@ -808,7 +987,7 @@
 			},
 			{
 				.descr = "RA = INT_MAX, RB = INT_MAX",
-				.instr = TEST_ADDC_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = INT_MAX,
 					.gpr[22] = INT_MAX,
@@ -816,7 +995,7 @@
 			},
 			{
 				.descr = "RA = UINT_MAX, RB = UINT_MAX",
-				.instr = TEST_ADDC_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = UINT_MAX,
 					.gpr[22] = UINT_MAX,
@@ -824,7 +1003,7 @@
 			},
 			{
 				.descr = "RA = UINT_MAX, RB = 0x1",
-				.instr = TEST_ADDC_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = UINT_MAX,
 					.gpr[22] = 0x1,
@@ -832,31 +1011,320 @@
 			},
 			{
 				.descr = "RA = LONG_MIN | INT_MIN, RB = LONG_MIN | INT_MIN",
-				.instr = TEST_ADDC_DOT(20, 21, 22),
+				.instr = ppc_inst(PPC_RAW_ADDC_DOT(20, 21, 22)),
 				.regs = {
 					.gpr[21] = LONG_MIN | (uint)INT_MIN,
 					.gpr[22] = LONG_MIN | (uint)INT_MIN,
 				}
 			}
 		}
+	},
+	{
+		.mnemonic = "divde",
+		.subtests = {
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MIN",
+				.instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MIN,
+				}
+			},
+			{
+				.descr = "RA = 1L, RB = 0",
+				.instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)),
+				.flags = IGNORE_GPR(20),
+				.regs = {
+					.gpr[21] = 1L,
+					.gpr[22] = 0,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_DIVDE(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MAX,
+				}
+			}
+		}
+	},
+	{
+		.mnemonic = "divde.",
+		.subtests = {
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MIN",
+				.instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MIN,
+				}
+			},
+			{
+				.descr = "RA = 1L, RB = 0",
+				.instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)),
+				.flags = IGNORE_GPR(20),
+				.regs = {
+					.gpr[21] = 1L,
+					.gpr[22] = 0,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_DIVDE_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MAX,
+				}
+			}
+		}
+	},
+	{
+		.mnemonic = "divdeu",
+		.subtests = {
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MIN",
+				.instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+				.flags = IGNORE_GPR(20),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MIN,
+				}
+			},
+			{
+				.descr = "RA = 1L, RB = 0",
+				.instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+				.flags = IGNORE_GPR(20),
+				.regs = {
+					.gpr[21] = 1L,
+					.gpr[22] = 0,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = LONG_MAX - 1, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MAX - 1,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN + 1, RB = LONG_MIN",
+				.instr = ppc_inst(PPC_RAW_DIVDEU(20, 21, 22)),
+				.flags = IGNORE_GPR(20),
+				.regs = {
+					.gpr[21] = LONG_MIN + 1,
+					.gpr[22] = LONG_MIN,
+				}
+			}
+		}
+	},
+	{
+		.mnemonic = "divdeu.",
+		.subtests = {
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MIN",
+				.instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+				.flags = IGNORE_GPR(20),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MIN,
+				}
+			},
+			{
+				.descr = "RA = 1L, RB = 0",
+				.instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+				.flags = IGNORE_GPR(20),
+				.regs = {
+					.gpr[21] = 1L,
+					.gpr[22] = 0,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MIN,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = LONG_MAX - 1, RB = LONG_MAX",
+				.instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+				.regs = {
+					.gpr[21] = LONG_MAX - 1,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN + 1, RB = LONG_MIN",
+				.instr = ppc_inst(PPC_RAW_DIVDEU_DOT(20, 21, 22)),
+				.flags = IGNORE_GPR(20),
+				.regs = {
+					.gpr[21] = LONG_MIN + 1,
+					.gpr[22] = LONG_MIN,
+				}
+			}
+		}
+	},
+	{
+		.mnemonic = "paddi",
+		.cpu_feature = CPU_FTR_ARCH_31,
+		.subtests = {
+			{
+				.descr = "RA = LONG_MIN, SI = SI_MIN, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_MIN, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = LONG_MIN,
+				}
+			},
+			{
+				.descr = "RA = LONG_MIN, SI = SI_MAX, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_MAX, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = LONG_MIN,
+				}
+			},
+			{
+				.descr = "RA = LONG_MAX, SI = SI_MAX, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_MAX, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = LONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = ULONG_MAX, SI = SI_UMAX, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_UMAX, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = ULONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = ULONG_MAX, SI = 0x1, R = 0",
+				.instr = TEST_PADDI(21, 22, 0x1, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = ULONG_MAX,
+				}
+			},
+			{
+				.descr = "RA = INT_MIN, SI = SI_MIN, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_MIN, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = INT_MIN,
+				}
+			},
+			{
+				.descr = "RA = INT_MIN, SI = SI_MAX, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_MAX, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = INT_MIN,
+				}
+			},
+			{
+				.descr = "RA = INT_MAX, SI = SI_MAX, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_MAX, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = INT_MAX,
+				}
+			},
+			{
+				.descr = "RA = UINT_MAX, SI = 0x1, R = 0",
+				.instr = TEST_PADDI(21, 22, 0x1, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = UINT_MAX,
+				}
+			},
+			{
+				.descr = "RA = UINT_MAX, SI = SI_MAX, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_MAX, 0),
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = UINT_MAX,
+				}
+			},
+			{
+				.descr = "RA is r0, SI = SI_MIN, R = 0",
+				.instr = TEST_PADDI(21, 0, SI_MIN, 0),
+				.regs = {
+					.gpr[21] = 0x0,
+				}
+			},
+			{
+				.descr = "RA = 0, SI = SI_MIN, R = 0",
+				.instr = TEST_PADDI(21, 22, SI_MIN, 0),
+				.regs = {
+					.gpr[21] = 0x0,
+					.gpr[22] = 0x0,
+				}
+			},
+			{
+				.descr = "RA is r0, SI = 0, R = 1",
+				.instr = TEST_PADDI(21, 0, 0, 1),
+				.regs = {
+					.gpr[21] = 0,
+				}
+			},
+			{
+				.descr = "RA is r0, SI = SI_MIN, R = 1",
+				.instr = TEST_PADDI(21, 0, SI_MIN, 1),
+				.regs = {
+					.gpr[21] = 0,
+				}
+			},
+			/* Invalid instruction form with R = 1 and RA != 0 */
+			{
+				.descr = "RA = R22(0), SI = 0, R = 1",
+				.instr = TEST_PADDI(21, 22, 0, 1),
+				.flags = NEGATIVE_TEST,
+				.regs = {
+					.gpr[21] = 0,
+					.gpr[22] = 0,
+				}
+			}
+		}
 	}
 };
 
 static int __init emulate_compute_instr(struct pt_regs *regs,
-					struct ppc_inst instr)
+					struct ppc_inst instr,
+					bool negative)
 {
+	int analysed;
 	struct instruction_op op;
 
 	if (!regs || !ppc_inst_val(instr))
 		return -EINVAL;
 
-	if (analyse_instr(&op, regs, instr) != 1 ||
-	    GETTYPE(op.type) != COMPUTE) {
-		pr_info("emulation failed, instruction = 0x%08x\n", ppc_inst_val(instr));
+	regs->nip = patch_site_addr(&patch__exec_instr);
+
+	analysed = analyse_instr(&op, regs, instr);
+	if (analysed != 1 || GETTYPE(op.type) != COMPUTE) {
+		if (negative)
+			return -EFAULT;
+		pr_info("emulation failed, instruction = %s\n", ppc_inst_as_str(instr));
 		return -EFAULT;
 	}
-
-	emulate_update_regs(regs, &op);
+	if (analysed == 1 && negative)
+		pr_info("negative test failed, instruction = %s\n", ppc_inst_as_str(instr));
+	if (!negative)
+		emulate_update_regs(regs, &op);
 	return 0;
 }
 
@@ -864,7 +1332,6 @@
 					struct ppc_inst instr)
 {
 	extern int exec_instr(struct pt_regs *regs);
-	extern s32 patch__exec_instr;
 
 	if (!regs || !ppc_inst_val(instr))
 		return -EINVAL;
@@ -872,7 +1339,7 @@
 	/* Patch the NOP with the actual instruction */
 	patch_instruction_site(&patch__exec_instr, instr);
 	if (exec_instr(regs)) {
-		pr_info("execution failed, instruction = 0x%08x\n", ppc_inst_val(instr));
+		pr_info("execution failed, instruction = %s\n", ppc_inst_as_str(instr));
 		return -EFAULT;
 	}
 
@@ -894,15 +1361,21 @@
 	struct pt_regs *regs, exp, got;
 	unsigned int i, j, k;
 	struct ppc_inst instr;
-	bool ignore_gpr, ignore_xer, ignore_ccr, passed;
+	bool ignore_gpr, ignore_xer, ignore_ccr, passed, rc, negative;
 
 	for (i = 0; i < ARRAY_SIZE(compute_tests); i++) {
 		test = &compute_tests[i];
 
+		if (test->cpu_feature && !early_cpu_has_feature(test->cpu_feature)) {
+			show_result(test->mnemonic, "SKIP (!CPU_FTR)");
+			continue;
+		}
+
 		for (j = 0; j < MAX_SUBTESTS && test->subtests[j].descr; j++) {
 			instr = test->subtests[j].instr;
 			flags = test->subtests[j].flags;
 			regs = &test->subtests[j].regs;
+			negative = flags & NEGATIVE_TEST;
 			ignore_xer = flags & IGNORE_XER;
 			ignore_ccr = flags & IGNORE_CCR;
 			passed = true;
@@ -917,8 +1390,12 @@
 			exp.msr = MSR_KERNEL;
 			got.msr = MSR_KERNEL;
 
-			if (emulate_compute_instr(&got, instr) ||
-			    execute_compute_instr(&exp, instr)) {
+			rc = emulate_compute_instr(&got, instr, negative) != 0;
+			if (negative) {
+				/* skip executing instruction */
+				passed = rc;
+				goto print;
+			} else if (rc || execute_compute_instr(&exp, instr)) {
 				passed = false;
 				goto print;
 			}
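
The NEGATIVE_TEST flag on the last paddi subtest above inverts the pass
criterion: for an invalid instruction form (R = 1 with RA != 0),
analyse_instr() is expected to reject the instruction, so the subtest
passes exactly when emulation fails, and the instruction is never
executed natively. A minimal sketch of that criterion (an illustration,
not the kernel harness itself):

#include <stdbool.h>

/*
 * emulate_rc stands in for the return value of emulate_compute_instr():
 * 0 on successful emulation, non-zero on failure.
 */
static bool subtest_passed(int emulate_rc, bool negative)
{
	if (negative)
		return emulate_rc != 0;	/* rejection is the expected outcome */
	return emulate_rc == 0;		/* normal subtests must emulate cleanly */
}
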
diff --git a/arch/powerpc/lib/test_emulate_step_exec_instr.S b/arch/powerpc/lib/test_emulate_step_exec_instr.S
index 1580f34..9ef941d 100644
--- a/arch/powerpc/lib/test_emulate_step_exec_instr.S
+++ b/arch/powerpc/lib/test_emulate_step_exec_instr.S
@@ -80,7 +80,9 @@
 	REST_NVGPRS(r31)
 
 	/* Placeholder for the test instruction */
+	.balign 64
 1:	nop
+	nop
 	patch_site 1b patch__exec_instr
 
 	/*
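
The .balign 64 and the extra nop make the patch site big enough, and
aligned enough, for an 8-byte prefixed instruction: ISA v3.1 forbids a
prefixed instruction from crossing a 64-byte boundary, and a 64-byte
aligned site trivially satisfies that. A small standalone check of the
boundary arithmetic (an illustration, not kernel code):

#include <assert.h>

/*
 * An 8-byte instruction starting at addr crosses a 64-byte boundary
 * iff its offset within the cache line exceeds 56.
 */
static int crosses_64b_boundary(unsigned long addr)
{
	return (addr & 63) > 56;
}

int main(void)
{
	assert(!crosses_64b_boundary(0x1000));	/* 64-byte aligned: safe */
	assert(crosses_64b_boundary(0x103a));	/* offset 58: would cross */
	return 0;
}
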
diff --git a/arch/powerpc/mm/book3s32/hash_low.S b/arch/powerpc/mm/book3s32/hash_low.S
index 923ad8f..1690d36 100644
--- a/arch/powerpc/mm/book3s32/hash_low.S
+++ b/arch/powerpc/mm/book3s32/hash_low.S
@@ -62,7 +62,7 @@
 	isync
 #endif
 	/* Get PTE (linux-style) and check access */
-	lis	r0,KERNELBASE@h		/* check if kernel address */
+	lis	r0, TASK_SIZE@h		/* check if kernel address */
 	cmplw	0,r4,r0
 	ori	r3,r3,_PAGE_USER|_PAGE_PRESENT /* test low addresses as user */
 	mfspr	r5, SPRN_SPRG_PGDIR	/* phys page-table root */
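
Comparing against TASK_SIZE instead of KERNELBASE makes the hash fault
path classify addresses by the configured user/kernel split rather than
by the kernel's link address. In C terms the test is simply the
following (a sketch; the TASK_SIZE value below is an assumption for a
typical 32-bit split):

#define TASK_SIZE 0xc0000000UL	/* assumed 3G/1G split */

static int is_kernel_address(unsigned long ea)
{
	return ea >= TASK_SIZE;	/* at or above TASK_SIZE: kernel */
}
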
diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c
index 03b6ba5..c016291 100644
--- a/arch/powerpc/mm/book3s32/mmu.c
+++ b/arch/powerpc/mm/book3s32/mmu.c
@@ -187,6 +187,17 @@
 	return __mmu_mapin_ram(border, top);
 }
 
+static bool is_module_segment(unsigned long addr)
+{
+	if (!IS_ENABLED(CONFIG_MODULES))
+		return false;
+	if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M))
+		return false;
+	if (addr >= ALIGN(VMALLOC_END, SZ_256M))
+		return false;
+	return true;
+}
+
 void mmu_mark_initmem_nx(void)
 {
 	int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4;
@@ -223,9 +234,9 @@
 
 	for (i = TASK_SIZE >> 28; i < 16; i++) {
 		/* Do not set NX on VM space for modules */
-		if (IS_ENABLED(CONFIG_MODULES) &&
-		    (VMALLOC_START & 0xf0000000) == i << 28)
-			break;
+		if (is_module_segment(i << 28))
+			continue;
+
 		mtsrin(mfsrin(i << 28) | 0x10000000, i << 28);
 	}
 }
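
is_module_segment() turns the old single-segment equality test into a
range check over every 256M segment the module/vmalloc area touches,
and the loop now continues past module segments instead of breaking out
(which previously also left all higher segments executable). A runnable
sketch of which segments the check covers (the VMALLOC bounds are made
up for illustration; the kernel version also bails out when
CONFIG_MODULES is disabled):

#include <stdio.h>

#define SZ_256M		0x10000000UL
#define VMALLOC_START	0xf1000000UL	/* assumption */
#define VMALLOC_END	0xf57fe000UL	/* assumption */
#define ALIGN_DOWN(x, a)	((x) & ~((a) - 1))
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

static int is_module_segment(unsigned long addr)
{
	if (addr < ALIGN_DOWN(VMALLOC_START, SZ_256M))
		return 0;
	if (addr >= ALIGN(VMALLOC_END, SZ_256M))
		return 0;
	return 1;
}

int main(void)
{
	unsigned long i;

	for (i = 0xc; i < 16; i++)	/* segments above TASK_SIZE */
		printf("segment 0x%08lx: %s\n", i << 28,
		       is_module_segment(i << 28) ? "module, skip" : "mark NX");
	return 0;
}
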
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 9b9f92a..1478fce 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -232,8 +232,6 @@
 		rflags |= HPTE_R_I;
 	else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT)
 		rflags |= (HPTE_R_I | HPTE_R_G);
-	else if ((pteflags & _PAGE_CACHE_CTL) == _PAGE_SAO)
-		rflags |= (HPTE_R_W | HPTE_R_I | HPTE_R_M);
 	else
 		/*
 		 * Add memory coherence if cache inhibited is not set
@@ -596,7 +594,7 @@
 	}
 
 #ifdef CONFIG_HUGETLB_PAGE
-	if (!hugetlb_disabled) {
+	if (!hugetlb_disabled && !early_radix_enabled()) {
 		/* Reserve 16G huge page memory sections for huge pages */
 		of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
 	}
@@ -663,11 +661,10 @@
 		 * Pick a size for the linear mapping. Currently, we only
 		 * support 16M, 1M and 4K which is the default
 		 */
-		if (IS_ENABLED(STRICT_KERNEL_RWX) &&
+		if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) &&
 		    (unsigned long)_stext % 0x1000000) {
 			if (mmu_psize_defs[MMU_PAGE_16M].shift)
-				pr_warn("Kernel not 16M aligned, "
-					"disabling 16M linear map alignment");
+				pr_warn("Kernel not 16M aligned, disabling 16M linear map alignment\n");
 			aligned = false;
 		}
 
@@ -788,7 +785,7 @@
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-int resize_hpt_for_hotplug(unsigned long new_mem_size)
+static int resize_hpt_for_hotplug(unsigned long new_mem_size)
 {
 	unsigned target_hpt_shift;
 
@@ -822,6 +819,8 @@
 		return -1;
 	}
 
+	resize_hpt_for_hotplug(memblock_phys_mem_size());
+
 	rc = htab_bolt_mapping(start, end, __pa(start),
 			       pgprot_val(prot), mmu_linear_psize,
 			       mmu_kernel_ssize);
@@ -839,6 +838,10 @@
 	int rc = htab_remove_mapping(start, end, mmu_linear_psize,
 				     mmu_kernel_ssize);
 	WARN_ON(rc < 0);
+
+	if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
+		pr_warn("Hash collision while resizing HPT\n");
+
 	return rc;
 }
 #endif /* CONFIG_MEMORY_HOTPLUG */
@@ -1111,6 +1114,10 @@
 	if (cpu_has_feature(CPU_FTR_ARCH_206)
 			&& cpu_has_feature(CPU_FTR_HVMODE))
 		tlbiel_all();
+
+#ifdef CONFIG_PPC_MEM_KEYS
+	mtspr(SPRN_UAMOR, default_uamor);
+#endif
 }
 #endif /* CONFIG_SMP */
 
@@ -1731,10 +1738,6 @@
 	return gslot;
 }
 
-/*
- * WARNING: This is called from hash_low_64.S, if you change this prototype,
- *          do not forget to update the assembly call site !
- */
 void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize,
 		     unsigned long flags)
 {
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c
index c58ad10..e18ae50 100644
--- a/arch/powerpc/mm/book3s64/pgtable.c
+++ b/arch/powerpc/mm/book3s64/pgtable.c
@@ -15,6 +15,7 @@
 #include <asm/powernv.h>
 #include <asm/firmware.h>
 #include <asm/ultravisor.h>
+#include <asm/kexec.h>
 
 #include <mm/mmu_decl.h>
 #include <trace/events/thp.h>
@@ -165,6 +166,8 @@
 		radix__mmu_cleanup_all();
 	else if (mmu_hash_ops.hpte_clear_all)
 		mmu_hash_ops.hpte_clear_all();
+
+	reset_sprs();
 }
 
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -339,6 +342,9 @@
 {
 	struct page *page = virt_to_page(pmd);
 
+	if (PageReserved(page))
+		return free_reserved_page(page);
+
 	BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
 	if (atomic_dec_and_test(&page->pt_frag_refcount)) {
 		pgtable_pmd_page_dtor(page);
@@ -356,7 +362,7 @@
 		pmd_fragment_free(table);
 		break;
 	case PUD_INDEX:
-		kmem_cache_free(PGT_CACHE(PUD_CACHE_INDEX), table);
+		__pud_free(table);
 		break;
 #if defined(CONFIG_PPC_4K_PAGES) && defined(CONFIG_HUGETLB_PAGE)
 		/* 16M hugepd directory at pud level */
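
The PageReserved() check in pmd_fragment_free() catches page-table
pages that came from memblock during early boot (those are marked
reserved) rather than from the page allocator; they must be returned
with free_reserved_page() instead of going through the fragment
refcount. A kernel-context sketch of the dispatch (not a new helper in
the patch, and with the dtor call elided):

/*
 * Boot-time (memblock) page-table pages bypass the fragment refcount
 * entirely; free_reserved_page() also fixes up the managed-page
 * accounting for them.
 */
static void pt_page_free_sketch(struct page *page)
{
	if (PageReserved(page)) {
		free_reserved_page(page);
		return;
	}
	if (atomic_dec_and_test(&page->pt_frag_refcount))
		__free_page(page);
}
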
diff --git a/arch/powerpc/mm/book3s64/pkeys.c b/arch/powerpc/mm/book3s64/pkeys.c
index d174106..69a6b87 100644
--- a/arch/powerpc/mm/book3s64/pkeys.c
+++ b/arch/powerpc/mm/book3s64/pkeys.c
@@ -10,58 +10,103 @@
 #include <asm/mmu.h>
 #include <asm/setup.h>
 #include <linux/pkeys.h>
-#include <linux/of_device.h>
+#include <linux/of_fdt.h>
 
-DEFINE_STATIC_KEY_TRUE(pkey_disabled);
-int  pkeys_total;		/* Total pkeys as per device tree */
-u32  initial_allocation_mask;   /* Bits set for the initially allocated keys */
-u32  reserved_allocation_mask;  /* Bits set for reserved keys */
-static bool pkey_execute_disable_supported;
-static bool pkeys_devtree_defined;	/* property exported by device tree */
-static u64 pkey_amr_mask;		/* Bits in AMR not to be touched */
-static u64 pkey_iamr_mask;		/* Bits in AMR not to be touched */
-static u64 pkey_uamor_mask;		/* Bits in UMOR not to be touched */
+int  num_pkey;		/* Max number of pkeys supported */
+/*
+ * Keys marked in the reservation list cannot be allocated by userspace.
+ */
+u32 reserved_allocation_mask __ro_after_init;
+
+/* Bits set for the initially allocated keys */
+static u32 initial_allocation_mask __ro_after_init;
+
+/*
+ * Even if we allocate keys with sys_pkey_alloc(), we need to make sure
+ * other threads still find access denied when using the same keys.
+ */
+static u64 default_amr = ~0x0UL;
+static u64 default_iamr = 0x5555555555555555UL;
+u64 default_uamor __ro_after_init;
+/*
+ * Key used to implement PROT_EXEC mmap. Denies READ/WRITE.
+ * We pick key 2 because key 0 is special and key 1 is reserved as per the ISA.
+ */
 static int execute_only_key = 2;
+static bool pkey_execute_disable_supported;
+
 
 #define AMR_BITS_PER_PKEY 2
 #define AMR_RD_BIT 0x1UL
 #define AMR_WR_BIT 0x2UL
 #define IAMR_EX_BIT 0x1UL
-#define PKEY_REG_BITS (sizeof(u64)*8)
+#define PKEY_REG_BITS (sizeof(u64) * 8)
 #define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey+1) * AMR_BITS_PER_PKEY))
 
-static void scan_pkey_feature(void)
+static int __init dt_scan_storage_keys(unsigned long node,
+				       const char *uname, int depth,
+				       void *data)
 {
-	u32 vals[2];
-	struct device_node *cpu;
+	const char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+	const __be32 *prop;
+	int *pkeys_total = (int *) data;
 
-	cpu = of_find_node_by_type(NULL, "cpu");
-	if (!cpu)
-		return;
+	/* We are scanning "cpu" nodes only */
+	if (type == NULL || strcmp(type, "cpu") != 0)
+		return 0;
 
-	if (of_property_read_u32_array(cpu,
-			"ibm,processor-storage-keys", vals, 2))
-		return;
+	prop = of_get_flat_dt_prop(node, "ibm,processor-storage-keys", NULL);
+	if (!prop)
+		return 0;
+	*pkeys_total = be32_to_cpu(prop[0]);
+	return 1;
+}
+
+static int scan_pkey_feature(void)
+{
+	int ret;
+	int pkeys_total = 0;
 
 	/*
-	 * Since any pkey can be used for data or execute, we will just treat
-	 * all keys as equal and track them as one entity.
+	 * Pkey is not supported with Radix translation.
 	 */
-	pkeys_total = vals[0];
-	pkeys_devtree_defined = true;
+	if (early_radix_enabled())
+		return 0;
+
+	/*
+	 * Only P7 and above supports SPRN_AMR update with MSR[PR] = 1
+	 */
+	if (!early_cpu_has_feature(CPU_FTR_ARCH_206))
+		return 0;
+
+	ret = of_scan_flat_dt(dt_scan_storage_keys, &pkeys_total);
+	if (ret == 0) {
+		/*
+		 * Let's assume 32 pkeys on P8/P9 bare metal, if it's not defined by the
+		 * device tree. We make this exception since some versions of skiboot
+		 * forgot to expose this property on power8/9.
+		 */
+		if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+			unsigned long pvr = mfspr(SPRN_PVR);
+
+			if (PVR_VER(pvr) == PVR_POWER8 || PVR_VER(pvr) == PVR_POWER8E ||
+			    PVR_VER(pvr) == PVR_POWER8NVL || PVR_VER(pvr) == PVR_POWER9)
+				pkeys_total = 32;
+		}
+	}
+
+	/*
+	 * Adjust the upper limit, based on the number of bits supported by
+	 * arch-neutral code.
+	 */
+	pkeys_total = min_t(int, pkeys_total,
+			    ((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT) + 1));
+	return pkeys_total;
 }
 
-static inline bool pkey_mmu_enabled(void)
+void __init pkey_early_init_devtree(void)
 {
-	if (firmware_has_feature(FW_FEATURE_LPAR))
-		return pkeys_total;
-	else
-		return cpu_has_feature(CPU_FTR_PKEY);
-}
-
-static int pkey_initialize(void)
-{
-	int os_reserved, i;
+	int pkeys_total, i;
 
 	/*
 	 * We define PKEY_DISABLE_EXECUTE in addition to the arch-neutral
@@ -80,31 +125,14 @@
 				!= (sizeof(u64) * BITS_PER_BYTE));
 
 	/* scan the device tree for pkey feature */
-	scan_pkey_feature();
+	pkeys_total = scan_pkey_feature();
+	if (!pkeys_total)
+		goto out;
 
-	/*
-	 * Let's assume 32 pkeys on P8 bare metal, if its not defined by device
-	 * tree. We make this exception since skiboot forgot to expose this
-	 * property on power8.
-	 */
-	if (!pkeys_devtree_defined && !firmware_has_feature(FW_FEATURE_LPAR) &&
-			cpu_has_feature(CPU_FTRS_POWER8))
-		pkeys_total = 32;
+	/* Allow all keys to be modified by default */
+	default_uamor = ~0x0UL;
 
-	/*
-	 * Adjust the upper limit, based on the number of bits supported by
-	 * arch-neutral code.
-	 */
-	pkeys_total = min_t(int, pkeys_total,
-			((ARCH_VM_PKEY_FLAGS >> VM_PKEY_SHIFT)+1));
-
-	if (!pkey_mmu_enabled() || radix_enabled() || !pkeys_total)
-		static_branch_enable(&pkey_disabled);
-	else
-		static_branch_disable(&pkey_disabled);
-
-	if (static_branch_likely(&pkey_disabled))
-		return 0;
+	cur_cpu_spec->mmu_features |= MMU_FTR_PKEY;
 
 	/*
 	 * The device tree cannot be relied to indicate support for
@@ -118,53 +146,86 @@
 #ifdef CONFIG_PPC_4K_PAGES
 	/*
 	 * The OS can manage only 8 pkeys due to its inability to represent them
-	 * in the Linux 4K PTE.
+	 * in the Linux 4K PTE. Mark all other keys reserved.
 	 */
-	os_reserved = pkeys_total - 8;
+	num_pkey = min(8, pkeys_total);
 #else
-	os_reserved = 0;
+	num_pkey = pkeys_total;
 #endif
-	/* Bits are in LE format. */
-	reserved_allocation_mask = (0x1 << 1) | (0x1 << execute_only_key);
 
-	/* register mask is in BE format */
-	pkey_amr_mask = ~0x0ul;
-	pkey_amr_mask &= ~(0x3ul << pkeyshift(0));
-
-	pkey_iamr_mask = ~0x0ul;
-	pkey_iamr_mask &= ~(0x3ul << pkeyshift(0));
-	pkey_iamr_mask &= ~(0x3ul << pkeyshift(execute_only_key));
-
-	pkey_uamor_mask = ~0x0ul;
-	pkey_uamor_mask &= ~(0x3ul << pkeyshift(0));
-	pkey_uamor_mask &= ~(0x3ul << pkeyshift(execute_only_key));
-
-	/* mark the rest of the keys as reserved and hence unavailable */
-	for (i = (pkeys_total - os_reserved); i < pkeys_total; i++) {
-		reserved_allocation_mask |= (0x1 << i);
-		pkey_uamor_mask &= ~(0x3ul << pkeyshift(i));
-	}
-	initial_allocation_mask = reserved_allocation_mask | (0x1 << 0);
-
-	if (unlikely((pkeys_total - os_reserved) <= execute_only_key)) {
+	if (unlikely(num_pkey <= execute_only_key) || !pkey_execute_disable_supported) {
 		/*
 		 * Insufficient number of keys to support
 		 * execute only key. Mark it unavailable.
-		 * Any AMR, UAMOR, IAMR bit set for
-		 * this key is irrelevant since this key
-		 * can never be allocated.
 		 */
 		execute_only_key = -1;
+	} else {
+		/*
+		 * Mark the execute_only_pkey as not available for
+		 * user allocation via pkey_alloc.
+		 */
+		reserved_allocation_mask |= (0x1 << execute_only_key);
+
+		/*
+		 * Deny READ/WRITE for execute_only_key.
+		 * Allow execute in IAMR.
+		 */
+		default_amr  |= (0x3ul << pkeyshift(execute_only_key));
+		default_iamr &= ~(0x1ul << pkeyshift(execute_only_key));
+
+		/*
+		 * Clear the uamor bits for this key.
+		 */
+		default_uamor &= ~(0x3ul << pkeyshift(execute_only_key));
 	}
 
-	return 0;
-}
+	/*
+	 * Allow access for only key 0. And prevent any other modification.
+	 */
+	default_amr   &= ~(0x3ul << pkeyshift(0));
+	default_iamr  &= ~(0x1ul << pkeyshift(0));
+	default_uamor &= ~(0x3ul << pkeyshift(0));
+	/*
+	 * Key 0 is special: we treat it as a preallocated key. We don't
+	 * allow changing AMR bits w.r.t. key 0, but userspace can still
+	 * pkey_free() it.
+	 */
+	initial_allocation_mask |= (0x1 << 0);
 
-arch_initcall(pkey_initialize);
+	/*
+	 * Key 1 is recommended not to be used; see the PowerISA v3.0
+	 * programming note (page 1015).
+	 */
+	reserved_allocation_mask |= (0x1 << 1);
+	default_uamor &= ~(0x3ul << pkeyshift(1));
+
+	/*
+	 * Prevent the usage of OS reserved keys. Update UAMOR
+	 * for those keys. Also mark the rest of the bits in the
+	 * 32 bit mask as reserved.
+	 */
+	for (i = num_pkey; i < 32 ; i++) {
+		reserved_allocation_mask |= (0x1 << i);
+		default_uamor &= ~(0x3ul << pkeyshift(i));
+	}
+	/*
+	 * Prevent the allocation of reserved keys too.
+	 */
+	initial_allocation_mask |= reserved_allocation_mask;
+
+	pr_info("Enabling pkeys with max key count %d\n", num_pkey);
+out:
+	/*
+	 * Set up UAMOR on the boot CPU
+	 */
+	mtspr(SPRN_UAMOR, default_uamor);
+
+	return;
+}
 
 void pkey_mm_init(struct mm_struct *mm)
 {
-	if (static_branch_likely(&pkey_disabled))
+	if (!mmu_has_feature(MMU_FTR_PKEY))
 		return;
 	mm_pkey_allocation_map(mm) = initial_allocation_mask;
 	mm->context.execute_only_pkey = execute_only_key;
@@ -196,30 +257,6 @@
 	mtspr(SPRN_IAMR, value);
 }
 
-static inline u64 read_uamor(void)
-{
-	return mfspr(SPRN_UAMOR);
-}
-
-static inline void write_uamor(u64 value)
-{
-	mtspr(SPRN_UAMOR, value);
-}
-
-static bool is_pkey_enabled(int pkey)
-{
-	u64 uamor = read_uamor();
-	u64 pkey_bits = 0x3ul << pkeyshift(pkey);
-	u64 uamor_pkey_bits = (uamor & pkey_bits);
-
-	/*
-	 * Both the bits in UAMOR corresponding to the key should be set or
-	 * reset.
-	 */
-	WARN_ON(uamor_pkey_bits && (uamor_pkey_bits != pkey_bits));
-	return !!(uamor_pkey_bits);
-}
-
 static inline void init_amr(int pkey, u8 init_bits)
 {
 	u64 new_amr_bits = (((u64)init_bits & 0x3UL) << pkeyshift(pkey));
@@ -245,8 +282,18 @@
 {
 	u64 new_amr_bits = 0x0ul;
 	u64 new_iamr_bits = 0x0ul;
+	u64 pkey_bits, uamor_pkey_bits;
 
-	if (!is_pkey_enabled(pkey))
+	/*
+	 * Check whether the key is disabled by UAMOR.
+	 */
+	pkey_bits = 0x3ul << pkeyshift(pkey);
+	uamor_pkey_bits = (default_uamor & pkey_bits);
+
+	/*
+	 * Both the bits in UAMOR corresponding to the key should be set
+	 */
+	if (uamor_pkey_bits != pkey_bits)
 		return -EINVAL;
 
 	if (init_val & PKEY_DISABLE_EXECUTE) {
@@ -268,7 +315,7 @@
 
 void thread_pkey_regs_save(struct thread_struct *thread)
 {
-	if (static_branch_likely(&pkey_disabled))
+	if (!mmu_has_feature(MMU_FTR_PKEY))
 		return;
 
 	/*
@@ -276,38 +323,33 @@
 	 */
 	thread->amr = read_amr();
 	thread->iamr = read_iamr();
-	thread->uamor = read_uamor();
 }
 
 void thread_pkey_regs_restore(struct thread_struct *new_thread,
 			      struct thread_struct *old_thread)
 {
-	if (static_branch_likely(&pkey_disabled))
+	if (!mmu_has_feature(MMU_FTR_PKEY))
 		return;
 
 	if (old_thread->amr != new_thread->amr)
 		write_amr(new_thread->amr);
 	if (old_thread->iamr != new_thread->iamr)
 		write_iamr(new_thread->iamr);
-	if (old_thread->uamor != new_thread->uamor)
-		write_uamor(new_thread->uamor);
 }
 
 void thread_pkey_regs_init(struct thread_struct *thread)
 {
-	if (static_branch_likely(&pkey_disabled))
+	if (!mmu_has_feature(MMU_FTR_PKEY))
 		return;
 
-	thread->amr = pkey_amr_mask;
-	thread->iamr = pkey_iamr_mask;
-	thread->uamor = pkey_uamor_mask;
+	thread->amr   = default_amr;
+	thread->iamr  = default_iamr;
 
-	write_uamor(pkey_uamor_mask);
-	write_amr(pkey_amr_mask);
-	write_iamr(pkey_iamr_mask);
+	write_amr(default_amr);
+	write_iamr(default_iamr);
 }
 
-int __execute_only_pkey(struct mm_struct *mm)
+int execute_only_pkey(struct mm_struct *mm)
 {
 	return mm->context.execute_only_pkey;
 }
@@ -366,7 +408,7 @@
 
 bool arch_pte_access_permitted(u64 pte, bool write, bool execute)
 {
-	if (static_branch_likely(&pkey_disabled))
+	if (!mmu_has_feature(MMU_FTR_PKEY))
 		return true;
 
 	return pkey_access_permitted(pte_to_pkey_bits(pte), write, execute);
@@ -383,7 +425,7 @@
 bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write,
 			       bool execute, bool foreign)
 {
-	if (static_branch_likely(&pkey_disabled))
+	if (!mmu_has_feature(MMU_FTR_PKEY))
 		return true;
 	/*
 	 * Do not enforce our key-permissions on a foreign vma.
@@ -396,7 +438,7 @@
 
 void arch_dup_pkeys(struct mm_struct *oldmm, struct mm_struct *mm)
 {
-	if (static_branch_likely(&pkey_disabled))
+	if (!mmu_has_feature(MMU_FTR_PKEY))
 		return;
 
 	/* Duplicate the oldmm pkey state in mm: */
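
All of the default_amr/default_iamr/default_uamor arithmetic above
follows one layout rule: each key owns two adjacent bits of the 64-bit
register, allocated from the most-significant end, which is what
pkeyshift() encodes. A runnable sketch of how the defaults for key 0
and an execute-only key 2 are composed:

#include <stdint.h>
#include <stdio.h>

#define PKEY_REG_BITS		(sizeof(uint64_t) * 8)
#define AMR_BITS_PER_PKEY	2
#define pkeyshift(pkey)	(PKEY_REG_BITS - (((pkey) + 1) * AMR_BITS_PER_PKEY))

int main(void)
{
	uint64_t amr = ~0x0ULL;			/* deny everything by default */

	amr &= ~(0x3ULL << pkeyshift(0));	/* key 0: allow read and write */
	amr |= 0x3ULL << pkeyshift(2);		/* execute-only key: deny RD/WR */

	printf("key 0 occupies bits %d-%d\n",
	       (int)pkeyshift(0), (int)pkeyshift(0) + 1);
	printf("default AMR = 0x%016llx\n", (unsigned long long)amr);
	return 0;
}
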
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index bb00e0c..28c7849 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -15,7 +15,7 @@
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
 #include <linux/string_helpers.h>
-#include <linux/stop_machine.h>
+#include <linux/memory.h>
 
 #include <asm/pgalloc.h>
 #include <asm/mmu_context.h>
@@ -34,6 +34,7 @@
 
 unsigned int mmu_pid_bits;
 unsigned int mmu_base_pid;
+unsigned int radix_mem_block_size __ro_after_init;
 
 static __ref void *early_alloc_pgtable(unsigned long size, int nid,
 			unsigned long region_start, unsigned long region_end)
@@ -56,6 +57,13 @@
 	return ptr;
 }
 
+/*
+ * When allocating pud or pmd pointers, we allocate a complete page
+ * of PAGE_SIZE rather than PUD_TABLE_SIZE or PMD_TABLE_SIZE. This
+ * is to ensure that the page obtained from the memblock allocator
+ * can be completely used as page table page and can be freed
+ * correctly when the page table entries are removed.
+ */
 static int early_map_kernel_page(unsigned long ea, unsigned long pa,
 			  pgprot_t flags,
 			  unsigned int map_page_size,
@@ -72,8 +80,8 @@
 	pgdp = pgd_offset_k(ea);
 	p4dp = p4d_offset(pgdp, ea);
 	if (p4d_none(*p4dp)) {
-		pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid,
-						region_start, region_end);
+		pudp = early_alloc_pgtable(PAGE_SIZE, nid,
+					   region_start, region_end);
 		p4d_populate(&init_mm, p4dp, pudp);
 	}
 	pudp = pud_offset(p4dp, ea);
@@ -82,8 +90,8 @@
 		goto set_the_pte;
 	}
 	if (pud_none(*pudp)) {
-		pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid,
-						region_start, region_end);
+		pmdp = early_alloc_pgtable(PAGE_SIZE, nid, region_start,
+					   region_end);
 		pud_populate(&init_mm, pudp, pmdp);
 	}
 	pmdp = pmd_offset(pudp, ea);
@@ -259,6 +267,7 @@
 
 static int __meminit create_physical_mapping(unsigned long start,
 					     unsigned long end,
+					     unsigned long max_mapping_size,
 					     int nid, pgprot_t _prot)
 {
 	unsigned long vaddr, addr, mapping_size = 0;
@@ -272,6 +281,8 @@
 		int rc;
 
 		gap = next_boundary(addr, end) - addr;
+		if (gap > max_mapping_size)
+			gap = max_mapping_size;
 		previous_size = mapping_size;
 		prev_exec = exec;
 
@@ -322,8 +333,9 @@
 
 	/* We don't support slb for radix */
 	mmu_slb_size = 0;
+
 	/*
-	 * Create the linear mapping, using standard page size for now
+	 * Create the linear mapping
 	 */
 	for_each_memblock(memory, reg) {
 		/*
@@ -339,6 +351,7 @@
 
 		WARN_ON(create_physical_mapping(reg->base,
 						reg->base + reg->size,
+						radix_mem_block_size,
 						-1, PAGE_KERNEL));
 	}
 
@@ -479,6 +492,57 @@
 	return 1;
 }
 
+#ifdef CONFIG_MEMORY_HOTPLUG
+static int __init probe_memory_block_size(unsigned long node, const char *uname, int
+					  depth, void *data)
+{
+	unsigned long *mem_block_size = (unsigned long *)data;
+	const __be64 *prop;
+	int len;
+
+	if (depth != 1)
+		return 0;
+
+	if (strcmp(uname, "ibm,dynamic-reconfiguration-memory"))
+		return 0;
+
+	prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
+	if (!prop || len < sizeof(__be64))
+		/*
+		 * Nothing in the device tree
+		 */
+		*mem_block_size = MIN_MEMORY_BLOCK_SIZE;
+	else
+		*mem_block_size = be64_to_cpup(prop);
+	return 1;
+}
+
+static unsigned long radix_memory_block_size(void)
+{
+	unsigned long mem_block_size = MIN_MEMORY_BLOCK_SIZE;
+
+	/*
+	 * The OPAL firmware feature is set by now, so it is
+	 * safe to test for it.
+	 */
+	if (firmware_has_feature(FW_FEATURE_OPAL))
+		mem_block_size = 1UL * 1024 * 1024 * 1024;
+	else
+		of_scan_flat_dt(probe_memory_block_size, &mem_block_size);
+
+	return mem_block_size;
+}
+
+#else   /* CONFIG_MEMORY_HOTPLUG */
+
+static unsigned long radix_memory_block_size(void)
+{
+	return 1UL * 1024 * 1024 * 1024;
+}
+
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
+
 void __init radix__early_init_devtree(void)
 {
 	int rc;
@@ -487,17 +551,27 @@
 	 * Try to find the available page sizes in the device-tree
 	 */
 	rc = of_scan_flat_dt(radix_dt_scan_page_sizes, NULL);
-	if (rc != 0)  /* Found */
-		goto found;
-	/*
-	 * let's assume we have page 4k and 64k support
-	 */
-	mmu_psize_defs[MMU_PAGE_4K].shift = 12;
-	mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
+	if (!rc) {
+		/*
+		 * No page size details found in device tree.
+		 * Let's assume we have page 4k and 64k support
+		 */
+		mmu_psize_defs[MMU_PAGE_4K].shift = 12;
+		mmu_psize_defs[MMU_PAGE_4K].ap = 0x0;
 
-	mmu_psize_defs[MMU_PAGE_64K].shift = 16;
-	mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
-found:
+		mmu_psize_defs[MMU_PAGE_64K].shift = 16;
+		mmu_psize_defs[MMU_PAGE_64K].ap = 0x5;
+	}
+
+	/*
+	 * Max mapping size used when mapping pages. We don't use
+	 * ppc_md.memory_block_size() here because this gets called
+	 * early, before the machine probe has run. Also, the
+	 * pseries implementation only checks for ibm,lmb-size.
+	 * All hypervisors supporting radix do expose that device
+	 * tree node.
+	 */
+	radix_mem_block_size = radix_memory_block_size();
 	return;
 }
 
@@ -519,8 +593,10 @@
 	if (disabled || !early_radix_enabled())
 		return;
 
-	if (smp_processor_id() == boot_cpuid)
+	if (smp_processor_id() == boot_cpuid) {
 		pr_info("Activating Kernel Userspace Execution Prevention\n");
+		cur_cpu_spec->mmu_features |= MMU_FTR_KUEP;
+	}
 
 	/*
 	 * Radix always uses key0 of the IAMR to determine if an access is
@@ -544,6 +620,10 @@
 
 	/* Make sure userspace can't change the AMR */
 	mtspr(SPRN_UAMOR, 0);
+
+	/*
+	 * Set the default kernel AMR values on all cpus.
+	 */
 	mtspr(SPRN_AMR, AMR_KUAP_BLOCKED);
 	isync();
 }
@@ -700,30 +780,19 @@
 	pud_clear(pud);
 }
 
-struct change_mapping_params {
-	pte_t *pte;
-	unsigned long start;
-	unsigned long end;
-	unsigned long aligned_start;
-	unsigned long aligned_end;
-};
-
-static int __meminit stop_machine_change_mapping(void *data)
+static void free_pud_table(pud_t *pud_start, p4d_t *p4d)
 {
-	struct change_mapping_params *params =
-			(struct change_mapping_params *)data;
+	pud_t *pud;
+	int i;
 
-	if (!data)
-		return -1;
+	for (i = 0; i < PTRS_PER_PUD; i++) {
+		pud = pud_start + i;
+		if (!pud_none(*pud))
+			return;
+	}
 
-	spin_unlock(&init_mm.page_table_lock);
-	pte_clear(&init_mm, params->aligned_start, params->pte);
-	create_physical_mapping(__pa(params->aligned_start),
-				__pa(params->start), -1, PAGE_KERNEL);
-	create_physical_mapping(__pa(params->end), __pa(params->aligned_end),
-				-1, PAGE_KERNEL);
-	spin_lock(&init_mm.page_table_lock);
-	return 0;
+	pud_free(&init_mm, pud_start);
+	p4d_clear(p4d);
 }
 
 static void remove_pte_table(pte_t *pte_start, unsigned long addr,
@@ -754,53 +823,7 @@
 	}
 }
 
-/*
- * clear the pte and potentially split the mapping helper
- */
-static void __meminit split_kernel_mapping(unsigned long addr, unsigned long end,
-				unsigned long size, pte_t *pte)
-{
-	unsigned long mask = ~(size - 1);
-	unsigned long aligned_start = addr & mask;
-	unsigned long aligned_end = addr + size;
-	struct change_mapping_params params;
-	bool split_region = false;
-
-	if ((end - addr) < size) {
-		/*
-		 * We're going to clear the PTE, but not flushed
-		 * the mapping, time to remap and flush. The
-		 * effects if visible outside the processor or
-		 * if we are running in code close to the
-		 * mapping we cleared, we are in trouble.
-		 */
-		if (overlaps_kernel_text(aligned_start, addr) ||
-			overlaps_kernel_text(end, aligned_end)) {
-			/*
-			 * Hack, just return, don't pte_clear
-			 */
-			WARN_ONCE(1, "Linear mapping %lx->%lx overlaps kernel "
-				  "text, not splitting\n", addr, end);
-			return;
-		}
-		split_region = true;
-	}
-
-	if (split_region) {
-		params.pte = pte;
-		params.start = addr;
-		params.end = end;
-		params.aligned_start = addr & ~(size - 1);
-		params.aligned_end = min_t(unsigned long, aligned_end,
-				(unsigned long)__va(memblock_end_of_DRAM()));
-		stop_machine(stop_machine_change_mapping, &params, NULL);
-		return;
-	}
-
-	pte_clear(&init_mm, addr, pte);
-}
-
-static void remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
+static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
 			     unsigned long end)
 {
 	unsigned long next;
@@ -815,7 +838,12 @@
 			continue;
 
 		if (pmd_is_leaf(*pmd)) {
-			split_kernel_mapping(addr, end, PMD_SIZE, (pte_t *)pmd);
+			if (!IS_ALIGNED(addr, PMD_SIZE) ||
+			    !IS_ALIGNED(next, PMD_SIZE)) {
+				WARN_ONCE(1, "%s: unaligned range\n", __func__);
+				continue;
+			}
+			pte_clear(&init_mm, addr, (pte_t *)pmd);
 			continue;
 		}
 
@@ -825,7 +853,7 @@
 	}
 }
 
-static void remove_pud_table(pud_t *pud_start, unsigned long addr,
+static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
 			     unsigned long end)
 {
 	unsigned long next;
@@ -840,7 +868,12 @@
 			continue;
 
 		if (pud_is_leaf(*pud)) {
-			split_kernel_mapping(addr, end, PUD_SIZE, (pte_t *)pud);
+			if (!IS_ALIGNED(addr, PUD_SIZE) ||
+			    !IS_ALIGNED(next, PUD_SIZE)) {
+				WARN_ONCE(1, "%s: unaligned range\n", __func__);
+				continue;
+			}
+			pte_clear(&init_mm, addr, (pte_t *)pud);
 			continue;
 		}
 
@@ -868,12 +901,19 @@
 			continue;
 
 		if (p4d_is_leaf(*p4d)) {
-			split_kernel_mapping(addr, end, P4D_SIZE, (pte_t *)p4d);
+			if (!IS_ALIGNED(addr, P4D_SIZE) ||
+			    !IS_ALIGNED(next, P4D_SIZE)) {
+				WARN_ONCE(1, "%s: unaligned range\n", __func__);
+				continue;
+			}
+
+			pte_clear(&init_mm, addr, (pte_t *)pgd);
 			continue;
 		}
 
 		pud_base = (pud_t *)p4d_page_vaddr(*p4d);
 		remove_pud_table(pud_base, addr, next);
+		free_pud_table(pud_base, p4d);
 	}
 
 	spin_unlock(&init_mm.page_table_lock);
@@ -889,7 +929,8 @@
 		return -1;
 	}
 
-	return create_physical_mapping(__pa(start), __pa(end), nid, prot);
+	return create_physical_mapping(__pa(start), __pa(end),
+				       radix_mem_block_size, nid, prot);
 }
 
 int __meminit radix__remove_section_mapping(unsigned long start, unsigned long end)
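
create_physical_mapping() now clamps every gap to max_mapping_size, so
the linear map never uses a page larger than the memory block size.
That is what lets the stop_machine()-based split_kernel_mapping() path
above be removed: hot-removing one memory block can no longer land in
the middle of a bigger (say 1G) mapping that would need splitting. A
sketch of the size-selection consequence (sizes assume the usual radix
geometry; the real code also checks alignment and supported page
sizes):

#include <stdio.h>

#define PMD_SIZE	(2UL << 20)	/* 2M */
#define PUD_SIZE	(1UL << 30)	/* 1G */

static unsigned long pick_mapping_size(unsigned long gap,
				       unsigned long max_mapping_size)
{
	if (gap > max_mapping_size)
		gap = max_mapping_size;
	if (gap >= PUD_SIZE)
		return PUD_SIZE;
	if (gap >= PMD_SIZE)
		return PMD_SIZE;
	return 4096;	/* base page size; 4K here for simplicity */
}

int main(void)
{
	/* 2G to map, but 256M memory blocks: 1G mappings are avoided */
	printf("%lu\n", pick_mapping_size(2UL << 30, 256UL << 20));
	return 0;
}
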
diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c
index b5cc9b2..0d23376 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -16,6 +16,7 @@
 #include <asm/tlbflush.h>
 #include <asm/trace.h>
 #include <asm/cputhreads.h>
+#include <asm/plpar_wrappers.h>
 
 #define RIC_FLUSH_TLB 0
 #define RIC_FLUSH_PWC 1
@@ -694,7 +695,14 @@
 			goto local;
 		}
 
-		if (cputlb_use_tlbie()) {
+		if (!mmu_has_feature(MMU_FTR_GTSE)) {
+			unsigned long tgt = H_RPTI_TARGET_CMMU;
+
+			if (atomic_read(&mm->context.copros) > 0)
+				tgt |= H_RPTI_TARGET_NMMU;
+			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
+					       H_RPTI_PAGE_ALL, 0, -1UL);
+		} else if (cputlb_use_tlbie()) {
 			if (mm_needs_flush_escalation(mm))
 				_tlbie_pid(pid, RIC_FLUSH_ALL);
 			else
@@ -727,7 +735,16 @@
 				goto local;
 			}
 		}
-		if (cputlb_use_tlbie())
+		if (!mmu_has_feature(MMU_FTR_GTSE)) {
+			unsigned long tgt = H_RPTI_TARGET_CMMU;
+			unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+					     H_RPTI_TYPE_PRT;
+
+			if (atomic_read(&mm->context.copros) > 0)
+				tgt |= H_RPTI_TARGET_NMMU;
+			pseries_rpt_invalidate(pid, tgt, type,
+					       H_RPTI_PAGE_ALL, 0, -1UL);
+		} else if (cputlb_use_tlbie())
 			_tlbie_pid(pid, RIC_FLUSH_ALL);
 		else
 			_tlbiel_pid_multicast(mm, pid, RIC_FLUSH_ALL);
@@ -760,7 +777,19 @@
 			exit_flush_lazy_tlbs(mm);
 			goto local;
 		}
-		if (cputlb_use_tlbie())
+		if (!mmu_has_feature(MMU_FTR_GTSE)) {
+			unsigned long tgt, pg_sizes, size;
+
+			tgt = H_RPTI_TARGET_CMMU;
+			pg_sizes = psize_to_rpti_pgsize(psize);
+			size = 1UL << mmu_psize_to_shift(psize);
+
+			if (atomic_read(&mm->context.copros) > 0)
+				tgt |= H_RPTI_TARGET_NMMU;
+			pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
+					       pg_sizes, vmaddr,
+					       vmaddr + size);
+		} else if (cputlb_use_tlbie())
 			_tlbie_va(vmaddr, pid, psize, RIC_FLUSH_TLB);
 		else
 			_tlbiel_va_multicast(mm, vmaddr, pid, psize, RIC_FLUSH_TLB);
@@ -810,7 +839,14 @@
  */
 void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
-	if (cputlb_use_tlbie())
+	if (!mmu_has_feature(MMU_FTR_GTSE)) {
+		unsigned long tgt = H_RPTI_TARGET_CMMU | H_RPTI_TARGET_NMMU;
+		unsigned long type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+				     H_RPTI_TYPE_PRT;
+
+		pseries_rpt_invalidate(0, tgt, type, H_RPTI_PAGE_ALL,
+				       start, end);
+	} else if (cputlb_use_tlbie())
 		_tlbie_pid(0, RIC_FLUSH_ALL);
 	else
 		_tlbiel_kernel_broadcast();
@@ -864,7 +900,17 @@
 				nr_pages > tlb_local_single_page_flush_ceiling);
 	}
 
-	if (full) {
+	if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
+		unsigned long tgt = H_RPTI_TARGET_CMMU;
+		unsigned long pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
+
+		if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+			pg_sizes |= psize_to_rpti_pgsize(MMU_PAGE_2M);
+		if (atomic_read(&mm->context.copros) > 0)
+			tgt |= H_RPTI_TARGET_NMMU;
+		pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB, pg_sizes,
+				       start, end);
+	} else if (full) {
 		if (local) {
 			_tlbiel_pid(pid, RIC_FLUSH_TLB);
 		} else {
@@ -1046,7 +1092,17 @@
 				nr_pages > tlb_local_single_page_flush_ceiling);
 	}
 
-	if (full) {
+	if (!mmu_has_feature(MMU_FTR_GTSE) && !local) {
+		unsigned long tgt = H_RPTI_TARGET_CMMU;
+		unsigned long type = H_RPTI_TYPE_TLB;
+		unsigned long pg_sizes = psize_to_rpti_pgsize(psize);
+
+		if (also_pwc)
+			type |= H_RPTI_TYPE_PWC;
+		if (atomic_read(&mm->context.copros) > 0)
+			tgt |= H_RPTI_TARGET_NMMU;
+		pseries_rpt_invalidate(pid, tgt, type, pg_sizes, start, end);
+	} else if (full) {
 		if (local) {
 			_tlbiel_pid(pid, also_pwc ? RIC_FLUSH_ALL : RIC_FLUSH_TLB);
 		} else {
@@ -1111,7 +1167,19 @@
 			exit_flush_lazy_tlbs(mm);
 			goto local;
 		}
-		if (cputlb_use_tlbie())
+		if (!mmu_has_feature(MMU_FTR_GTSE)) {
+			unsigned long tgt, type, pg_sizes;
+
+			tgt = H_RPTI_TARGET_CMMU;
+			type = H_RPTI_TYPE_TLB | H_RPTI_TYPE_PWC |
+			       H_RPTI_TYPE_PRT;
+			pg_sizes = psize_to_rpti_pgsize(mmu_virtual_psize);
+
+			if (atomic_read(&mm->context.copros) > 0)
+				tgt |= H_RPTI_TARGET_NMMU;
+			pseries_rpt_invalidate(pid, tgt, type, pg_sizes,
+					       addr, end);
+		} else if (cputlb_use_tlbie())
 			_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
 		else
 			_tlbiel_va_range_multicast(mm,
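
Every flush path in radix_tlb.c gains the same guarded branch: when the
hypervisor has not granted guest translation shootdown (MMU_FTR_GTSE
clear), the guest may not issue tlbie itself and instead asks the
hypervisor to invalidate on its behalf through the H_RPT_INVALIDATE
wrapper. The recurring shape, pulled out for readability (a
kernel-context sketch, not a new function in the patch):

static void flush_pid_sketch(struct mm_struct *mm, unsigned long pid)
{
	/* core MMU, plus the nest MMU while coprocessors hold translations */
	unsigned long tgt = H_RPTI_TARGET_CMMU;

	if (atomic_read(&mm->context.copros) > 0)
		tgt |= H_RPTI_TARGET_NMMU;

	/* flush the TLB for the whole address space of this PID */
	pseries_rpt_invalidate(pid, tgt, H_RPTI_TYPE_TLB,
			       H_RPTI_PAGE_ALL, 0, -1UL);
}
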
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 59327ce..b2eeea3 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -14,6 +14,8 @@
 #include <asm/prom.h>
 #include <asm/drmem.h>
 
+static int n_root_addr_cells, n_root_size_cells;
+
 static struct drmem_lmb_info __drmem_info;
 struct drmem_lmb_info *drmem_info = &__drmem_info;
 
@@ -189,12 +191,13 @@
 	return rc;
 }
 
-static void __init read_drconf_v1_cell(struct drmem_lmb *lmb,
+static void read_drconf_v1_cell(struct drmem_lmb *lmb,
 				       const __be32 **prop)
 {
 	const __be32 *p = *prop;
 
-	lmb->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p);
+	lmb->base_addr = of_read_number(p, n_root_addr_cells);
+	p += n_root_addr_cells;
 	lmb->drc_index = of_read_number(p++, 1);
 
 	p++; /* skip reserved field */
@@ -205,29 +208,33 @@
 	*prop = p;
 }
 
-static void __init __walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm,
-			void (*func)(struct drmem_lmb *, const __be32 **))
+static int
+__walk_drmem_v1_lmbs(const __be32 *prop, const __be32 *usm, void *data,
+		     int (*func)(struct drmem_lmb *, const __be32 **, void *))
 {
 	struct drmem_lmb lmb;
 	u32 i, n_lmbs;
+	int ret = 0;
 
 	n_lmbs = of_read_number(prop++, 1);
-	if (n_lmbs == 0)
-		return;
-
 	for (i = 0; i < n_lmbs; i++) {
 		read_drconf_v1_cell(&lmb, &prop);
-		func(&lmb, &usm);
+		ret = func(&lmb, &usm, data);
+		if (ret)
+			break;
 	}
+
+	return ret;
 }
 
-static void __init read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
+static void read_drconf_v2_cell(struct of_drconf_cell_v2 *dr_cell,
 				       const __be32 **prop)
 {
 	const __be32 *p = *prop;
 
 	dr_cell->seq_lmbs = of_read_number(p++, 1);
-	dr_cell->base_addr = dt_mem_next_cell(dt_root_addr_cells, &p);
+	dr_cell->base_addr = of_read_number(p, n_root_addr_cells);
+	p += n_root_addr_cells;
 	dr_cell->drc_index = of_read_number(p++, 1);
 	dr_cell->aa_index = of_read_number(p++, 1);
 	dr_cell->flags = of_read_number(p++, 1);
@@ -235,17 +242,16 @@
 	*prop = p;
 }
 
-static void __init __walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm,
-			void (*func)(struct drmem_lmb *, const __be32 **))
+static int
+__walk_drmem_v2_lmbs(const __be32 *prop, const __be32 *usm, void *data,
+		     int (*func)(struct drmem_lmb *, const __be32 **, void *))
 {
 	struct of_drconf_cell_v2 dr_cell;
 	struct drmem_lmb lmb;
 	u32 i, j, lmb_sets;
+	int ret = 0;
 
 	lmb_sets = of_read_number(prop++, 1);
-	if (lmb_sets == 0)
-		return;
-
 	for (i = 0; i < lmb_sets; i++) {
 		read_drconf_v2_cell(&dr_cell, &prop);
 
@@ -259,21 +265,29 @@
 			lmb.aa_index = dr_cell.aa_index;
 			lmb.flags = dr_cell.flags;
 
-			func(&lmb, &usm);
+			ret = func(&lmb, &usm, data);
+			if (ret)
+				break;
 		}
 	}
+
+	return ret;
 }
 
 #ifdef CONFIG_PPC_PSERIES
-void __init walk_drmem_lmbs_early(unsigned long node,
-			void (*func)(struct drmem_lmb *, const __be32 **))
+int __init walk_drmem_lmbs_early(unsigned long node, void *data,
+		int (*func)(struct drmem_lmb *, const __be32 **, void *))
 {
 	const __be32 *prop, *usm;
-	int len;
+	int len, ret = -ENODEV;
 
 	prop = of_get_flat_dt_prop(node, "ibm,lmb-size", &len);
 	if (!prop || len < dt_root_size_cells * sizeof(__be32))
-		return;
+		return ret;
+
+	/* Get the address & size cells */
+	n_root_addr_cells = dt_root_addr_cells;
+	n_root_size_cells = dt_root_size_cells;
 
 	drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop);
 
@@ -281,20 +295,21 @@
 
 	prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory", &len);
 	if (prop) {
-		__walk_drmem_v1_lmbs(prop, usm, func);
+		ret = __walk_drmem_v1_lmbs(prop, usm, data, func);
 	} else {
 		prop = of_get_flat_dt_prop(node, "ibm,dynamic-memory-v2",
 					   &len);
 		if (prop)
-			__walk_drmem_v2_lmbs(prop, usm, func);
+			ret = __walk_drmem_v2_lmbs(prop, usm, data, func);
 	}
 
 	memblock_dump_all();
+	return ret;
 }
 
 #endif
 
-static int __init init_drmem_lmb_size(struct device_node *dn)
+static int init_drmem_lmb_size(struct device_node *dn)
 {
 	const __be32 *prop;
 	int len;
@@ -303,12 +318,12 @@
 		return 0;
 
 	prop = of_get_property(dn, "ibm,lmb-size", &len);
-	if (!prop || len < dt_root_size_cells * sizeof(__be32)) {
+	if (!prop || len < n_root_size_cells * sizeof(__be32)) {
 		pr_info("Could not determine LMB size\n");
 		return -1;
 	}
 
-	drmem_info->lmb_size = dt_mem_next_cell(dt_root_size_cells, &prop);
+	drmem_info->lmb_size = of_read_number(prop, n_root_size_cells);
 	return 0;
 }
 
@@ -329,24 +344,36 @@
 	return prop;
 }
 
-void __init walk_drmem_lmbs(struct device_node *dn,
-			    void (*func)(struct drmem_lmb *, const __be32 **))
+int walk_drmem_lmbs(struct device_node *dn, void *data,
+		    int (*func)(struct drmem_lmb *, const __be32 **, void *))
 {
 	const __be32 *prop, *usm;
+	int ret = -ENODEV;
+
+	if (!of_root)
+		return ret;
+
+	/* Get the address & size cells */
+	of_node_get(of_root);
+	n_root_addr_cells = of_n_addr_cells(of_root);
+	n_root_size_cells = of_n_size_cells(of_root);
+	of_node_put(of_root);
 
 	if (init_drmem_lmb_size(dn))
-		return;
+		return ret;
 
 	usm = of_get_usable_memory(dn);
 
 	prop = of_get_property(dn, "ibm,dynamic-memory", NULL);
 	if (prop) {
-		__walk_drmem_v1_lmbs(prop, usm, func);
+		ret = __walk_drmem_v1_lmbs(prop, usm, data, func);
 	} else {
 		prop = of_get_property(dn, "ibm,dynamic-memory-v2", NULL);
 		if (prop)
-			__walk_drmem_v2_lmbs(prop, usm, func);
+			ret = __walk_drmem_v2_lmbs(prop, usm, data, func);
 	}
+
+	return ret;
 }
 
 static void __init init_drmem_v1_lmbs(const __be32 *prop)
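
Both drmem walkers now thread an opaque data pointer through to an
int-returning callback, so a caller can accumulate state and abort the
walk early by returning non-zero, which the walker propagates back. A
usage sketch with a hypothetical callback (count_lmbs() is not part of
the patch):

static int count_lmbs(struct drmem_lmb *lmb, const __be32 **usm,
		      void *data)
{
	(*(unsigned int *)data)++;
	return 0;	/* non-zero here would stop the walk */
}

static unsigned int count_all_lmbs(struct device_node *dn)
{
	unsigned int count = 0;

	walk_drmem_lmbs(dn, &count, count_lmbs);
	return count;
}
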
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 641fc5f..925a723 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -42,39 +42,7 @@
 #include <asm/kup.h>
 #include <asm/inst.h>
 
-/*
- * Check whether the instruction inst is a store using
- * an update addressing form which will update r1.
- */
-static bool store_updates_sp(struct ppc_inst inst)
-{
-	/* check for 1 in the rA field */
-	if (((ppc_inst_val(inst) >> 16) & 0x1f) != 1)
-		return false;
-	/* check major opcode */
-	switch (ppc_inst_primary_opcode(inst)) {
-	case OP_STWU:
-	case OP_STBU:
-	case OP_STHU:
-	case OP_STFSU:
-	case OP_STFDU:
-		return true;
-	case OP_STD:	/* std or stdu */
-		return (ppc_inst_val(inst) & 3) == 1;
-	case OP_31:
-		/* check minor opcode */
-		switch ((ppc_inst_val(inst) >> 1) & 0x3ff) {
-		case OP_31_XOP_STDUX:
-		case OP_31_XOP_STWUX:
-		case OP_31_XOP_STBUX:
-		case OP_31_XOP_STHUX:
-		case OP_31_XOP_STFSUX:
-		case OP_31_XOP_STFDUX:
-			return true;
-		}
-	}
-	return false;
-}
+
 /*
  * do_page_fault error handling helpers
  */
@@ -267,54 +235,6 @@
 	return false;
 }
 
-static bool bad_stack_expansion(struct pt_regs *regs, unsigned long address,
-				struct vm_area_struct *vma, unsigned int flags,
-				bool *must_retry)
-{
-	/*
-	 * N.B. The POWER/Open ABI allows programs to access up to
-	 * 288 bytes below the stack pointer.
-	 * The kernel signal delivery code writes up to about 1.5kB
-	 * below the stack pointer (r1) before decrementing it.
-	 * The exec code can write slightly over 640kB to the stack
-	 * before setting the user r1.  Thus we allow the stack to
-	 * expand to 1MB without further checks.
-	 */
-	if (address + 0x100000 < vma->vm_end) {
-		struct ppc_inst __user *nip = (struct ppc_inst __user *)regs->nip;
-		/* get user regs even if this fault is in kernel mode */
-		struct pt_regs *uregs = current->thread.regs;
-		if (uregs == NULL)
-			return true;
-
-		/*
-		 * A user-mode access to an address a long way below
-		 * the stack pointer is only valid if the instruction
-		 * is one which would update the stack pointer to the
-		 * address accessed if the instruction completed,
-		 * i.e. either stwu rs,n(r1) or stwux rs,r1,rb
-		 * (or the byte, halfword, float or double forms).
-		 *
-		 * If we don't check this then any write to the area
-		 * between the last mapped region and the stack will
-		 * expand the stack rather than segfaulting.
-		 */
-		if (address + 2048 >= uregs->gpr[1])
-			return false;
-
-		if ((flags & FAULT_FLAG_WRITE) && (flags & FAULT_FLAG_USER) &&
-		    access_ok(nip, sizeof(*nip))) {
-			struct ppc_inst inst;
-
-			if (!probe_user_read_inst(&inst, nip))
-				return !store_updates_sp(inst);
-			*must_retry = true;
-		}
-		return true;
-	}
-	return false;
-}
-
 #ifdef CONFIG_PPC_MEM_KEYS
 static bool access_pkey_error(bool is_write, bool is_exec, bool is_pkey,
 			      struct vm_area_struct *vma)
@@ -480,7 +400,6 @@
 	int is_user = user_mode(regs);
 	int is_write = page_fault_is_write(error_code);
 	vm_fault_t fault, major = 0;
-	bool must_retry = false;
 	bool kprobe_fault = kprobe_page_fault(regs, 11);
 
 	if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
@@ -569,30 +488,15 @@
 	vma = find_vma(mm, address);
 	if (unlikely(!vma))
 		return bad_area(regs, address);
-	if (likely(vma->vm_start <= address))
-		goto good_area;
-	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
-		return bad_area(regs, address);
 
-	/* The stack is being expanded, check if it's valid */
-	if (unlikely(bad_stack_expansion(regs, address, vma, flags,
-					 &must_retry))) {
-		if (!must_retry)
+	if (unlikely(vma->vm_start > address)) {
+		if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
 			return bad_area(regs, address);
 
-		mmap_read_unlock(mm);
-		if (fault_in_pages_readable((const char __user *)regs->nip,
-					    sizeof(unsigned int)))
-			return bad_area_nosemaphore(regs, address);
-		goto retry;
+		if (unlikely(expand_stack(vma, address)))
+			return bad_area(regs, address);
 	}
 
-	/* Try to expand it */
-	if (unlikely(expand_stack(vma, address)))
-		return bad_area(regs, address);
-
-good_area:
-
 #ifdef CONFIG_PPC_MEM_KEYS
 	if (unlikely(access_pkey_error(is_write, is_exec,
 				       (error_code & DSISR_KEYFAULT), vma)))
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index e9bfbcc..2629254 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -684,3 +684,21 @@
 		}
 	}
 }
+
+void __init gigantic_hugetlb_cma_reserve(void)
+{
+	unsigned long order = 0;
+
+	if (radix_enabled())
+		order = PUD_SHIFT - PAGE_SHIFT;
+	else if (!firmware_has_feature(FW_FEATURE_LPAR) && mmu_psize_defs[MMU_PAGE_16G].shift)
+		/*
+		 * On pseries, 16G pages are reserved via ibm,expected#pages instead.
+		 */
+		order = mmu_psize_to_shift(MMU_PAGE_16G) - PAGE_SHIFT;
+
+	if (order) {
+		VM_WARN_ON(order < MAX_ORDER);
+		hugetlb_cma_reserve(order);
+	}
+}
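
The order handed to hugetlb_cma_reserve() is just the gigantic page
size expressed in base pages: with 64K base pages, radix's PUD-sized
(1G) gigantic page gives 30 - 16 = 14, and the 16G hash page gives
34 - 16 = 18, both at or above MAX_ORDER, which is what the VM_WARN_ON
asserts. A quick check of that arithmetic (shift values assume 64K
base pages):

#include <assert.h>

int main(void)
{
	int page_shift = 16;	/* 64K base pages */
	int pud_shift = 30;	/* 1G gigantic page on radix */
	int shift_16g = 34;	/* 16G hash gigantic page */

	assert(pud_shift - page_shift == 14);
	assert(shift_16g - page_shift == 18);
	return 0;
}
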
diff --git a/arch/powerpc/mm/init_32.c b/arch/powerpc/mm/init_32.c
index 5a5469e..bf1717f 100644
--- a/arch/powerpc/mm/init_32.c
+++ b/arch/powerpc/mm/init_32.c
@@ -171,6 +171,8 @@
 	btext_unmap();
 #endif
 
+	kasan_mmu_init();
+
 	setup_kup();
 
 	/* Shortly after that, the entire linear mapping will be available */
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index bc73abf..4ae5fc0 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -406,13 +406,15 @@
 		}
 		if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] &
 						OV5_FEAT(OV5_RADIX_GTSE))) {
-			pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n");
-		}
+			cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
+		} else
+			cur_cpu_spec->mmu_features |= MMU_FTR_GTSE;
 		/* Do radix anyway - the hypervisor said we had to */
 		cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX;
 	} else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) {
 		/* Hypervisor only supports hash - disable radix */
 		cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+		cur_cpu_spec->mmu_features &= ~MMU_FTR_GTSE;
 	}
 }
 
diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c
index 0760e1e..fb29404 100644
--- a/arch/powerpc/mm/kasan/kasan_init_32.c
+++ b/arch/powerpc/mm/kasan/kasan_init_32.c
@@ -115,24 +115,17 @@
 	unsigned long k_end = (unsigned long)kasan_mem_to_shadow((void *)VMALLOC_END);
 
 	kasan_update_early_region(k_start, k_end, __pte(0));
+
+#ifdef MODULES_VADDR
+	k_start = (unsigned long)kasan_mem_to_shadow((void *)MODULES_VADDR);
+	k_end = (unsigned long)kasan_mem_to_shadow((void *)MODULES_END);
+	kasan_update_early_region(k_start, k_end, __pte(0));
+#endif
 }
 
-static void __init kasan_mmu_init(void)
+void __init kasan_mmu_init(void)
 {
 	int ret;
-	struct memblock_region *reg;
-
-	for_each_memblock(memory, reg) {
-		phys_addr_t base = reg->base;
-		phys_addr_t top = min(base + reg->size, total_lowmem);
-
-		if (base >= top)
-			continue;
-
-		ret = kasan_init_region(__va(base), top - base);
-		if (ret)
-			panic("kasan: kasan_init_region() failed");
-	}
 
 	if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ||
 	    IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
@@ -141,12 +134,24 @@
 		if (ret)
 			panic("kasan: kasan_init_shadow_page_tables() failed");
 	}
-
 }
 
 void __init kasan_init(void)
 {
-	kasan_mmu_init();
+	struct memblock_region *reg;
+
+	for_each_memblock(memory, reg) {
+		phys_addr_t base = reg->base;
+		phys_addr_t top = min(base + reg->size, total_lowmem);
+		int ret;
+
+		if (base >= top)
+			continue;
+
+		ret = kasan_init_region(__va(base), top - base);
+		if (ret)
+			panic("kasan: kasan_init_region() failed");
+	}
 
 	kasan_remap_early_shadow_ro();
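
The region loop that moved into kasan_init() clamps each memblock
region to lowmem before building shadow for it. The clamping logic in
isolation, with invented region and lowmem values:

  #include <stdio.h>

  int main(void)
  {
	unsigned long total_lowmem = 768UL << 20;	/* assumed 768M lowmem */
	struct { unsigned long base, size; } regions[] = {
		{ 0,           512UL << 20 },
		{ 512UL << 20, 1024UL << 20 },	/* straddles the lowmem limit */
	};

	for (int i = 0; i < 2; i++) {
		unsigned long base = regions[i].base;
		unsigned long top = regions[i].base + regions[i].size;

		if (top > total_lowmem)
			top = total_lowmem;	/* min(base + size, total_lowmem) */
		if (base >= top)
			continue;		/* region is entirely highmem */
		printf("shadow for %#lx-%#lx\n", base, top);
	}
	return 0;
  }
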
 
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index c2c11eb..9dafc63 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -127,8 +127,6 @@
 	unsigned long nr_pages = size >> PAGE_SHIFT;
 	int rc;
 
-	resize_hpt_for_hotplug(memblock_phys_mem_size());
-
 	start = (unsigned long)__va(start);
 	rc = create_section_mapping(start, start + size, nid,
 				    params->pgprot);
@@ -161,9 +159,6 @@
 	 * hit that section of memory
 	 */
 	vm_unmap_aliases();
-
-	if (resize_hpt_for_hotplug(memblock_phys_mem_size()) == -ENOSPC)
-		pr_warn("Hash collision while resizing HPT\n");
 }
 #endif
 
diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S
index d5e2704..bf24451 100644
--- a/arch/powerpc/mm/nohash/tlb_low_64e.S
+++ b/arch/powerpc/mm/nohash/tlb_low_64e.S
@@ -71,7 +71,6 @@
 END_BTB_FLUSH_SECTION
 	std	r7,EX_TLB_R7(r12)
 #endif
-	TLB_MISS_PROLOG_STATS
 .endm
 
 .macro tlb_epilog_bolted
@@ -85,7 +84,6 @@
 	mtcr	r14
 	ld	r14,EX_TLB_R14(r12)
 	ld	r15,EX_TLB_R15(r12)
-	TLB_MISS_RESTORE_STATS
 	ld	r16,EX_TLB_R16(r12)
 	mfspr	r12,SPRN_SPRG_GEN_SCRATCH
 .endm
@@ -128,7 +126,6 @@
 	ori	r10,r10,_PAGE_PRESENT
 	oris	r11,r10,_PAGE_ACCESSED@h
 
-	TLB_MISS_STATS_SAVE_INFO_BOLTED
 	bne	tlb_miss_kernel_bolted
 
 tlb_miss_common_bolted:
@@ -209,7 +206,6 @@
 	tlbwe
 
 tlb_miss_done_bolted:
-	TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
 	tlb_epilog_bolted
 	rfi
 
@@ -229,11 +225,9 @@
 	andi.	r10,r11,_PAGE_EXEC|_PAGE_BAP_SX
 	bne	itlb_miss_fault_bolted
 dtlb_miss_fault_bolted:
-	TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
 	tlb_epilog_bolted
 	b	exc_data_storage_book3e
 itlb_miss_fault_bolted:
-	TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
 	tlb_epilog_bolted
 	b	exc_instruction_storage_book3e
 
@@ -243,7 +237,6 @@
 
 	rldicl.	r10,r16,64-PGTABLE_EADDR_SIZE,PGTABLE_EADDR_SIZE+4
 	srdi	r15,r16,60		/* get region */
-	TLB_MISS_STATS_SAVE_INFO_BOLTED
 	bne-	itlb_miss_fault_bolted
 
 	li	r11,_PAGE_PRESENT|_PAGE_EXEC	/* Base perm */
@@ -276,7 +269,6 @@
 	srdi.	r15,r16,60		/* get region */
 	ori	r16,r16,1
 
-	TLB_MISS_STATS_SAVE_INFO_BOLTED
 	bne	tlb_miss_kernel_e6500	/* user/kernel test */
 
 	b	tlb_miss_common_e6500
@@ -288,7 +280,6 @@
 	srdi.	r15,r16,60		/* get region */
 	rldicr	r16,r16,0,62
 
-	TLB_MISS_STATS_SAVE_INFO_BOLTED
 	bne	tlb_miss_kernel_e6500	/* user vs kernel check */
 
 /*
@@ -460,7 +451,6 @@
 	.endm
 
 	tlb_unlock_e6500
-	TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
 	tlb_epilog_bolted
 	rfi
 
@@ -519,11 +509,9 @@
 	andi.	r16,r16,1
 	bne	itlb_miss_fault_e6500
 dtlb_miss_fault_e6500:
-	TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
 	tlb_epilog_bolted
 	b	exc_data_storage_book3e
 itlb_miss_fault_e6500:
-	TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
 	tlb_epilog_bolted
 	b	exc_instruction_storage_book3e
 #endif /* CONFIG_PPC_FSL_BOOK3E */
@@ -548,7 +536,6 @@
 	mfspr	r16,SPRN_DEAR		/* get faulting address */
 	srdi	r15,r16,60		/* get region */
 	cmpldi	cr0,r15,0xc		/* linear mapping ? */
-	TLB_MISS_STATS_SAVE_INFO
 	beq	tlb_load_linear		/* yes -> go to linear map load */
 
 	/* The page tables are mapped virtually linear. At this point, though,
@@ -600,7 +587,6 @@
 	/* We got a crappy address, just fault with whatever DEAR and ESR
 	 * are here
 	 */
-	TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
 	TLB_MISS_EPILOG_ERROR
 	b	exc_data_storage_book3e
 
@@ -624,7 +610,6 @@
 	 */
 	srdi	r15,r16,60		/* get region */
 	cmpldi	cr0,r15,0xc		/* linear mapping ? */
-	TLB_MISS_STATS_SAVE_INFO
 	beq	tlb_load_linear		/* yes -> go to linear map load */
 
 	/* We do the user/kernel test for the PID here along with the RW test
@@ -646,7 +631,6 @@
 	beq+	normal_tlb_miss
 
 	/* We got a crappy address, just fault */
-	TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
 	TLB_MISS_EPILOG_ERROR
 	b	exc_instruction_storage_book3e
 
@@ -745,7 +729,6 @@
 	 * level 0 and just going back to userland. They are only needed
 	 * if you are going to take an access fault
 	 */
-	TLB_MISS_STATS_X(MMSTAT_TLB_MISS_NORM_OK)
 	TLB_MISS_EPILOG_SUCCESS
 	rfi
 
@@ -757,11 +740,9 @@
 	ld	r15,EX_TLB_ESR(r12)
 	mtspr	SPRN_DEAR,r14
 	mtspr	SPRN_ESR,r15
-	TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
 	TLB_MISS_EPILOG_ERROR
 	b	exc_data_storage_book3e
-1:	TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
-	TLB_MISS_EPILOG_ERROR
+1:	TLB_MISS_EPILOG_ERROR
 	b	exc_instruction_storage_book3e
 
 
@@ -899,7 +880,6 @@
 1:
 END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_TLBRSRV)
 	/* Return to caller, normal case */
-	TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK);
 	TLB_MISS_EPILOG_SUCCESS
 	rfi
 
@@ -935,18 +915,15 @@
 	beq	1f
 	mtspr	SPRN_DEAR,r15
 	mtspr	SPRN_ESR,r16
-	TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT);
 	TLB_MISS_EPILOG_ERROR
 	b	exc_data_storage_book3e
-1:	TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT);
-	TLB_MISS_EPILOG_ERROR
+1:	TLB_MISS_EPILOG_ERROR
 	b	exc_instruction_storage_book3e
 
 virt_page_table_tlb_miss_whacko_fault:
 	/* The linear fault will restart everything so ESR and DEAR will
 	 * not have been clobbered, let's just fault with what we have
 	 */
-	TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_FAULT);
 	TLB_MISS_EPILOG_ERROR
 	b	exc_data_storage_book3e
 
@@ -971,7 +948,6 @@
 	mfspr	r16,SPRN_DEAR		/* get faulting address */
 	srdi	r11,r16,60		/* get region */
 	cmpldi	cr0,r11,0xc		/* linear mapping ? */
-	TLB_MISS_STATS_SAVE_INFO
 	beq	tlb_load_linear		/* yes -> go to linear map load */
 
 	/* We do the user/kernel test for the PID here along with the RW test
@@ -991,7 +967,6 @@
 	/* We got a crappy address, just fault with whatever DEAR and ESR
 	 * are here
 	 */
-	TLB_MISS_STATS_D(MMSTAT_TLB_MISS_NORM_FAULT)
 	TLB_MISS_EPILOG_ERROR
 	b	exc_data_storage_book3e
 
@@ -1015,7 +990,6 @@
 	 */
 	srdi	r11,r16,60		/* get region */
 	cmpldi	cr0,r11,0xc		/* linear mapping ? */
-	TLB_MISS_STATS_SAVE_INFO
 	beq	tlb_load_linear		/* yes -> go to linear map load */
 
 	/* We do the user/kernel test for the PID here along with the RW test
@@ -1033,7 +1007,6 @@
 	beq+	htw_tlb_miss
 
 	/* We got a crappy address, just fault */
-	TLB_MISS_STATS_I(MMSTAT_TLB_MISS_NORM_FAULT)
 	TLB_MISS_EPILOG_ERROR
 	b	exc_instruction_storage_book3e
 
@@ -1130,7 +1103,6 @@
 	 * level 0 and just going back to userland. They are only needed
 	 * if you are going to take an access fault
 	 */
-	TLB_MISS_STATS_X(MMSTAT_TLB_MISS_PT_OK)
 	TLB_MISS_EPILOG_SUCCESS
 	rfi
 
@@ -1142,11 +1114,9 @@
 	beq	1f
 	mtspr	SPRN_DEAR,r16
 	mtspr	SPRN_ESR,r14
-	TLB_MISS_STATS_D(MMSTAT_TLB_MISS_PT_FAULT)
 	TLB_MISS_EPILOG_ERROR
 	b	exc_data_storage_book3e
-1:	TLB_MISS_STATS_I(MMSTAT_TLB_MISS_PT_FAULT)
-	TLB_MISS_EPILOG_ERROR
+1:	TLB_MISS_EPILOG_ERROR
 	b	exc_instruction_storage_book3e
 
 /*
@@ -1221,7 +1191,6 @@
 	 * We do that because we can't resume a fault within a TLB
 	 * miss handler, due to MAS and TLB reservation being clobbered.
 	 */
-	TLB_MISS_STATS_X(MMSTAT_TLB_MISS_LINEAR)
 	TLB_MISS_EPILOG_ERROR
 	rfi
 
@@ -1233,13 +1202,3 @@
 	b	exc_data_storage_book3e
 1:	TLB_MISS_EPILOG_ERROR_SPECIAL
 	b	exc_instruction_storage_book3e
-
-
-#ifdef CONFIG_BOOK3E_MMU_TLB_STATS
-.tlb_stat_inc:
-1:	ldarx	r8,0,r9
-	addi	r8,r8,1
-	stdcx.	r8,0,r9
-	bne-	1b
-	blr
-#endif
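
The deleted .tlb_stat_inc helper is a classic larx/stcx. retry loop.
For reference, the same pattern expressed with a C11 atomic, which a
Power compiler lowers to an equivalent loop (a sketch, not kernel
code):

  #include <stdatomic.h>
  #include <stdio.h>

  static _Atomic unsigned long tlb_stat;

  static void tlb_stat_inc(void)
  {
	/* load-reserve / add / store-conditional, retried on conflict */
	atomic_fetch_add_explicit(&tlb_stat, 1, memory_order_relaxed);
  }

  int main(void)
  {
	tlb_stat_inc();
	printf("%lu\n", atomic_load(&tlb_stat));
	return 0;
  }
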
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 9fcf2d1..058fee9a 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -221,7 +221,8 @@
 	}
 }
 
-/* Returns nid in the range [0..MAX_NUMNODES-1], or -1 if no useful numa
+/*
+ * Returns nid in the range [0..nr_node_ids - 1], or -1 if no useful NUMA
  * info is found.
  */
 static int associativity_to_nid(const __be32 *associativity)
@@ -235,7 +236,7 @@
 		nid = of_read_number(&associativity[min_common_depth], 1);
 
 	/* POWER4 LPAR uses 0xffff as invalid node */
-	if (nid == 0xffff || nid >= MAX_NUMNODES)
+	if (nid == 0xffff || nid >= nr_node_ids)
 		nid = NUMA_NO_NODE;
 
 	if (nid > 0 &&
@@ -448,7 +449,7 @@
 		index = lmb->aa_index * aa.array_sz + min_common_depth - 1;
 		nid = of_read_number(&aa.arrays[index], 1);
 
-		if (nid == 0xffff || nid >= MAX_NUMNODES)
+		if (nid == 0xffff || nid >= nr_node_ids)
 			nid = default_nid;
 
 		if (nid > 0) {
@@ -644,8 +645,9 @@
  * Extract NUMA information from the ibm,dynamic-reconfiguration-memory
  * node.  This assumes n_mem_{addr,size}_cells have been set.
  */
-static void __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
-					const __be32 **usm)
+static int __init numa_setup_drmem_lmb(struct drmem_lmb *lmb,
+					const __be32 **usm,
+					void *data)
 {
 	unsigned int ranges, is_kexec_kdump = 0;
 	unsigned long base, size, sz;
@@ -657,7 +659,7 @@
 	 */
 	if ((lmb->flags & DRCONF_MEM_RESERVED)
 	    || !(lmb->flags & DRCONF_MEM_ASSIGNED))
-		return;
+		return 0;
 
 	if (*usm)
 		is_kexec_kdump = 1;
@@ -669,7 +671,7 @@
 	if (is_kexec_kdump) {
 		ranges = read_usm_ranges(usm);
 		if (!ranges) /* there is no (base, size) tuple */
-			return;
+			return 0;
 	}
 
 	do {
@@ -686,6 +688,8 @@
 		if (sz)
 			memblock_set_node(base, sz, &memblock.memory, nid);
 	} while (--ranges);
+
+	return 0;
 }
 
 static int __init parse_numa_properties(void)
@@ -787,7 +791,7 @@
 	 */
 	memory = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
 	if (memory) {
-		walk_drmem_lmbs(memory, numa_setup_drmem_lmb);
+		walk_drmem_lmbs(memory, NULL, numa_setup_drmem_lmb);
 		of_node_put(memory);
 	}
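
walk_drmem_lmbs() now threads a void *data cookie through to an
int-returning callback, so a walk can be aborted part way. A minimal
model of the new shape (the real callback also receives the
const __be32 **usm cursor, omitted here for brevity; the names below
are illustrative):

  #include <stdio.h>

  struct drmem_lmb { unsigned long base_addr; };

  static int walk_drmem_lmbs(struct drmem_lmb *lmbs, int n, void *data,
			     int (*func)(struct drmem_lmb *, void *))
  {
	int i, ret = 0;

	for (i = 0; i < n && !ret; i++)
		ret = func(&lmbs[i], data);	/* non-zero stops the walk */
	return ret;
  }

  static int print_lmb(struct drmem_lmb *lmb, void *data)
  {
	printf("lmb @ %#lx\n", lmb->base_addr);
	return 0;
  }

  int main(void)
  {
	struct drmem_lmb lmbs[] = { { 0x0 }, { 0x10000000 } };

	return walk_drmem_lmbs(lmbs, 2, NULL, print_lmb);
  }
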
 
@@ -984,28 +988,6 @@
 }
 early_param("numa", early_numa);
 
-/*
- * The platform can inform us through one of several mechanisms
- * (post-migration device tree updates, PRRN or VPHN) that the NUMA
- * assignment of a resource has changed. This controls whether we act
- * on that. Disabled by default.
- */
-static bool topology_updates_enabled;
-
-static int __init early_topology_updates(char *p)
-{
-	if (!p)
-		return 0;
-
-	if (!strcmp(p, "on")) {
-		pr_warn("Caution: enabling topology updates\n");
-		topology_updates_enabled = true;
-	}
-
-	return 0;
-}
-early_param("topology_updates", early_topology_updates);
-
 #ifdef CONFIG_MEMORY_HOTPLUG
 /*
  * Find the node associated with a hot added memory section for
@@ -1144,98 +1126,9 @@
 
 /* Virtual Processor Home Node (VPHN) support */
 #ifdef CONFIG_PPC_SPLPAR
-struct topology_update_data {
-	struct topology_update_data *next;
-	unsigned int cpu;
-	int old_nid;
-	int new_nid;
-};
-
-#define TOPOLOGY_DEF_TIMER_SECS	60
-
-static u8 vphn_cpu_change_counts[NR_CPUS][MAX_DISTANCE_REF_POINTS];
-static cpumask_t cpu_associativity_changes_mask;
-static int vphn_enabled;
-static int prrn_enabled;
-static void reset_topology_timer(void);
-static int topology_timer_secs = 1;
 static int topology_inited;
 
 /*
- * Change polling interval for associativity changes.
- */
-int timed_topology_update(int nsecs)
-{
-	if (vphn_enabled) {
-		if (nsecs > 0)
-			topology_timer_secs = nsecs;
-		else
-			topology_timer_secs = TOPOLOGY_DEF_TIMER_SECS;
-
-		reset_topology_timer();
-	}
-
-	return 0;
-}
-
-/*
- * Store the current values of the associativity change counters in the
- * hypervisor.
- */
-static void setup_cpu_associativity_change_counters(void)
-{
-	int cpu;
-
-	/* The VPHN feature supports a maximum of 8 reference points */
-	BUILD_BUG_ON(MAX_DISTANCE_REF_POINTS > 8);
-
-	for_each_possible_cpu(cpu) {
-		int i;
-		u8 *counts = vphn_cpu_change_counts[cpu];
-		volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
-
-		for (i = 0; i < distance_ref_points_depth; i++)
-			counts[i] = hypervisor_counts[i];
-	}
-}
-
-/*
- * The hypervisor maintains a set of 8 associativity change counters in
- * the VPA of each cpu that correspond to the associativity levels in the
- * ibm,associativity-reference-points property. When an associativity
- * level changes, the corresponding counter is incremented.
- *
- * Set a bit in cpu_associativity_changes_mask for each cpu whose home
- * node associativity levels have changed.
- *
- * Returns the number of cpus with unhandled associativity changes.
- */
-static int update_cpu_associativity_changes_mask(void)
-{
-	int cpu;
-	cpumask_t *changes = &cpu_associativity_changes_mask;
-
-	for_each_possible_cpu(cpu) {
-		int i, changed = 0;
-		u8 *counts = vphn_cpu_change_counts[cpu];
-		volatile u8 *hypervisor_counts = lppaca_of(cpu).vphn_assoc_counts;
-
-		for (i = 0; i < distance_ref_points_depth; i++) {
-			if (hypervisor_counts[i] != counts[i]) {
-				counts[i] = hypervisor_counts[i];
-				changed = 1;
-			}
-		}
-		if (changed) {
-			cpumask_or(changes, changes, cpu_sibling_mask(cpu));
-			cpu = cpu_last_thread_sibling(cpu);
-		}
-	}
-
-	return cpumask_weight(changes);
-}
-
-/*
  * Retrieve the new associativity information for a virtual processor's
  * home node.
  */
@@ -1250,7 +1143,6 @@
 	switch (rc) {
 	case H_SUCCESS:
 		dbg("VPHN hcall succeeded. Reset polling...\n");
-		timed_topology_update(0);
 		goto out;
 
 	case H_FUNCTION:
@@ -1269,8 +1161,6 @@
 			, rc);
 		break;
 	}
-
-	stop_topology_update();
 out:
 	return rc;
 }
@@ -1314,380 +1204,8 @@
 	return new_nid;
 }
 
-/*
- * Update the CPU maps and sysfs entries for a single CPU when its NUMA
- * characteristics change. This function doesn't perform any locking and is
- * only safe to call from stop_machine().
- */
-static int update_cpu_topology(void *data)
-{
-	struct topology_update_data *update;
-	unsigned long cpu;
-
-	if (!data)
-		return -EINVAL;
-
-	cpu = smp_processor_id();
-
-	for (update = data; update; update = update->next) {
-		int new_nid = update->new_nid;
-		if (cpu != update->cpu)
-			continue;
-
-		unmap_cpu_from_node(cpu);
-		map_cpu_to_node(cpu, new_nid);
-		set_cpu_numa_node(cpu, new_nid);
-		set_cpu_numa_mem(cpu, local_memory_node(new_nid));
-		vdso_getcpu_init();
-	}
-
-	return 0;
-}
-
-static int update_lookup_table(void *data)
-{
-	struct topology_update_data *update;
-
-	if (!data)
-		return -EINVAL;
-
-	/*
-	 * Upon topology update, the numa-cpu lookup table needs to be updated
-	 * for all threads in the core, including offline CPUs, to ensure that
-	 * future hotplug operations respect the cpu-to-node associativity
-	 * properly.
-	 */
-	for (update = data; update; update = update->next) {
-		int nid, base, j;
-
-		nid = update->new_nid;
-		base = cpu_first_thread_sibling(update->cpu);
-
-		for (j = 0; j < threads_per_core; j++) {
-			update_numa_cpu_lookup_table(base + j, nid);
-		}
-	}
-
-	return 0;
-}
-
-/*
- * Update the node maps and sysfs entries for each cpu whose home node
- * has changed. Returns 1 when the topology has changed, and 0 otherwise.
- *
- * cpus_locked says whether we already hold cpu_hotplug_lock.
- */
-int numa_update_cpu_topology(bool cpus_locked)
-{
-	unsigned int cpu, sibling, changed = 0;
-	struct topology_update_data *updates, *ud;
-	cpumask_t updated_cpus;
-	struct device *dev;
-	int weight, new_nid, i = 0;
-
-	if (!prrn_enabled && !vphn_enabled && topology_inited)
-		return 0;
-
-	weight = cpumask_weight(&cpu_associativity_changes_mask);
-	if (!weight)
-		return 0;
-
-	updates = kcalloc(weight, sizeof(*updates), GFP_KERNEL);
-	if (!updates)
-		return 0;
-
-	cpumask_clear(&updated_cpus);
-
-	for_each_cpu(cpu, &cpu_associativity_changes_mask) {
-		/*
-		 * If siblings aren't flagged for changes, updates list
-		 * will be too short. Skip on this update and set for next
-		 * update.
-		 */
-		if (!cpumask_subset(cpu_sibling_mask(cpu),
-					&cpu_associativity_changes_mask)) {
-			pr_info("Sibling bits not set for associativity "
-					"change, cpu%d\n", cpu);
-			cpumask_or(&cpu_associativity_changes_mask,
-					&cpu_associativity_changes_mask,
-					cpu_sibling_mask(cpu));
-			cpu = cpu_last_thread_sibling(cpu);
-			continue;
-		}
-
-		new_nid = find_and_online_cpu_nid(cpu);
-
-		if (new_nid == numa_cpu_lookup_table[cpu]) {
-			cpumask_andnot(&cpu_associativity_changes_mask,
-					&cpu_associativity_changes_mask,
-					cpu_sibling_mask(cpu));
-			dbg("Assoc chg gives same node %d for cpu%d\n",
-					new_nid, cpu);
-			cpu = cpu_last_thread_sibling(cpu);
-			continue;
-		}
-
-		for_each_cpu(sibling, cpu_sibling_mask(cpu)) {
-			ud = &updates[i++];
-			ud->next = &updates[i];
-			ud->cpu = sibling;
-			ud->new_nid = new_nid;
-			ud->old_nid = numa_cpu_lookup_table[sibling];
-			cpumask_set_cpu(sibling, &updated_cpus);
-		}
-		cpu = cpu_last_thread_sibling(cpu);
-	}
-
-	/*
-	 * Prevent processing of 'updates' from overflowing array
-	 * where last entry filled in a 'next' pointer.
-	 */
-	if (i)
-		updates[i-1].next = NULL;
-
-	pr_debug("Topology update for the following CPUs:\n");
-	if (cpumask_weight(&updated_cpus)) {
-		for (ud = &updates[0]; ud; ud = ud->next) {
-			pr_debug("cpu %d moving from node %d "
-					  "to %d\n", ud->cpu,
-					  ud->old_nid, ud->new_nid);
-		}
-	}
-
-	/*
-	 * In cases where we have nothing to update (because the updates list
-	 * is too short or because the new topology is same as the old one),
-	 * skip invoking update_cpu_topology() via stop-machine(). This is
-	 * necessary (and not just a fast-path optimization) since stop-machine
-	 * can end up electing a random CPU to run update_cpu_topology(), and
-	 * thus trick us into setting up incorrect cpu-node mappings (since
-	 * 'updates' is kzalloc()'ed).
-	 *
-	 * And for the similar reason, we will skip all the following updating.
-	 */
-	if (!cpumask_weight(&updated_cpus))
-		goto out;
-
-	if (cpus_locked)
-		stop_machine_cpuslocked(update_cpu_topology, &updates[0],
-					&updated_cpus);
-	else
-		stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
-
-	/*
-	 * Update the numa-cpu lookup table with the new mappings, even for
-	 * offline CPUs. It is best to perform this update from the stop-
-	 * machine context.
-	 */
-	if (cpus_locked)
-		stop_machine_cpuslocked(update_lookup_table, &updates[0],
-					cpumask_of(raw_smp_processor_id()));
-	else
-		stop_machine(update_lookup_table, &updates[0],
-			     cpumask_of(raw_smp_processor_id()));
-
-	for (ud = &updates[0]; ud; ud = ud->next) {
-		unregister_cpu_under_node(ud->cpu, ud->old_nid);
-		register_cpu_under_node(ud->cpu, ud->new_nid);
-
-		dev = get_cpu_device(ud->cpu);
-		if (dev)
-			kobject_uevent(&dev->kobj, KOBJ_CHANGE);
-		cpumask_clear_cpu(ud->cpu, &cpu_associativity_changes_mask);
-		changed = 1;
-	}
-
-out:
-	kfree(updates);
-	return changed;
-}
-
-int arch_update_cpu_topology(void)
-{
-	return numa_update_cpu_topology(true);
-}
-
-static void topology_work_fn(struct work_struct *work)
-{
-	rebuild_sched_domains();
-}
-static DECLARE_WORK(topology_work, topology_work_fn);
-
-static void topology_schedule_update(void)
-{
-	schedule_work(&topology_work);
-}
-
-static void topology_timer_fn(struct timer_list *unused)
-{
-	if (prrn_enabled && cpumask_weight(&cpu_associativity_changes_mask))
-		topology_schedule_update();
-	else if (vphn_enabled) {
-		if (update_cpu_associativity_changes_mask() > 0)
-			topology_schedule_update();
-		reset_topology_timer();
-	}
-}
-static struct timer_list topology_timer;
-
-static void reset_topology_timer(void)
-{
-	if (vphn_enabled)
-		mod_timer(&topology_timer, jiffies + topology_timer_secs * HZ);
-}
-
-#ifdef CONFIG_SMP
-
-static int dt_update_callback(struct notifier_block *nb,
-				unsigned long action, void *data)
-{
-	struct of_reconfig_data *update = data;
-	int rc = NOTIFY_DONE;
-
-	switch (action) {
-	case OF_RECONFIG_UPDATE_PROPERTY:
-		if (of_node_is_type(update->dn, "cpu") &&
-		    !of_prop_cmp(update->prop->name, "ibm,associativity")) {
-			u32 core_id;
-			of_property_read_u32(update->dn, "reg", &core_id);
-			rc = dlpar_cpu_readd(core_id);
-			rc = NOTIFY_OK;
-		}
-		break;
-	}
-
-	return rc;
-}
-
-static struct notifier_block dt_update_nb = {
-	.notifier_call = dt_update_callback,
-};
-
-#endif
-
-/*
- * Start polling for associativity changes.
- */
-int start_topology_update(void)
-{
-	int rc = 0;
-
-	if (!topology_updates_enabled)
-		return 0;
-
-	if (firmware_has_feature(FW_FEATURE_PRRN)) {
-		if (!prrn_enabled) {
-			prrn_enabled = 1;
-#ifdef CONFIG_SMP
-			rc = of_reconfig_notifier_register(&dt_update_nb);
-#endif
-		}
-	}
-	if (firmware_has_feature(FW_FEATURE_VPHN) &&
-		   lppaca_shared_proc(get_lppaca())) {
-		if (!vphn_enabled) {
-			vphn_enabled = 1;
-			setup_cpu_associativity_change_counters();
-			timer_setup(&topology_timer, topology_timer_fn,
-				    TIMER_DEFERRABLE);
-			reset_topology_timer();
-		}
-	}
-
-	pr_info("Starting topology update%s%s\n",
-		(prrn_enabled ? " prrn_enabled" : ""),
-		(vphn_enabled ? " vphn_enabled" : ""));
-
-	return rc;
-}
-
-/*
- * Disable polling for VPHN associativity changes.
- */
-int stop_topology_update(void)
-{
-	int rc = 0;
-
-	if (!topology_updates_enabled)
-		return 0;
-
-	if (prrn_enabled) {
-		prrn_enabled = 0;
-#ifdef CONFIG_SMP
-		rc = of_reconfig_notifier_unregister(&dt_update_nb);
-#endif
-	}
-	if (vphn_enabled) {
-		vphn_enabled = 0;
-		rc = del_timer_sync(&topology_timer);
-	}
-
-	pr_info("Stopping topology update\n");
-
-	return rc;
-}
-
-int prrn_is_enabled(void)
-{
-	return prrn_enabled;
-}
-
-static int topology_read(struct seq_file *file, void *v)
-{
-	if (vphn_enabled || prrn_enabled)
-		seq_puts(file, "on\n");
-	else
-		seq_puts(file, "off\n");
-
-	return 0;
-}
-
-static int topology_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, topology_read, NULL);
-}
-
-static ssize_t topology_write(struct file *file, const char __user *buf,
-			      size_t count, loff_t *off)
-{
-	char kbuf[4]; /* "on" or "off" plus null. */
-	int read_len;
-
-	read_len = count < 3 ? count : 3;
-	if (copy_from_user(kbuf, buf, read_len))
-		return -EINVAL;
-
-	kbuf[read_len] = '\0';
-
-	if (!strncmp(kbuf, "on", 2)) {
-		topology_updates_enabled = true;
-		start_topology_update();
-	} else if (!strncmp(kbuf, "off", 3)) {
-		stop_topology_update();
-		topology_updates_enabled = false;
-	} else
-		return -EINVAL;
-
-	return count;
-}
-
-static const struct proc_ops topology_proc_ops = {
-	.proc_read	= seq_read,
-	.proc_write	= topology_write,
-	.proc_open	= topology_open,
-	.proc_release	= single_release,
-};
-
 static int topology_update_init(void)
 {
-	start_topology_update();
-
-	if (vphn_enabled)
-		topology_schedule_update();
-
-	if (!proc_create("powerpc/topology_updates", 0644, NULL, &topology_proc_ops))
-		return -ENOMEM;
-
 	topology_inited = 1;
 	return 0;
 }
diff --git a/arch/powerpc/mm/pgtable-frag.c b/arch/powerpc/mm/pgtable-frag.c
index ee4bd6d..97ae493 100644
--- a/arch/powerpc/mm/pgtable-frag.c
+++ b/arch/powerpc/mm/pgtable-frag.c
@@ -110,6 +110,9 @@
 {
 	struct page *page = virt_to_page(table);
 
+	if (PageReserved(page))
+		return free_reserved_page(page);
+
 	BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
 	if (atomic_dec_and_test(&page->pt_frag_refcount)) {
 		if (!kernel)
diff --git a/arch/powerpc/mm/ptdump/hashpagetable.c b/arch/powerpc/mm/ptdump/hashpagetable.c
index a2c33ef..5b8bd34c 100644
--- a/arch/powerpc/mm/ptdump/hashpagetable.c
+++ b/arch/powerpc/mm/ptdump/hashpagetable.c
@@ -258,7 +258,7 @@
 	for (i = 0; i < HPTES_PER_GROUP; i += 4, hpte_group += 4) {
 		lpar_rc = plpar_pte_read_4(0, hpte_group, (void *)ptes);
 
-		if (lpar_rc != H_SUCCESS)
+		if (lpar_rc)
 			continue;
 		for (j = 0; j < 4; j++) {
 			if (HPTE_V_COMPARE(ptes[j].v, want_v) &&
diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c
index de6e05e..c911cd7 100644
--- a/arch/powerpc/mm/ptdump/ptdump.c
+++ b/arch/powerpc/mm/ptdump/ptdump.c
@@ -74,6 +74,10 @@
 
 static struct addr_marker address_markers[] = {
 	{ 0,	"Start of kernel VM" },
+#ifdef MODULES_VADDR
+	{ 0,	"modules start" },
+	{ 0,	"modules end" },
+#endif
 	{ 0,	"vmalloc() Area" },
 	{ 0,	"vmalloc() End" },
 #ifdef CONFIG_PPC64
@@ -195,6 +199,24 @@
 	st->wx_pages += (addr - st->start_address) / PAGE_SIZE;
 }
 
+static void note_page_update_state(struct pg_state *st, unsigned long addr,
+				   unsigned int level, u64 val, unsigned long page_size)
+{
+	u64 flag = val & pg_level[level].mask;
+	u64 pa = val & PTE_RPN_MASK;
+
+	st->level = level;
+	st->current_flags = flag;
+	st->start_address = addr;
+	st->start_pa = pa;
+	st->page_size = page_size;
+
+	while (addr >= st->marker[1].start_address) {
+		st->marker++;
+		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+	}
+}
+
 static void note_page(struct pg_state *st, unsigned long addr,
 	       unsigned int level, u64 val, unsigned long page_size)
 {
@@ -203,13 +225,8 @@
 
 	/* At first no level is set */
 	if (!st->level) {
-		st->level = level;
-		st->current_flags = flag;
-		st->start_address = addr;
-		st->start_pa = pa;
-		st->last_pa = pa;
-		st->page_size = page_size;
 		pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
+		note_page_update_state(st, addr, level, val, page_size);
 	/*
 	 * Dump the section of virtual memory when:
 	 *   - the PTE flags from one entry to the next differs.
@@ -241,19 +258,9 @@
 		 * Address indicates we have passed the end of the
 		 * current section of virtual memory
 		 */
-		while (addr >= st->marker[1].start_address) {
-			st->marker++;
-			pt_dump_seq_printf(st->seq, "---[ %s ]---\n", st->marker->name);
-		}
-		st->start_address = addr;
-		st->start_pa = pa;
-		st->last_pa = pa;
-		st->page_size = page_size;
-		st->current_flags = flag;
-		st->level = level;
-	} else {
-		st->last_pa = pa;
+		note_page_update_state(st, addr, level, val, page_size);
 	}
+	st->last_pa = pa;
 }
 
 static void walk_pte(struct pg_state *st, pmd_t *pmd, unsigned long start)
@@ -348,7 +355,15 @@
 {
 	int i = 0;
 
+#ifdef CONFIG_PPC64
 	address_markers[i++].start_address = PAGE_OFFSET;
+#else
+	address_markers[i++].start_address = TASK_SIZE;
+#endif
+#ifdef MODULES_VADDR
+	address_markers[i++].start_address = MODULES_VADDR;
+	address_markers[i++].start_address = MODULES_END;
+#endif
 	address_markers[i++].start_address = VMALLOC_START;
 	address_markers[i++].start_address = VMALLOC_END;
 #ifdef CONFIG_PPC64
@@ -385,7 +400,7 @@
 	struct pg_state st = {
 		.seq = m,
 		.marker = address_markers,
-		.start_address = PAGE_OFFSET,
+		.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
 	};
 
 #ifdef CONFIG_PPC64
@@ -429,7 +444,7 @@
 		.seq = NULL,
 		.marker = address_markers,
 		.check_wx = true,
-		.start_address = PAGE_OFFSET,
+		.start_address = IS_ENABLED(CONFIG_PPC64) ? PAGE_OFFSET : TASK_SIZE,
 	};
 
 #ifdef CONFIG_PPC64
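
note_page_update_state() centralises the state reset and the marker
bookkeeping that were previously duplicated in note_page(). Its
advance-past-crossed-markers loop, modelled standalone with toy
addresses:

  #include <stdio.h>

  struct addr_marker { unsigned long start_address; const char *name; };

  int main(void)
  {
	struct addr_marker markers[] = {
		{ 0x1000, "Start of kernel VM" },
		{ 0x2000, "vmalloc() Area" },
		{ 0x3000, "vmalloc() End" },
		{ ~0UL,   "sentinel" },
	};
	struct addr_marker *marker = markers;
	unsigned long addr = 0x2500;	/* lands inside the vmalloc area */

	/* print a banner for every marker we have crossed */
	while (addr >= marker[1].start_address) {
		marker++;
		printf("---[ %s ]---\n", marker->name);
	}
	return 0;
  }
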
diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h
index 55d4377..d0a67a1 100644
--- a/arch/powerpc/net/bpf_jit.h
+++ b/arch/powerpc/net/bpf_jit.h
@@ -11,6 +11,7 @@
 #ifndef __ASSEMBLY__
 
 #include <asm/types.h>
+#include <asm/ppc-opcode.h>
 
 #ifdef PPC64_ELF_ABI_v1
 #define FUNCTION_DESCR_SIZE	24
@@ -18,167 +19,10 @@
 #define FUNCTION_DESCR_SIZE	0
 #endif
 
-/*
- * 16-bit immediate helper macros: HA() is for use with sign-extending instrs
- * (e.g. LD, ADDI).  If the bottom 16 bits is "-ve", add another bit into the
- * top half to negate the effect (i.e. 0xffff + 1 = 0x(1)0000).
- */
-#define IMM_H(i)		((uintptr_t)(i)>>16)
-#define IMM_HA(i)		(((uintptr_t)(i)>>16) +			      \
-					(((uintptr_t)(i) & 0x8000) >> 15))
-#define IMM_L(i)		((uintptr_t)(i) & 0xffff)
-
 #define PLANT_INSTR(d, idx, instr)					      \
 	do { if (d) { (d)[idx] = instr; } idx++; } while (0)
 #define EMIT(instr)		PLANT_INSTR(image, ctx->idx, instr)
 
-#define PPC_NOP()		EMIT(PPC_INST_NOP)
-#define PPC_BLR()		EMIT(PPC_INST_BLR)
-#define PPC_BLRL()		EMIT(PPC_INST_BLRL)
-#define PPC_MTLR(r)		EMIT(PPC_INST_MTLR | ___PPC_RT(r))
-#define PPC_BCTR()		EMIT(PPC_INST_BCTR)
-#define PPC_MTCTR(r)		EMIT(PPC_INST_MTCTR | ___PPC_RT(r))
-#define PPC_ADDI(d, a, i)	EMIT(PPC_INST_ADDI | ___PPC_RT(d) |	      \
-				     ___PPC_RA(a) | IMM_L(i))
-#define PPC_MR(d, a)		PPC_OR(d, a, a)
-#define PPC_LI(r, i)		PPC_ADDI(r, 0, i)
-#define PPC_ADDIS(d, a, i)	EMIT(PPC_INST_ADDIS |			      \
-				     ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))
-#define PPC_LIS(r, i)		PPC_ADDIS(r, 0, i)
-#define PPC_STD(r, base, i)	EMIT(PPC_INST_STD | ___PPC_RS(r) |	      \
-				     ___PPC_RA(base) | ((i) & 0xfffc))
-#define PPC_STDX(r, base, b)	EMIT(PPC_INST_STDX | ___PPC_RS(r) |	      \
-				     ___PPC_RA(base) | ___PPC_RB(b))
-#define PPC_STDU(r, base, i)	EMIT(PPC_INST_STDU | ___PPC_RS(r) |	      \
-				     ___PPC_RA(base) | ((i) & 0xfffc))
-#define PPC_STW(r, base, i)	EMIT(PPC_INST_STW | ___PPC_RS(r) |	      \
-				     ___PPC_RA(base) | IMM_L(i))
-#define PPC_STWU(r, base, i)	EMIT(PPC_INST_STWU | ___PPC_RS(r) |	      \
-				     ___PPC_RA(base) | IMM_L(i))
-#define PPC_STH(r, base, i)	EMIT(PPC_INST_STH | ___PPC_RS(r) |	      \
-				     ___PPC_RA(base) | IMM_L(i))
-#define PPC_STB(r, base, i)	EMIT(PPC_INST_STB | ___PPC_RS(r) |	      \
-				     ___PPC_RA(base) | IMM_L(i))
-
-#define PPC_LBZ(r, base, i)	EMIT(PPC_INST_LBZ | ___PPC_RT(r) |	      \
-				     ___PPC_RA(base) | IMM_L(i))
-#define PPC_LD(r, base, i)	EMIT(PPC_INST_LD | ___PPC_RT(r) |	      \
-				     ___PPC_RA(base) | ((i) & 0xfffc))
-#define PPC_LDX(r, base, b)	EMIT(PPC_INST_LDX | ___PPC_RT(r) |	      \
-				     ___PPC_RA(base) | ___PPC_RB(b))
-#define PPC_LWZ(r, base, i)	EMIT(PPC_INST_LWZ | ___PPC_RT(r) |	      \
-				     ___PPC_RA(base) | IMM_L(i))
-#define PPC_LHZ(r, base, i)	EMIT(PPC_INST_LHZ | ___PPC_RT(r) |	      \
-				     ___PPC_RA(base) | IMM_L(i))
-#define PPC_LHBRX(r, base, b)	EMIT(PPC_INST_LHBRX | ___PPC_RT(r) |	      \
-				     ___PPC_RA(base) | ___PPC_RB(b))
-#define PPC_LDBRX(r, base, b)	EMIT(PPC_INST_LDBRX | ___PPC_RT(r) |	      \
-				     ___PPC_RA(base) | ___PPC_RB(b))
-
-#define PPC_BPF_LDARX(t, a, b, eh) EMIT(PPC_INST_LDARX | ___PPC_RT(t) |	      \
-					___PPC_RA(a) | ___PPC_RB(b) |	      \
-					__PPC_EH(eh))
-#define PPC_BPF_LWARX(t, a, b, eh) EMIT(PPC_INST_LWARX | ___PPC_RT(t) |	      \
-					___PPC_RA(a) | ___PPC_RB(b) |	      \
-					__PPC_EH(eh))
-#define PPC_BPF_STWCX(s, a, b)	EMIT(PPC_INST_STWCX | ___PPC_RS(s) |	      \
-					___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_BPF_STDCX(s, a, b)	EMIT(PPC_INST_STDCX | ___PPC_RS(s) |	      \
-					___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_CMPWI(a, i)		EMIT(PPC_INST_CMPWI | ___PPC_RA(a) | IMM_L(i))
-#define PPC_CMPDI(a, i)		EMIT(PPC_INST_CMPDI | ___PPC_RA(a) | IMM_L(i))
-#define PPC_CMPW(a, b)		EMIT(PPC_INST_CMPW | ___PPC_RA(a) |	      \
-					___PPC_RB(b))
-#define PPC_CMPD(a, b)		EMIT(PPC_INST_CMPD | ___PPC_RA(a) |	      \
-					___PPC_RB(b))
-#define PPC_CMPLWI(a, i)	EMIT(PPC_INST_CMPLWI | ___PPC_RA(a) | IMM_L(i))
-#define PPC_CMPLDI(a, i)	EMIT(PPC_INST_CMPLDI | ___PPC_RA(a) | IMM_L(i))
-#define PPC_CMPLW(a, b)		EMIT(PPC_INST_CMPLW | ___PPC_RA(a) |	      \
-					___PPC_RB(b))
-#define PPC_CMPLD(a, b)		EMIT(PPC_INST_CMPLD | ___PPC_RA(a) |	      \
-					___PPC_RB(b))
-
-#define PPC_SUB(d, a, b)	EMIT(PPC_INST_SUB | ___PPC_RT(d) |	      \
-				     ___PPC_RB(a) | ___PPC_RA(b))
-#define PPC_ADD(d, a, b)	EMIT(PPC_INST_ADD | ___PPC_RT(d) |	      \
-				     ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_MULD(d, a, b)	EMIT(PPC_INST_MULLD | ___PPC_RT(d) |	      \
-				     ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_MULW(d, a, b)	EMIT(PPC_INST_MULLW | ___PPC_RT(d) |	      \
-				     ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_MULHWU(d, a, b)	EMIT(PPC_INST_MULHWU | ___PPC_RT(d) |	      \
-				     ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_MULI(d, a, i)	EMIT(PPC_INST_MULLI | ___PPC_RT(d) |	      \
-				     ___PPC_RA(a) | IMM_L(i))
-#define PPC_DIVWU(d, a, b)	EMIT(PPC_INST_DIVWU | ___PPC_RT(d) |	      \
-				     ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_DIVDU(d, a, b)	EMIT(PPC_INST_DIVDU | ___PPC_RT(d) |	      \
-				     ___PPC_RA(a) | ___PPC_RB(b))
-#define PPC_AND(d, a, b)	EMIT(PPC_INST_AND | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | ___PPC_RB(b))
-#define PPC_ANDI(d, a, i)	EMIT(PPC_INST_ANDI | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | IMM_L(i))
-#define PPC_AND_DOT(d, a, b)	EMIT(PPC_INST_ANDDOT | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | ___PPC_RB(b))
-#define PPC_OR(d, a, b)		EMIT(PPC_INST_OR | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | ___PPC_RB(b))
-#define PPC_MR(d, a)		PPC_OR(d, a, a)
-#define PPC_ORI(d, a, i)	EMIT(PPC_INST_ORI | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | IMM_L(i))
-#define PPC_ORIS(d, a, i)	EMIT(PPC_INST_ORIS | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | IMM_L(i))
-#define PPC_XOR(d, a, b)	EMIT(PPC_INST_XOR | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | ___PPC_RB(b))
-#define PPC_XORI(d, a, i)	EMIT(PPC_INST_XORI | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | IMM_L(i))
-#define PPC_XORIS(d, a, i)	EMIT(PPC_INST_XORIS | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | IMM_L(i))
-#define PPC_EXTSW(d, a)		EMIT(PPC_INST_EXTSW | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a))
-#define PPC_SLW(d, a, s)	EMIT(PPC_INST_SLW | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | ___PPC_RB(s))
-#define PPC_SLD(d, a, s)	EMIT(PPC_INST_SLD | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | ___PPC_RB(s))
-#define PPC_SRW(d, a, s)	EMIT(PPC_INST_SRW | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | ___PPC_RB(s))
-#define PPC_SRAW(d, a, s)	EMIT(PPC_INST_SRAW | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | ___PPC_RB(s))
-#define PPC_SRAWI(d, a, i)	EMIT(PPC_INST_SRAWI | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | __PPC_SH(i))
-#define PPC_SRD(d, a, s)	EMIT(PPC_INST_SRD | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | ___PPC_RB(s))
-#define PPC_SRAD(d, a, s)	EMIT(PPC_INST_SRAD | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | ___PPC_RB(s))
-#define PPC_SRADI(d, a, i)	EMIT(PPC_INST_SRADI | ___PPC_RA(d) |	      \
-				     ___PPC_RS(a) | __PPC_SH64(i))
-#define PPC_RLWINM(d, a, i, mb, me)	EMIT(PPC_INST_RLWINM | ___PPC_RA(d) | \
-					___PPC_RS(a) | __PPC_SH(i) |	      \
-					__PPC_MB(mb) | __PPC_ME(me))
-#define PPC_RLWINM_DOT(d, a, i, mb, me)	EMIT(PPC_INST_RLWINM_DOT |	      \
-					___PPC_RA(d) | ___PPC_RS(a) |	      \
-					__PPC_SH(i) | __PPC_MB(mb) |	      \
-					__PPC_ME(me))
-#define PPC_RLWIMI(d, a, i, mb, me)	EMIT(PPC_INST_RLWIMI | ___PPC_RA(d) | \
-					___PPC_RS(a) | __PPC_SH(i) |	      \
-					__PPC_MB(mb) | __PPC_ME(me))
-#define PPC_RLDICL(d, a, i, mb)		EMIT(PPC_INST_RLDICL | ___PPC_RA(d) | \
-					___PPC_RS(a) | __PPC_SH64(i) |	      \
-					__PPC_MB64(mb))
-#define PPC_RLDICR(d, a, i, me)		EMIT(PPC_INST_RLDICR | ___PPC_RA(d) | \
-					___PPC_RS(a) | __PPC_SH64(i) |	      \
-					__PPC_ME64(me))
-
-/* slwi = rlwinm Rx, Ry, n, 0, 31-n */
-#define PPC_SLWI(d, a, i)	PPC_RLWINM(d, a, i, 0, 31-(i))
-/* srwi = rlwinm Rx, Ry, 32-n, n, 31 */
-#define PPC_SRWI(d, a, i)	PPC_RLWINM(d, a, 32-(i), i, 31)
-/* sldi = rldicr Rx, Ry, n, 63-n */
-#define PPC_SLDI(d, a, i)	PPC_RLDICR(d, a, i, 63-(i))
-/* sldi = rldicl Rx, Ry, 64-n, n */
-#define PPC_SRDI(d, a, i)	PPC_RLDICL(d, a, 64-(i), i)
-
-#define PPC_NEG(d, a)		EMIT(PPC_INST_NEG | ___PPC_RT(d) | ___PPC_RA(a))
-
 /* Long jump; (unconditional 'branch') */
 #define PPC_JMP(dest)		EMIT(PPC_INST_BRANCH |			      \
 				     (((dest) - (ctx->idx * 4)) & 0x03fffffc))
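
The dozens of wrappers removed above all followed one scheme: a major
opcode OR'd with shifted register fields and a masked immediate. The
PPC_RAW_* macros this header now pulls from ppc-opcode.h produce the
same raw instruction words. A self-contained check of one encoding,
with the field helpers copied from the deleted lines (the addi opcode
value, 14 << 26, is standard PowerPC, not something defined in this
patch):

  #include <stdint.h>
  #include <stdio.h>

  #define ___PPC_RT(r)	(((r) & 0x1f) << 21)
  #define ___PPC_RA(r)	(((r) & 0x1f) << 16)
  #define IMM_L(i)	((uintptr_t)(i) & 0xffff)

  #define PPC_INST_ADDI	0x38000000	/* addi: major opcode 14 */
  /* mirrors the deleted PPC_ADDI()/PPC_LI() encoding, minus EMIT() */
  #define RAW_ADDI(d, a, i) \
	(PPC_INST_ADDI | ___PPC_RT(d) | ___PPC_RA(a) | IMM_L(i))

  int main(void)
  {
	uint32_t insn = RAW_ADDI(3, 0, 1);	/* li r3,1 */

	printf("%#010x\n", insn);		/* 0x38600001 */
	return 0;
  }
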
@@ -191,11 +35,11 @@
 #define PPC_LI32(d, i)		do {					      \
 		if ((int)(uintptr_t)(i) >= -32768 &&			      \
 				(int)(uintptr_t)(i) < 32768)		      \
-			PPC_LI(d, i);					      \
+			EMIT(PPC_RAW_LI(d, i));				      \
 		else {							      \
-			PPC_LIS(d, IMM_H(i));				      \
+			EMIT(PPC_RAW_LIS(d, IMM_H(i)));			      \
 			if (IMM_L(i))					      \
-				PPC_ORI(d, d, IMM_L(i));		      \
+				EMIT(PPC_RAW_ORI(d, d, IMM_L(i)));	      \
 		} } while(0)
 
 #define PPC_LI64(d, i)		do {					      \
@@ -204,19 +48,21 @@
 			PPC_LI32(d, i);					      \
 		else {							      \
 			if (!((uintptr_t)(i) & 0xffff800000000000ULL))	      \
-				PPC_LI(d, ((uintptr_t)(i) >> 32) & 0xffff);   \
+				EMIT(PPC_RAW_LI(d, ((uintptr_t)(i) >> 32) &   \
+						0xffff));		      \
 			else {						      \
-				PPC_LIS(d, ((uintptr_t)(i) >> 48));	      \
+				EMIT(PPC_RAW_LIS(d, ((uintptr_t)(i) >> 48))); \
 				if ((uintptr_t)(i) & 0x0000ffff00000000ULL)   \
-					PPC_ORI(d, d,			      \
-					  ((uintptr_t)(i) >> 32) & 0xffff);   \
+					EMIT(PPC_RAW_ORI(d, d,		      \
+					  ((uintptr_t)(i) >> 32) & 0xffff));  \
 			}						      \
-			PPC_SLDI(d, d, 32);				      \
+			EMIT(PPC_RAW_SLDI(d, d, 32));			      \
 			if ((uintptr_t)(i) & 0x00000000ffff0000ULL)	      \
-				PPC_ORIS(d, d,				      \
-					 ((uintptr_t)(i) >> 16) & 0xffff);    \
+				EMIT(PPC_RAW_ORIS(d, d,			      \
+					 ((uintptr_t)(i) >> 16) & 0xffff));   \
 			if ((uintptr_t)(i) & 0x000000000000ffffULL)	      \
-				PPC_ORI(d, d, (uintptr_t)(i) & 0xffff);	      \
+				EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) &       \
+							0xffff));             \
 		} } while (0)
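
PPC_LI64 above assembles a 64-bit immediate from 16-bit chunks: lis/ori
for the top half, sldi by 32, then oris/ori for the bottom half.
Reassembling the chunks in plain C confirms the decomposition covers
the whole constant:

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
	uint64_t i = 0xdeadbeefcafef00dULL;

	uint64_t lis  = (i >> 48) & 0xffff;	/* lis  d, ...    */
	uint64_t ori1 = (i >> 32) & 0xffff;	/* ori  d, d, ... */
	uint64_t oris = (i >> 16) & 0xffff;	/* oris d, d, ... (after sldi d,d,32) */
	uint64_t ori2 = i & 0xffff;		/* ori  d, d, ... */

	uint64_t d = (((lis << 16) | ori1) << 32) | (oris << 16) | ori2;

	printf("%s\n", d == i ? "match" : "mismatch");
	return 0;
  }
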
 
 #ifdef CONFIG_PPC64
@@ -240,7 +86,7 @@
 #define PPC_BCC(cond, dest)	do {					      \
 		if (is_nearbranch((dest) - (ctx->idx * 4))) {		      \
 			PPC_BCC_SHORT(cond, dest);			      \
-			PPC_NOP();					      \
+			EMIT(PPC_RAW_NOP());				      \
 		} else {						      \
 			/* Flip the 'T or F' bit to invert comparison */      \
 			PPC_BCC_SHORT(cond ^ COND_CMP_TRUE, (ctx->idx+2)*4);  \
diff --git a/arch/powerpc/net/bpf_jit32.h b/arch/powerpc/net/bpf_jit32.h
index 4ec2a9f..448dfd4 100644
--- a/arch/powerpc/net/bpf_jit32.h
+++ b/arch/powerpc/net/bpf_jit32.h
@@ -72,21 +72,21 @@
 DECLARE_LOAD_FUNC(sk_load_byte);
 DECLARE_LOAD_FUNC(sk_load_byte_msh);
 
-#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LBZ(r, base, i);   \
-		else {	PPC_ADDIS(r, base, IMM_HA(i));			      \
-			PPC_LBZ(r, r, IMM_L(i)); } } while(0)
+#define PPC_LBZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LBZ(r, base, i));   \
+		else {	EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i)));	      \
+			EMIT(PPC_RAW_LBZ(r, r, IMM_L(i))); } } while(0)
 
-#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) PPC_LD(r, base, i);     \
-		else {	PPC_ADDIS(r, base, IMM_HA(i));			      \
-			PPC_LD(r, r, IMM_L(i)); } } while(0)
+#define PPC_LD_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LD(r, base, i));     \
+		else {	EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i)));			\
+			EMIT(PPC_RAW_LD(r, r, IMM_L(i))); } } while(0)
 
-#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LWZ(r, base, i);   \
-		else {	PPC_ADDIS(r, base, IMM_HA(i));			      \
-			PPC_LWZ(r, r, IMM_L(i)); } } while(0)
+#define PPC_LWZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LWZ(r, base, i));   \
+		else {	EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i)));			\
+			EMIT(PPC_RAW_LWZ(r, r, IMM_L(i))); } } while(0)
 
-#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) PPC_LHZ(r, base, i);   \
-		else {	PPC_ADDIS(r, base, IMM_HA(i));			      \
-			PPC_LHZ(r, r, IMM_L(i)); } } while(0)
+#define PPC_LHZ_OFFS(r, base, i) do { if ((i) < 32768) EMIT(PPC_RAW_LHZ(r, base, i));   \
+		else {	EMIT(PPC_RAW_ADDIS(r, base, IMM_HA(i)));			\
+			EMIT(PPC_RAW_LHZ(r, r, IMM_L(i))); } } while(0)
 
 #ifdef CONFIG_PPC64
 #define PPC_LL_OFFS(r, base, i) do { PPC_LD_OFFS(r, base, i); } while(0)
@@ -107,20 +107,20 @@
 	} while(0)
 #endif
 #else
-#define PPC_BPF_LOAD_CPU(r) do { PPC_LI(r, 0); } while(0)
+#define PPC_BPF_LOAD_CPU(r) do { EMIT(PPC_RAW_LI(r, 0)); } while(0)
 #endif
 
 #define PPC_LHBRX_OFFS(r, base, i) \
-		do { PPC_LI32(r, i); PPC_LHBRX(r, r, base); } while(0)
+		do { PPC_LI32(r, i); EMIT(PPC_RAW_LHBRX(r, r, base)); } while(0)
 #ifdef __LITTLE_ENDIAN__
 #define PPC_NTOHS_OFFS(r, base, i)	PPC_LHBRX_OFFS(r, base, i)
 #else
 #define PPC_NTOHS_OFFS(r, base, i)	PPC_LHZ_OFFS(r, base, i)
 #endif
 
-#define PPC_BPF_LL(r, base, i) do { PPC_LWZ(r, base, i); } while(0)
-#define PPC_BPF_STL(r, base, i) do { PPC_STW(r, base, i); } while(0)
-#define PPC_BPF_STLU(r, base, i) do { PPC_STWU(r, base, i); } while(0)
+#define PPC_BPF_LL(r, base, i) do { EMIT(PPC_RAW_LWZ(r, base, i)); } while(0)
+#define PPC_BPF_STL(r, base, i) do { EMIT(PPC_RAW_STW(r, base, i)); } while(0)
+#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STWU(r, base, i)); } while(0)
 
 #define SEEN_DATAREF 0x10000 /* might call external helpers */
 #define SEEN_XREG    0x20000 /* X reg is used */
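
The *_OFFS helpers above handle displacements beyond the signed 16-bit
range by pairing addis with the load. IMM_HA() adds one to the high
half when the low half's sign bit is set, because the load
sign-extends its displacement; a quick standalone check (macros copied
from the old bpf_jit.h definitions):

  #include <stdint.h>
  #include <stdio.h>

  #define IMM_HA(i) (((uintptr_t)(i) >> 16) + (((uintptr_t)(i) & 0x8000) >> 15))
  #define IMM_L(i)  ((uintptr_t)(i) & 0xffff)

  int main(void)
  {
	uint32_t off = 0x12349000;	/* low half has its sign bit set */

	/* addis r,base,IMM_HA(off) then lwz r,IMM_L(off)(r): the load's
	 * displacement is sign-extended, so HA pre-compensates by +1.
	 * The uint16_t cast mimics the 16-bit immediate field. */
	int32_t rebuilt = ((uint32_t)(uint16_t)IMM_HA(off) << 16)
			+ (int16_t)IMM_L(off);

	printf("%s\n", (uint32_t)rebuilt == off ? "match" : "mismatch");
	return 0;
  }
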
diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h
index cf3a7e3..2e33c667 100644
--- a/arch/powerpc/net/bpf_jit64.h
+++ b/arch/powerpc/net/bpf_jit64.h
@@ -70,19 +70,21 @@
  */
 #define PPC_BPF_LL(r, base, i) do {					      \
 				if ((i) % 4) {				      \
-					PPC_LI(b2p[TMP_REG_2], (i));	      \
-					PPC_LDX(r, base, b2p[TMP_REG_2]);     \
+					EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\
+					EMIT(PPC_RAW_LDX(r, base,	      \
+							b2p[TMP_REG_2]));     \
 				} else					      \
-					PPC_LD(r, base, i);		      \
+					EMIT(PPC_RAW_LD(r, base, i));	      \
 				} while(0)
 #define PPC_BPF_STL(r, base, i) do {					      \
 				if ((i) % 4) {				      \
-					PPC_LI(b2p[TMP_REG_2], (i));	      \
-					PPC_STDX(r, base, b2p[TMP_REG_2]);    \
+					EMIT(PPC_RAW_LI(b2p[TMP_REG_2], (i)));\
+					EMIT(PPC_RAW_STDX(r, base,	      \
+							b2p[TMP_REG_2]));     \
 				} else					      \
-					PPC_STD(r, base, i);		      \
+					EMIT(PPC_RAW_STD(r, base, i));	      \
 				} while(0)
-#define PPC_BPF_STLU(r, base, i) do { PPC_STDU(r, base, i); } while(0)
+#define PPC_BPF_STLU(r, base, i) do { EMIT(PPC_RAW_STDU(r, base, i)); } while(0)
 
 #define SEEN_FUNC	0x1000 /* might call external helpers */
 #define SEEN_STACK	0x2000 /* uses BPF stack */
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 0acc9d5..16d09b3 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -61,7 +61,7 @@
 		PPC_LWZ_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
 							 data_len));
 		PPC_LWZ_OFFS(r_HL, r_skb, offsetof(struct sk_buff, len));
-		PPC_SUB(r_HL, r_HL, r_scratch1);
+		EMIT(PPC_RAW_SUB(r_HL, r_HL, r_scratch1));
 		PPC_LL_OFFS(r_D, r_skb, offsetof(struct sk_buff, data));
 	}
 
@@ -70,12 +70,12 @@
 		 * TODO: Could also detect whether first instr. sets X and
 		 * avoid this (as below, with A).
 		 */
-		PPC_LI(r_X, 0);
+		EMIT(PPC_RAW_LI(r_X, 0));
 	}
 
 	/* make sure we don't leak kernel information to user */
 	if (bpf_needs_clear_a(&filter[0]))
-		PPC_LI(r_A, 0);
+		EMIT(PPC_RAW_LI(r_A, 0));
 }
 
 static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
@@ -83,10 +83,10 @@
 	int i;
 
 	if (ctx->seen & (SEEN_MEM | SEEN_DATAREF)) {
-		PPC_ADDI(1, 1, BPF_PPC_STACKFRAME);
+		EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME));
 		if (ctx->seen & SEEN_DATAREF) {
 			PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
-			PPC_MTLR(0);
+			EMIT(PPC_RAW_MTLR(0));
 			PPC_BPF_LL(r_D, 1, -(REG_SZ*(32-r_D)));
 			PPC_BPF_LL(r_HL, 1, -(REG_SZ*(32-r_HL)));
 		}
@@ -100,7 +100,7 @@
 	}
 	/* The RETs have left a return value in R3. */
 
-	PPC_BLR();
+	EMIT(PPC_RAW_BLR());
 }
 
 #define CHOOSE_LOAD_FUNC(K, func) \
@@ -134,124 +134,124 @@
 			/*** ALU ops ***/
 		case BPF_ALU | BPF_ADD | BPF_X: /* A += X; */
 			ctx->seen |= SEEN_XREG;
-			PPC_ADD(r_A, r_A, r_X);
+			EMIT(PPC_RAW_ADD(r_A, r_A, r_X));
 			break;
 		case BPF_ALU | BPF_ADD | BPF_K: /* A += K; */
 			if (!K)
 				break;
-			PPC_ADDI(r_A, r_A, IMM_L(K));
+			EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(K)));
 			if (K >= 32768)
-				PPC_ADDIS(r_A, r_A, IMM_HA(K));
+				EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(K)));
 			break;
 		case BPF_ALU | BPF_SUB | BPF_X: /* A -= X; */
 			ctx->seen |= SEEN_XREG;
-			PPC_SUB(r_A, r_A, r_X);
+			EMIT(PPC_RAW_SUB(r_A, r_A, r_X));
 			break;
 		case BPF_ALU | BPF_SUB | BPF_K: /* A -= K */
 			if (!K)
 				break;
-			PPC_ADDI(r_A, r_A, IMM_L(-K));
+			EMIT(PPC_RAW_ADDI(r_A, r_A, IMM_L(-K)));
 			if (K >= 32768)
-				PPC_ADDIS(r_A, r_A, IMM_HA(-K));
+				EMIT(PPC_RAW_ADDIS(r_A, r_A, IMM_HA(-K)));
 			break;
 		case BPF_ALU | BPF_MUL | BPF_X: /* A *= X; */
 			ctx->seen |= SEEN_XREG;
-			PPC_MULW(r_A, r_A, r_X);
+			EMIT(PPC_RAW_MULW(r_A, r_A, r_X));
 			break;
 		case BPF_ALU | BPF_MUL | BPF_K: /* A *= K */
 			if (K < 32768)
-				PPC_MULI(r_A, r_A, K);
+				EMIT(PPC_RAW_MULI(r_A, r_A, K));
 			else {
 				PPC_LI32(r_scratch1, K);
-				PPC_MULW(r_A, r_A, r_scratch1);
+				EMIT(PPC_RAW_MULW(r_A, r_A, r_scratch1));
 			}
 			break;
 		case BPF_ALU | BPF_MOD | BPF_X: /* A %= X; */
 		case BPF_ALU | BPF_DIV | BPF_X: /* A /= X; */
 			ctx->seen |= SEEN_XREG;
-			PPC_CMPWI(r_X, 0);
+			EMIT(PPC_RAW_CMPWI(r_X, 0));
 			if (ctx->pc_ret0 != -1) {
 				PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
 			} else {
 				PPC_BCC_SHORT(COND_NE, (ctx->idx*4)+12);
-				PPC_LI(r_ret, 0);
+				EMIT(PPC_RAW_LI(r_ret, 0));
 				PPC_JMP(exit_addr);
 			}
 			if (code == (BPF_ALU | BPF_MOD | BPF_X)) {
-				PPC_DIVWU(r_scratch1, r_A, r_X);
-				PPC_MULW(r_scratch1, r_X, r_scratch1);
-				PPC_SUB(r_A, r_A, r_scratch1);
+				EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_X));
+				EMIT(PPC_RAW_MULW(r_scratch1, r_X, r_scratch1));
+				EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1));
 			} else {
-				PPC_DIVWU(r_A, r_A, r_X);
+				EMIT(PPC_RAW_DIVWU(r_A, r_A, r_X));
 			}
 			break;
 		case BPF_ALU | BPF_MOD | BPF_K: /* A %= K; */
 			PPC_LI32(r_scratch2, K);
-			PPC_DIVWU(r_scratch1, r_A, r_scratch2);
-			PPC_MULW(r_scratch1, r_scratch2, r_scratch1);
-			PPC_SUB(r_A, r_A, r_scratch1);
+			EMIT(PPC_RAW_DIVWU(r_scratch1, r_A, r_scratch2));
+			EMIT(PPC_RAW_MULW(r_scratch1, r_scratch2, r_scratch1));
+			EMIT(PPC_RAW_SUB(r_A, r_A, r_scratch1));
 			break;
 		case BPF_ALU | BPF_DIV | BPF_K: /* A /= K */
 			if (K == 1)
 				break;
 			PPC_LI32(r_scratch1, K);
-			PPC_DIVWU(r_A, r_A, r_scratch1);
+			EMIT(PPC_RAW_DIVWU(r_A, r_A, r_scratch1));
 			break;
 		case BPF_ALU | BPF_AND | BPF_X:
 			ctx->seen |= SEEN_XREG;
-			PPC_AND(r_A, r_A, r_X);
+			EMIT(PPC_RAW_AND(r_A, r_A, r_X));
 			break;
 		case BPF_ALU | BPF_AND | BPF_K:
 			if (!IMM_H(K))
-				PPC_ANDI(r_A, r_A, K);
+				EMIT(PPC_RAW_ANDI(r_A, r_A, K));
 			else {
 				PPC_LI32(r_scratch1, K);
-				PPC_AND(r_A, r_A, r_scratch1);
+				EMIT(PPC_RAW_AND(r_A, r_A, r_scratch1));
 			}
 			break;
 		case BPF_ALU | BPF_OR | BPF_X:
 			ctx->seen |= SEEN_XREG;
-			PPC_OR(r_A, r_A, r_X);
+			EMIT(PPC_RAW_OR(r_A, r_A, r_X));
 			break;
 		case BPF_ALU | BPF_OR | BPF_K:
 			if (IMM_L(K))
-				PPC_ORI(r_A, r_A, IMM_L(K));
+				EMIT(PPC_RAW_ORI(r_A, r_A, IMM_L(K)));
 			if (K >= 65536)
-				PPC_ORIS(r_A, r_A, IMM_H(K));
+				EMIT(PPC_RAW_ORIS(r_A, r_A, IMM_H(K)));
 			break;
 		case BPF_ANC | SKF_AD_ALU_XOR_X:
 		case BPF_ALU | BPF_XOR | BPF_X: /* A ^= X */
 			ctx->seen |= SEEN_XREG;
-			PPC_XOR(r_A, r_A, r_X);
+			EMIT(PPC_RAW_XOR(r_A, r_A, r_X));
 			break;
 		case BPF_ALU | BPF_XOR | BPF_K: /* A ^= K */
 			if (IMM_L(K))
-				PPC_XORI(r_A, r_A, IMM_L(K));
+				EMIT(PPC_RAW_XORI(r_A, r_A, IMM_L(K)));
 			if (K >= 65536)
-				PPC_XORIS(r_A, r_A, IMM_H(K));
+				EMIT(PPC_RAW_XORIS(r_A, r_A, IMM_H(K)));
 			break;
 		case BPF_ALU | BPF_LSH | BPF_X: /* A <<= X; */
 			ctx->seen |= SEEN_XREG;
-			PPC_SLW(r_A, r_A, r_X);
+			EMIT(PPC_RAW_SLW(r_A, r_A, r_X));
 			break;
 		case BPF_ALU | BPF_LSH | BPF_K:
 			if (K == 0)
 				break;
 			else
-				PPC_SLWI(r_A, r_A, K);
+				EMIT(PPC_RAW_SLWI(r_A, r_A, K));
 			break;
 		case BPF_ALU | BPF_RSH | BPF_X: /* A >>= X; */
 			ctx->seen |= SEEN_XREG;
-			PPC_SRW(r_A, r_A, r_X);
+			EMIT(PPC_RAW_SRW(r_A, r_A, r_X));
 			break;
 		case BPF_ALU | BPF_RSH | BPF_K: /* A >>= K; */
 			if (K == 0)
 				break;
 			else
-				PPC_SRWI(r_A, r_A, K);
+				EMIT(PPC_RAW_SRWI(r_A, r_A, K));
 			break;
 		case BPF_ALU | BPF_NEG:
-			PPC_NEG(r_A, r_A);
+			EMIT(PPC_RAW_NEG(r_A, r_A));
 			break;
 		case BPF_RET | BPF_K:
 			PPC_LI32(r_ret, K);
@@ -277,24 +277,24 @@
 				if (ctx->seen)
 					PPC_JMP(exit_addr);
 				else
-					PPC_BLR();
+					EMIT(PPC_RAW_BLR());
 			}
 			break;
 		case BPF_RET | BPF_A:
-			PPC_MR(r_ret, r_A);
+			EMIT(PPC_RAW_MR(r_ret, r_A));
 			if (i != flen - 1) {
 				if (ctx->seen)
 					PPC_JMP(exit_addr);
 				else
-					PPC_BLR();
+					EMIT(PPC_RAW_BLR());
 			}
 			break;
 		case BPF_MISC | BPF_TAX: /* X = A */
-			PPC_MR(r_X, r_A);
+			EMIT(PPC_RAW_MR(r_X, r_A));
 			break;
 		case BPF_MISC | BPF_TXA: /* A = X */
 			ctx->seen |= SEEN_XREG;
-			PPC_MR(r_A, r_X);
+			EMIT(PPC_RAW_MR(r_A, r_X));
 			break;
 
 			/*** Constant loads/M[] access ***/
@@ -305,19 +305,19 @@
 			PPC_LI32(r_X, K);
 			break;
 		case BPF_LD | BPF_MEM: /* A = mem[K] */
-			PPC_MR(r_A, r_M + (K & 0xf));
+			EMIT(PPC_RAW_MR(r_A, r_M + (K & 0xf)));
 			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
 			break;
 		case BPF_LDX | BPF_MEM: /* X = mem[K] */
-			PPC_MR(r_X, r_M + (K & 0xf));
+			EMIT(PPC_RAW_MR(r_X, r_M + (K & 0xf)));
 			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
 			break;
 		case BPF_ST: /* mem[K] = A */
-			PPC_MR(r_M + (K & 0xf), r_A);
+			EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_A));
 			ctx->seen |= SEEN_MEM | (1<<(K & 0xf));
 			break;
 		case BPF_STX: /* mem[K] = X */
-			PPC_MR(r_M + (K & 0xf), r_X);
+			EMIT(PPC_RAW_MR(r_M + (K & 0xf), r_X));
 			ctx->seen |= SEEN_XREG | SEEN_MEM | (1<<(K & 0xf));
 			break;
 		case BPF_LD | BPF_W | BPF_LEN: /*	A = skb->len; */
@@ -346,13 +346,13 @@
 						type) != 2);
 			PPC_LL_OFFS(r_scratch1, r_skb, offsetof(struct sk_buff,
 								dev));
-			PPC_CMPDI(r_scratch1, 0);
+			EMIT(PPC_RAW_CMPDI(r_scratch1, 0));
 			if (ctx->pc_ret0 != -1) {
 				PPC_BCC(COND_EQ, addrs[ctx->pc_ret0]);
 			} else {
 				/* Exit, returning 0; first pass hits here. */
 				PPC_BCC_SHORT(COND_NE, ctx->idx * 4 + 12);
-				PPC_LI(r_ret, 0);
+				EMIT(PPC_RAW_LI(r_ret, 0));
 				PPC_JMP(exit_addr);
 			}
 			if (code == (BPF_ANC | SKF_AD_IFINDEX)) {
@@ -383,9 +383,9 @@
 		case BPF_ANC | SKF_AD_VLAN_TAG_PRESENT:
 			PPC_LBZ_OFFS(r_A, r_skb, PKT_VLAN_PRESENT_OFFSET());
 			if (PKT_VLAN_PRESENT_BIT)
-				PPC_SRWI(r_A, r_A, PKT_VLAN_PRESENT_BIT);
+				EMIT(PPC_RAW_SRWI(r_A, r_A, PKT_VLAN_PRESENT_BIT));
 			if (PKT_VLAN_PRESENT_BIT < 7)
-				PPC_ANDI(r_A, r_A, 1);
+				EMIT(PPC_RAW_ANDI(r_A, r_A, 1));
 			break;
 		case BPF_ANC | SKF_AD_QUEUE:
 			BUILD_BUG_ON(sizeof_field(struct sk_buff,
@@ -395,8 +395,8 @@
 			break;
 		case BPF_ANC | SKF_AD_PKTTYPE:
 			PPC_LBZ_OFFS(r_A, r_skb, PKT_TYPE_OFFSET());
-			PPC_ANDI(r_A, r_A, PKT_TYPE_MAX);
-			PPC_SRWI(r_A, r_A, 5);
+			EMIT(PPC_RAW_ANDI(r_A, r_A, PKT_TYPE_MAX));
+			EMIT(PPC_RAW_SRWI(r_A, r_A, 5));
 			break;
 		case BPF_ANC | SKF_AD_CPU:
 			PPC_BPF_LOAD_CPU(r_A);
@@ -414,9 +414,9 @@
 			/* Load from [K]. */
 			ctx->seen |= SEEN_DATAREF;
 			PPC_FUNC_ADDR(r_scratch1, func);
-			PPC_MTLR(r_scratch1);
+			EMIT(PPC_RAW_MTLR(r_scratch1));
 			PPC_LI32(r_addr, K);
-			PPC_BLRL();
+			EMIT(PPC_RAW_BLRL());
 			/*
 			 * Helper returns 'lt' condition on error, and an
 			 * appropriate return value in r3
@@ -440,11 +440,11 @@
 			 */
 			ctx->seen |= SEEN_DATAREF | SEEN_XREG;
 			PPC_FUNC_ADDR(r_scratch1, func);
-			PPC_MTLR(r_scratch1);
-			PPC_ADDI(r_addr, r_X, IMM_L(K));
+			EMIT(PPC_RAW_MTLR(r_scratch1));
+			EMIT(PPC_RAW_ADDI(r_addr, r_X, IMM_L(K)));
 			if (K >= 32768)
-				PPC_ADDIS(r_addr, r_addr, IMM_HA(K));
-			PPC_BLRL();
+				EMIT(PPC_RAW_ADDIS(r_addr, r_addr, IMM_HA(K)));
+			EMIT(PPC_RAW_BLRL());
 			/* If error, cr0.LT set */
 			PPC_BCC(COND_LT, exit_addr);
 			break;
@@ -489,30 +489,30 @@
 			case BPF_JMP | BPF_JGE | BPF_X:
 			case BPF_JMP | BPF_JEQ | BPF_X:
 				ctx->seen |= SEEN_XREG;
-				PPC_CMPLW(r_A, r_X);
+				EMIT(PPC_RAW_CMPLW(r_A, r_X));
 				break;
 			case BPF_JMP | BPF_JSET | BPF_X:
 				ctx->seen |= SEEN_XREG;
-				PPC_AND_DOT(r_scratch1, r_A, r_X);
+				EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A, r_X));
 				break;
 			case BPF_JMP | BPF_JEQ | BPF_K:
 			case BPF_JMP | BPF_JGT | BPF_K:
 			case BPF_JMP | BPF_JGE | BPF_K:
 				if (K < 32768)
-					PPC_CMPLWI(r_A, K);
+					EMIT(PPC_RAW_CMPLWI(r_A, K));
 				else {
 					PPC_LI32(r_scratch1, K);
-					PPC_CMPLW(r_A, r_scratch1);
+					EMIT(PPC_RAW_CMPLW(r_A, r_scratch1));
 				}
 				break;
 			case BPF_JMP | BPF_JSET | BPF_K:
 				if (K < 32768)
 					/* PPC_ANDI is /only/ dot-form */
-					PPC_ANDI(r_scratch1, r_A, K);
+					EMIT(PPC_RAW_ANDI(r_scratch1, r_A, K));
 				else {
 					PPC_LI32(r_scratch1, K);
-					PPC_AND_DOT(r_scratch1, r_A,
-						    r_scratch1);
+					EMIT(PPC_RAW_AND_DOT(r_scratch1, r_A,
+						    r_scratch1));
 				}
 				break;
 			}
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index be3517e..022103c 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -95,12 +95,12 @@
 	 * invoked through a tail call.
 	 */
 	if (ctx->seen & SEEN_TAILCALL) {
-		PPC_LI(b2p[TMP_REG_1], 0);
+		EMIT(PPC_RAW_LI(b2p[TMP_REG_1], 0));
 		/* this goes in the redzone */
 		PPC_BPF_STL(b2p[TMP_REG_1], 1, -(BPF_PPC_STACK_SAVE + 8));
 	} else {
-		PPC_NOP();
-		PPC_NOP();
+		EMIT(PPC_RAW_NOP());
+		EMIT(PPC_RAW_NOP());
 	}
 
 #define BPF_TAILCALL_PROLOGUE_SIZE	8
@@ -129,8 +129,8 @@
 
 	/* Setup frame pointer to point to the bpf stack area */
 	if (bpf_is_seen_register(ctx, BPF_REG_FP))
-		PPC_ADDI(b2p[BPF_REG_FP], 1,
-				STACK_FRAME_MIN_SIZE + ctx->stack_size);
+		EMIT(PPC_RAW_ADDI(b2p[BPF_REG_FP], 1,
+				STACK_FRAME_MIN_SIZE + ctx->stack_size));
 }
 
 static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx)
@@ -144,10 +144,10 @@
 
 	/* Tear down our stack frame */
 	if (bpf_has_stack_frame(ctx)) {
-		PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size);
+		EMIT(PPC_RAW_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size));
 		if (ctx->seen & SEEN_FUNC) {
 			PPC_BPF_LL(0, 1, PPC_LR_STKOFF);
-			PPC_MTLR(0);
+			EMIT(PPC_RAW_MTLR(0));
 		}
 	}
 }
@@ -157,9 +157,9 @@
 	bpf_jit_emit_common_epilogue(image, ctx);
 
 	/* Move result to r3 */
-	PPC_MR(3, b2p[BPF_REG_0]);
+	EMIT(PPC_RAW_MR(3, b2p[BPF_REG_0]));
 
-	PPC_BLR();
+	EMIT(PPC_RAW_BLR());
 }
 
 static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx,
@@ -171,7 +171,7 @@
 	/* Load actual entry point from function descriptor */
 	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
 	/* ... and move it to LR */
-	PPC_MTLR(b2p[TMP_REG_1]);
+	EMIT(PPC_RAW_MTLR(b2p[TMP_REG_1]));
 	/*
 	 * Load TOC from function descriptor at offset 8.
 	 * We can clobber r2 since we get called through a
@@ -182,9 +182,9 @@
 #else
 	/* We can clobber r12 */
 	PPC_FUNC_ADDR(12, func);
-	PPC_MTLR(12);
+	EMIT(PPC_RAW_MTLR(12));
 #endif
-	PPC_BLRL();
+	EMIT(PPC_RAW_BLRL());
 }
 
 static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx,
@@ -206,7 +206,7 @@
 	 * that PPC_LI64() can emit.
 	 */
 	for (i = ctx->idx - ctx_idx; i < 5; i++)
-		PPC_NOP();
+		EMIT(PPC_RAW_NOP());
 
 #ifdef PPC64_ELF_ABI_v1
 	/*
@@ -220,8 +220,8 @@
 	PPC_BPF_LL(12, 12, 0);
 #endif
 
-	PPC_MTLR(12);
-	PPC_BLRL();
+	EMIT(PPC_RAW_MTLR(12));
+	EMIT(PPC_RAW_BLRL());
 }
 
 static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out)
@@ -239,9 +239,9 @@
 	 * if (index >= array->map.max_entries)
 	 *   goto out;
 	 */
-	PPC_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries));
-	PPC_RLWINM(b2p_index, b2p_index, 0, 0, 31);
-	PPC_CMPLW(b2p_index, b2p[TMP_REG_1]);
+	EMIT(PPC_RAW_LWZ(b2p[TMP_REG_1], b2p_bpf_array, offsetof(struct bpf_array, map.max_entries)));
+	EMIT(PPC_RAW_RLWINM(b2p_index, b2p_index, 0, 0, 31));
+	EMIT(PPC_RAW_CMPLW(b2p_index, b2p[TMP_REG_1]));
 	PPC_BCC(COND_GE, out);
 
 	/*
@@ -249,42 +249,42 @@
 	 *   goto out;
 	 */
 	PPC_BPF_LL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
-	PPC_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT);
+	EMIT(PPC_RAW_CMPLWI(b2p[TMP_REG_1], MAX_TAIL_CALL_CNT));
 	PPC_BCC(COND_GT, out);
 
 	/*
 	 * tail_call_cnt++;
 	 */
-	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1);
+	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], 1));
 	PPC_BPF_STL(b2p[TMP_REG_1], 1, bpf_jit_stack_tailcallcnt(ctx));
 
 	/* prog = array->ptrs[index]; */
-	PPC_MULI(b2p[TMP_REG_1], b2p_index, 8);
-	PPC_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array);
+	EMIT(PPC_RAW_MULI(b2p[TMP_REG_1], b2p_index, 8));
+	EMIT(PPC_RAW_ADD(b2p[TMP_REG_1], b2p[TMP_REG_1], b2p_bpf_array));
 	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_array, ptrs));
 
 	/*
 	 * if (prog == NULL)
 	 *   goto out;
 	 */
-	PPC_CMPLDI(b2p[TMP_REG_1], 0);
+	EMIT(PPC_RAW_CMPLDI(b2p[TMP_REG_1], 0));
 	PPC_BCC(COND_EQ, out);
 
 	/* goto *(prog->bpf_func + prologue_size); */
 	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_1], offsetof(struct bpf_prog, bpf_func));
 #ifdef PPC64_ELF_ABI_v1
 	/* skip past the function descriptor */
-	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
-			FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE);
+	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1],
+			FUNCTION_DESCR_SIZE + BPF_TAILCALL_PROLOGUE_SIZE));
 #else
-	PPC_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE);
+	EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], b2p[TMP_REG_1], BPF_TAILCALL_PROLOGUE_SIZE));
 #endif
-	PPC_MTCTR(b2p[TMP_REG_1]);
+	EMIT(PPC_RAW_MTCTR(b2p[TMP_REG_1]));
 
 	/* tear down stack, restore NVRs, ... */
 	bpf_jit_emit_common_epilogue(image, ctx);
 
-	PPC_BCTR();
+	EMIT(PPC_RAW_BCTR());
 	/* out: */
 }
 
@@ -340,11 +340,11 @@
 		 */
 		case BPF_ALU | BPF_ADD | BPF_X: /* (u32) dst += (u32) src */
 		case BPF_ALU64 | BPF_ADD | BPF_X: /* dst += src */
-			PPC_ADD(dst_reg, dst_reg, src_reg);
+			EMIT(PPC_RAW_ADD(dst_reg, dst_reg, src_reg));
 			goto bpf_alu32_trunc;
 		case BPF_ALU | BPF_SUB | BPF_X: /* (u32) dst -= (u32) src */
 		case BPF_ALU64 | BPF_SUB | BPF_X: /* dst -= src */
-			PPC_SUB(dst_reg, dst_reg, src_reg);
+			EMIT(PPC_RAW_SUB(dst_reg, dst_reg, src_reg));
 			goto bpf_alu32_trunc;
 		case BPF_ALU | BPF_ADD | BPF_K: /* (u32) dst += (u32) imm */
 		case BPF_ALU | BPF_SUB | BPF_K: /* (u32) dst -= (u32) imm */
@@ -354,53 +354,53 @@
 				imm = -imm;
 			if (imm) {
 				if (imm >= -32768 && imm < 32768)
-					PPC_ADDI(dst_reg, dst_reg, IMM_L(imm));
+					EMIT(PPC_RAW_ADDI(dst_reg, dst_reg, IMM_L(imm)));
 				else {
 					PPC_LI32(b2p[TMP_REG_1], imm);
-					PPC_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]);
+					EMIT(PPC_RAW_ADD(dst_reg, dst_reg, b2p[TMP_REG_1]));
 				}
 			}
 			goto bpf_alu32_trunc;
 		case BPF_ALU | BPF_MUL | BPF_X: /* (u32) dst *= (u32) src */
 		case BPF_ALU64 | BPF_MUL | BPF_X: /* dst *= src */
 			if (BPF_CLASS(code) == BPF_ALU)
-				PPC_MULW(dst_reg, dst_reg, src_reg);
+				EMIT(PPC_RAW_MULW(dst_reg, dst_reg, src_reg));
 			else
-				PPC_MULD(dst_reg, dst_reg, src_reg);
+				EMIT(PPC_RAW_MULD(dst_reg, dst_reg, src_reg));
 			goto bpf_alu32_trunc;
 		case BPF_ALU | BPF_MUL | BPF_K: /* (u32) dst *= (u32) imm */
 		case BPF_ALU64 | BPF_MUL | BPF_K: /* dst *= imm */
 			if (imm >= -32768 && imm < 32768)
-				PPC_MULI(dst_reg, dst_reg, IMM_L(imm));
+				EMIT(PPC_RAW_MULI(dst_reg, dst_reg, IMM_L(imm)));
 			else {
 				PPC_LI32(b2p[TMP_REG_1], imm);
 				if (BPF_CLASS(code) == BPF_ALU)
-					PPC_MULW(dst_reg, dst_reg,
-							b2p[TMP_REG_1]);
+					EMIT(PPC_RAW_MULW(dst_reg, dst_reg,
+							b2p[TMP_REG_1]));
 				else
-					PPC_MULD(dst_reg, dst_reg,
-							b2p[TMP_REG_1]);
+					EMIT(PPC_RAW_MULD(dst_reg, dst_reg,
+							b2p[TMP_REG_1]));
 			}
 			goto bpf_alu32_trunc;
 		case BPF_ALU | BPF_DIV | BPF_X: /* (u32) dst /= (u32) src */
 		case BPF_ALU | BPF_MOD | BPF_X: /* (u32) dst %= (u32) src */
 			if (BPF_OP(code) == BPF_MOD) {
-				PPC_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg);
-				PPC_MULW(b2p[TMP_REG_1], src_reg,
-						b2p[TMP_REG_1]);
-				PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
+				EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_1], dst_reg, src_reg));
+				EMIT(PPC_RAW_MULW(b2p[TMP_REG_1], src_reg,
+						b2p[TMP_REG_1]));
+				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]));
 			} else
-				PPC_DIVWU(dst_reg, dst_reg, src_reg);
+				EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg, src_reg));
 			goto bpf_alu32_trunc;
 		case BPF_ALU64 | BPF_DIV | BPF_X: /* dst /= src */
 		case BPF_ALU64 | BPF_MOD | BPF_X: /* dst %= src */
 			if (BPF_OP(code) == BPF_MOD) {
-				PPC_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg);
-				PPC_MULD(b2p[TMP_REG_1], src_reg,
-						b2p[TMP_REG_1]);
-				PPC_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]);
+				EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_1], dst_reg, src_reg));
+				EMIT(PPC_RAW_MULD(b2p[TMP_REG_1], src_reg,
+						b2p[TMP_REG_1]));
+				EMIT(PPC_RAW_SUB(dst_reg, dst_reg, b2p[TMP_REG_1]));
 			} else
-				PPC_DIVDU(dst_reg, dst_reg, src_reg);
+				EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg, src_reg));
 			break;
 		case BPF_ALU | BPF_MOD | BPF_K: /* (u32) dst %= (u32) imm */
 		case BPF_ALU | BPF_DIV | BPF_K: /* (u32) dst /= (u32) imm */
@@ -415,35 +415,37 @@
 			switch (BPF_CLASS(code)) {
 			case BPF_ALU:
 				if (BPF_OP(code) == BPF_MOD) {
-					PPC_DIVWU(b2p[TMP_REG_2], dst_reg,
-							b2p[TMP_REG_1]);
-					PPC_MULW(b2p[TMP_REG_1],
+					EMIT(PPC_RAW_DIVWU(b2p[TMP_REG_2],
+							dst_reg,
+							b2p[TMP_REG_1]));
+					EMIT(PPC_RAW_MULW(b2p[TMP_REG_1],
 							b2p[TMP_REG_1],
-							b2p[TMP_REG_2]);
-					PPC_SUB(dst_reg, dst_reg,
-							b2p[TMP_REG_1]);
+							b2p[TMP_REG_2]));
+					EMIT(PPC_RAW_SUB(dst_reg, dst_reg,
+							b2p[TMP_REG_1]));
 				} else
-					PPC_DIVWU(dst_reg, dst_reg,
-							b2p[TMP_REG_1]);
+					EMIT(PPC_RAW_DIVWU(dst_reg, dst_reg,
+							b2p[TMP_REG_1]));
 				break;
 			case BPF_ALU64:
 				if (BPF_OP(code) == BPF_MOD) {
-					PPC_DIVDU(b2p[TMP_REG_2], dst_reg,
-							b2p[TMP_REG_1]);
-					PPC_MULD(b2p[TMP_REG_1],
+					EMIT(PPC_RAW_DIVDU(b2p[TMP_REG_2],
+							dst_reg,
+							b2p[TMP_REG_1]));
+					EMIT(PPC_RAW_MULD(b2p[TMP_REG_1],
 							b2p[TMP_REG_1],
-							b2p[TMP_REG_2]);
-					PPC_SUB(dst_reg, dst_reg,
-							b2p[TMP_REG_1]);
+							b2p[TMP_REG_2]));
+					EMIT(PPC_RAW_SUB(dst_reg, dst_reg,
+							b2p[TMP_REG_1]));
 				} else
-					PPC_DIVDU(dst_reg, dst_reg,
-							b2p[TMP_REG_1]);
+					EMIT(PPC_RAW_DIVDU(dst_reg, dst_reg,
+							b2p[TMP_REG_1]));
 				break;
 			}
 			goto bpf_alu32_trunc;
 		case BPF_ALU | BPF_NEG: /* (u32) dst = -dst */
 		case BPF_ALU64 | BPF_NEG: /* dst = -dst */
-			PPC_NEG(dst_reg, dst_reg);
+			EMIT(PPC_RAW_NEG(dst_reg, dst_reg));
 			goto bpf_alu32_trunc;
 
 		/*
@@ -451,101 +453,101 @@
 		 */
 		case BPF_ALU | BPF_AND | BPF_X: /* (u32) dst = dst & src */
 		case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
-			PPC_AND(dst_reg, dst_reg, src_reg);
+			EMIT(PPC_RAW_AND(dst_reg, dst_reg, src_reg));
 			goto bpf_alu32_trunc;
 		case BPF_ALU | BPF_AND | BPF_K: /* (u32) dst = dst & imm */
 		case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
 			if (!IMM_H(imm))
-				PPC_ANDI(dst_reg, dst_reg, IMM_L(imm));
+				EMIT(PPC_RAW_ANDI(dst_reg, dst_reg, IMM_L(imm)));
 			else {
 				/* Sign-extended */
 				PPC_LI32(b2p[TMP_REG_1], imm);
-				PPC_AND(dst_reg, dst_reg, b2p[TMP_REG_1]);
+				EMIT(PPC_RAW_AND(dst_reg, dst_reg, b2p[TMP_REG_1]));
 			}
 			goto bpf_alu32_trunc;
 		case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
 		case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
-			PPC_OR(dst_reg, dst_reg, src_reg);
+			EMIT(PPC_RAW_OR(dst_reg, dst_reg, src_reg));
 			goto bpf_alu32_trunc;
 		case BPF_ALU | BPF_OR | BPF_K:/* dst = (u32) dst | (u32) imm */
 		case BPF_ALU64 | BPF_OR | BPF_K:/* dst = dst | imm */
 			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
 				/* Sign-extended */
 				PPC_LI32(b2p[TMP_REG_1], imm);
-				PPC_OR(dst_reg, dst_reg, b2p[TMP_REG_1]);
+				EMIT(PPC_RAW_OR(dst_reg, dst_reg, b2p[TMP_REG_1]));
 			} else {
 				if (IMM_L(imm))
-					PPC_ORI(dst_reg, dst_reg, IMM_L(imm));
+					EMIT(PPC_RAW_ORI(dst_reg, dst_reg, IMM_L(imm)));
 				if (IMM_H(imm))
-					PPC_ORIS(dst_reg, dst_reg, IMM_H(imm));
+					EMIT(PPC_RAW_ORIS(dst_reg, dst_reg, IMM_H(imm)));
 			}
 			goto bpf_alu32_trunc;
 		case BPF_ALU | BPF_XOR | BPF_X: /* (u32) dst ^= src */
 		case BPF_ALU64 | BPF_XOR | BPF_X: /* dst ^= src */
-			PPC_XOR(dst_reg, dst_reg, src_reg);
+			EMIT(PPC_RAW_XOR(dst_reg, dst_reg, src_reg));
 			goto bpf_alu32_trunc;
 		case BPF_ALU | BPF_XOR | BPF_K: /* (u32) dst ^= (u32) imm */
 		case BPF_ALU64 | BPF_XOR | BPF_K: /* dst ^= imm */
 			if (imm < 0 && BPF_CLASS(code) == BPF_ALU64) {
 				/* Sign-extended */
 				PPC_LI32(b2p[TMP_REG_1], imm);
-				PPC_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]);
+				EMIT(PPC_RAW_XOR(dst_reg, dst_reg, b2p[TMP_REG_1]));
 			} else {
 				if (IMM_L(imm))
-					PPC_XORI(dst_reg, dst_reg, IMM_L(imm));
+					EMIT(PPC_RAW_XORI(dst_reg, dst_reg, IMM_L(imm)));
 				if (IMM_H(imm))
-					PPC_XORIS(dst_reg, dst_reg, IMM_H(imm));
+					EMIT(PPC_RAW_XORIS(dst_reg, dst_reg, IMM_H(imm)));
 			}
 			goto bpf_alu32_trunc;
 		case BPF_ALU | BPF_LSH | BPF_X: /* (u32) dst <<= (u32) src */
 			/* slw clears top 32 bits */
-			PPC_SLW(dst_reg, dst_reg, src_reg);
+			EMIT(PPC_RAW_SLW(dst_reg, dst_reg, src_reg));
 			/* skip zero extension move, but set address map. */
 			if (insn_is_zext(&insn[i + 1]))
 				addrs[++i] = ctx->idx * 4;
 			break;
 		case BPF_ALU64 | BPF_LSH | BPF_X: /* dst <<= src; */
-			PPC_SLD(dst_reg, dst_reg, src_reg);
+			EMIT(PPC_RAW_SLD(dst_reg, dst_reg, src_reg));
 			break;
 		case BPF_ALU | BPF_LSH | BPF_K: /* (u32) dst <<== (u32) imm */
 			/* with imm 0, we still need to clear top 32 bits */
-			PPC_SLWI(dst_reg, dst_reg, imm);
+			EMIT(PPC_RAW_SLWI(dst_reg, dst_reg, imm));
 			if (insn_is_zext(&insn[i + 1]))
 				addrs[++i] = ctx->idx * 4;
 			break;
 		case BPF_ALU64 | BPF_LSH | BPF_K: /* dst <<== imm */
 			if (imm != 0)
-				PPC_SLDI(dst_reg, dst_reg, imm);
+				EMIT(PPC_RAW_SLDI(dst_reg, dst_reg, imm));
 			break;
 		case BPF_ALU | BPF_RSH | BPF_X: /* (u32) dst >>= (u32) src */
-			PPC_SRW(dst_reg, dst_reg, src_reg);
+			EMIT(PPC_RAW_SRW(dst_reg, dst_reg, src_reg));
 			if (insn_is_zext(&insn[i + 1]))
 				addrs[++i] = ctx->idx * 4;
 			break;
 		case BPF_ALU64 | BPF_RSH | BPF_X: /* dst >>= src */
-			PPC_SRD(dst_reg, dst_reg, src_reg);
+			EMIT(PPC_RAW_SRD(dst_reg, dst_reg, src_reg));
 			break;
 		case BPF_ALU | BPF_RSH | BPF_K: /* (u32) dst >>= (u32) imm */
-			PPC_SRWI(dst_reg, dst_reg, imm);
+			EMIT(PPC_RAW_SRWI(dst_reg, dst_reg, imm));
 			if (insn_is_zext(&insn[i + 1]))
 				addrs[++i] = ctx->idx * 4;
 			break;
 		case BPF_ALU64 | BPF_RSH | BPF_K: /* dst >>= imm */
 			if (imm != 0)
-				PPC_SRDI(dst_reg, dst_reg, imm);
+				EMIT(PPC_RAW_SRDI(dst_reg, dst_reg, imm));
 			break;
 		case BPF_ALU | BPF_ARSH | BPF_X: /* (s32) dst >>= src */
-			PPC_SRAW(dst_reg, dst_reg, src_reg);
+			EMIT(PPC_RAW_SRAW(dst_reg, dst_reg, src_reg));
 			goto bpf_alu32_trunc;
 		case BPF_ALU64 | BPF_ARSH | BPF_X: /* (s64) dst >>= src */
-			PPC_SRAD(dst_reg, dst_reg, src_reg);
+			EMIT(PPC_RAW_SRAD(dst_reg, dst_reg, src_reg));
 			break;
 		case BPF_ALU | BPF_ARSH | BPF_K: /* (s32) dst >>= imm */
-			PPC_SRAWI(dst_reg, dst_reg, imm);
+			EMIT(PPC_RAW_SRAWI(dst_reg, dst_reg, imm));
 			goto bpf_alu32_trunc;
 		case BPF_ALU64 | BPF_ARSH | BPF_K: /* (s64) dst >>= imm */
 			if (imm != 0)
-				PPC_SRADI(dst_reg, dst_reg, imm);
+				EMIT(PPC_RAW_SRADI(dst_reg, dst_reg, imm));
 			break;
 
 		/*
@@ -555,10 +557,10 @@
 		case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
 			if (imm == 1) {
 				/* special mov32 for zext */
-				PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
+				EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31));
 				break;
 			}
-			PPC_MR(dst_reg, src_reg);
+			EMIT(PPC_RAW_MR(dst_reg, src_reg));
 			goto bpf_alu32_trunc;
 		case BPF_ALU | BPF_MOV | BPF_K: /* (u32) dst = imm */
 		case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = (s64) imm */
@@ -572,7 +574,7 @@
 bpf_alu32_trunc:
 		/* Truncate to 32-bits */
 		if (BPF_CLASS(code) == BPF_ALU && !fp->aux->verifier_zext)
-			PPC_RLWINM(dst_reg, dst_reg, 0, 0, 31);
+			EMIT(PPC_RAW_RLWINM(dst_reg, dst_reg, 0, 0, 31));
 		break;
 
 		/*
@@ -590,11 +592,11 @@
 			switch (imm) {
 			case 16:
 				/* Rotate 8 bits left & mask with 0x0000ff00 */
-				PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23);
+				EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 16, 23));
 				/* Rotate 8 bits right & insert LSB to reg */
-				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31);
+				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 24, 31));
 				/* Move result back to dst_reg */
-				PPC_MR(dst_reg, b2p[TMP_REG_1]);
+				EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
 				break;
 			case 32:
 				/*
@@ -602,12 +604,12 @@
 				 * 2 bytes are already in their final position
 				 * -- byte 2 and 4 (of bytes 1, 2, 3 and 4)
 				 */
-				PPC_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31);
+				EMIT(PPC_RAW_RLWINM(b2p[TMP_REG_1], dst_reg, 8, 0, 31));
 				/* Rotate 24 bits and insert byte 1 */
-				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7);
+				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 0, 7));
 				/* Rotate 24 bits and insert byte 3 */
-				PPC_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23);
-				PPC_MR(dst_reg, b2p[TMP_REG_1]);
+				EMIT(PPC_RAW_RLWIMI(b2p[TMP_REG_1], dst_reg, 24, 16, 23));
+				EMIT(PPC_RAW_MR(dst_reg, b2p[TMP_REG_1]));
 				break;
 			case 64:
 				/*
@@ -619,8 +621,8 @@
 				 * same across all passes
 				 */
 				PPC_BPF_STL(dst_reg, 1, bpf_jit_stack_local(ctx));
-				PPC_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx));
-				PPC_LDBRX(dst_reg, 0, b2p[TMP_REG_1]);
+				EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], 1, bpf_jit_stack_local(ctx)));
+				EMIT(PPC_RAW_LDBRX(dst_reg, 0, b2p[TMP_REG_1]));
 				break;
 			}
 			break;
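
For reference (an illustrative sketch, not patch content): the rotate-and-insert idiom used for the 32-bit swap above can be checked in plain C, reading the IBM mask positions 0-7 and 16-23 as 0xff000000 and 0x0000ff00:

#include <stdint.h>
#include <stdio.h>

static uint32_t rotl32(uint32_t v, int n)
{
	return (v << n) | (v >> (32 - n));
}

int main(void)
{
	uint32_t src = 0x11223344, tmp;

	/* rlwinm: rotate left 8 -- bytes 2 and 4 land in their final spots */
	tmp = rotl32(src, 8);
	/* rlwimi: rotate left 24, insert byte 1 (IBM bits 0-7) */
	tmp = (tmp & ~0xff000000u) | (rotl32(src, 24) & 0xff000000u);
	/* rlwimi: rotate left 24, insert byte 3 (IBM bits 16-23) */
	tmp = (tmp & ~0x0000ff00u) | (rotl32(src, 24) & 0x0000ff00u);
	printf("%08x\n", tmp);	/* prints 44332211 */
	return 0;
}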
@@ -629,14 +631,14 @@
 			switch (imm) {
 			case 16:
 				/* zero-extend 16 bits into 64 bits */
-				PPC_RLDICL(dst_reg, dst_reg, 0, 48);
+				EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 48));
 				if (insn_is_zext(&insn[i + 1]))
 					addrs[++i] = ctx->idx * 4;
 				break;
 			case 32:
 				if (!fp->aux->verifier_zext)
 					/* zero-extend 32 bits into 64 bits */
-					PPC_RLDICL(dst_reg, dst_reg, 0, 32);
+					EMIT(PPC_RAW_RLDICL(dst_reg, dst_reg, 0, 32));
 				break;
 			case 64:
 				/* nop */
@@ -650,18 +652,18 @@
 		case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src */
 		case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
 			if (BPF_CLASS(code) == BPF_ST) {
-				PPC_LI(b2p[TMP_REG_1], imm);
+				EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
 				src_reg = b2p[TMP_REG_1];
 			}
-			PPC_STB(src_reg, dst_reg, off);
+			EMIT(PPC_RAW_STB(src_reg, dst_reg, off));
 			break;
 		case BPF_STX | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = src */
 		case BPF_ST | BPF_MEM | BPF_H: /* (u16 *)(dst + off) = imm */
 			if (BPF_CLASS(code) == BPF_ST) {
-				PPC_LI(b2p[TMP_REG_1], imm);
+				EMIT(PPC_RAW_LI(b2p[TMP_REG_1], imm));
 				src_reg = b2p[TMP_REG_1];
 			}
-			PPC_STH(src_reg, dst_reg, off);
+			EMIT(PPC_RAW_STH(src_reg, dst_reg, off));
 			break;
 		case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
 		case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
@@ -669,7 +671,7 @@
 				PPC_LI32(b2p[TMP_REG_1], imm);
 				src_reg = b2p[TMP_REG_1];
 			}
-			PPC_STW(src_reg, dst_reg, off);
+			EMIT(PPC_RAW_STW(src_reg, dst_reg, off));
 			break;
 		case BPF_STX | BPF_MEM | BPF_DW: /* (u64 *)(dst + off) = src */
 		case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
@@ -686,24 +688,24 @@
 		/* *(u32 *)(dst + off) += src */
 		case BPF_STX | BPF_XADD | BPF_W:
 			/* Get EA into TMP_REG_1 */
-			PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
+			EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off));
 			tmp_idx = ctx->idx * 4;
 			/* load value from memory into TMP_REG_2 */
-			PPC_BPF_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
+			EMIT(PPC_RAW_LWARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0));
 			/* add value from src_reg into this */
-			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
+			EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg));
 			/* store result back */
-			PPC_BPF_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
+			EMIT(PPC_RAW_STWCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]));
 			/* we're done if this succeeded */
 			PPC_BCC_SHORT(COND_NE, tmp_idx);
 			break;
 		/* *(u64 *)(dst + off) += src */
 		case BPF_STX | BPF_XADD | BPF_DW:
-			PPC_ADDI(b2p[TMP_REG_1], dst_reg, off);
+			EMIT(PPC_RAW_ADDI(b2p[TMP_REG_1], dst_reg, off));
 			tmp_idx = ctx->idx * 4;
-			PPC_BPF_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0);
-			PPC_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg);
-			PPC_BPF_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]);
+			EMIT(PPC_RAW_LDARX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1], 0));
+			EMIT(PPC_RAW_ADD(b2p[TMP_REG_2], b2p[TMP_REG_2], src_reg));
+			EMIT(PPC_RAW_STDCX(b2p[TMP_REG_2], 0, b2p[TMP_REG_1]));
 			PPC_BCC_SHORT(COND_NE, tmp_idx);
 			break;
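
The lwarx/stwcx. pair emitted above is the classic powerpc load-reserve/store-conditional retry loop; its effect is that of a relaxed atomic add. A minimal C equivalent (illustrative, using the GCC/Clang builtin, not patch content):

#include <stdint.h>

/* On powerpc this compiles to the same lwarx/add/stwcx. retry loop
 * that the JIT emits for the 32-bit BPF_XADD case. */
static inline void xadd_w(uint32_t *ea, uint32_t src)
{
	__atomic_fetch_add(ea, src, __ATOMIC_RELAXED);
}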
 
@@ -712,19 +714,19 @@
 		 */
 		/* dst = *(u8 *)(ul) (src + off) */
 		case BPF_LDX | BPF_MEM | BPF_B:
-			PPC_LBZ(dst_reg, src_reg, off);
+			EMIT(PPC_RAW_LBZ(dst_reg, src_reg, off));
 			if (insn_is_zext(&insn[i + 1]))
 				addrs[++i] = ctx->idx * 4;
 			break;
 		/* dst = *(u16 *)(ul) (src + off) */
 		case BPF_LDX | BPF_MEM | BPF_H:
-			PPC_LHZ(dst_reg, src_reg, off);
+			EMIT(PPC_RAW_LHZ(dst_reg, src_reg, off));
 			if (insn_is_zext(&insn[i + 1]))
 				addrs[++i] = ctx->idx * 4;
 			break;
 		/* dst = *(u32 *)(ul) (src + off) */
 		case BPF_LDX | BPF_MEM | BPF_W:
-			PPC_LWZ(dst_reg, src_reg, off);
+			EMIT(PPC_RAW_LWZ(dst_reg, src_reg, off));
 			if (insn_is_zext(&insn[i + 1]))
 				addrs[++i] = ctx->idx * 4;
 			break;
@@ -775,7 +777,7 @@
 			else
 				bpf_jit_emit_func_call_rel(image, ctx, func_addr);
 			/* move return value from r3 to BPF_REG_0 */
-			PPC_MR(b2p[BPF_REG_0], 3);
+			EMIT(PPC_RAW_MR(b2p[BPF_REG_0], 3));
 			break;
 
 		/*
@@ -860,9 +862,9 @@
 			case BPF_JMP32 | BPF_JNE | BPF_X:
 				/* unsigned comparison */
 				if (BPF_CLASS(code) == BPF_JMP32)
-					PPC_CMPLW(dst_reg, src_reg);
+					EMIT(PPC_RAW_CMPLW(dst_reg, src_reg));
 				else
-					PPC_CMPLD(dst_reg, src_reg);
+					EMIT(PPC_RAW_CMPLD(dst_reg, src_reg));
 				break;
 			case BPF_JMP | BPF_JSGT | BPF_X:
 			case BPF_JMP | BPF_JSLT | BPF_X:
@@ -874,21 +876,21 @@
 			case BPF_JMP32 | BPF_JSLE | BPF_X:
 				/* signed comparison */
 				if (BPF_CLASS(code) == BPF_JMP32)
-					PPC_CMPW(dst_reg, src_reg);
+					EMIT(PPC_RAW_CMPW(dst_reg, src_reg));
 				else
-					PPC_CMPD(dst_reg, src_reg);
+					EMIT(PPC_RAW_CMPD(dst_reg, src_reg));
 				break;
 			case BPF_JMP | BPF_JSET | BPF_X:
 			case BPF_JMP32 | BPF_JSET | BPF_X:
 				if (BPF_CLASS(code) == BPF_JMP) {
-					PPC_AND_DOT(b2p[TMP_REG_1], dst_reg,
-						    src_reg);
+					EMIT(PPC_RAW_AND_DOT(b2p[TMP_REG_1], dst_reg,
+						    src_reg));
 				} else {
 					int tmp_reg = b2p[TMP_REG_1];
 
-					PPC_AND(tmp_reg, dst_reg, src_reg);
-					PPC_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
-						       31);
+					EMIT(PPC_RAW_AND(tmp_reg, dst_reg, src_reg));
+					EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg, 0, 0,
+						       31));
 				}
 				break;
 			case BPF_JMP | BPF_JNE | BPF_K:
@@ -912,19 +914,19 @@
 				 */
 				if (imm >= 0 && imm < 32768) {
 					if (is_jmp32)
-						PPC_CMPLWI(dst_reg, imm);
+						EMIT(PPC_RAW_CMPLWI(dst_reg, imm));
 					else
-						PPC_CMPLDI(dst_reg, imm);
+						EMIT(PPC_RAW_CMPLDI(dst_reg, imm));
 				} else {
 					/* sign-extending load */
 					PPC_LI32(b2p[TMP_REG_1], imm);
 					/* ... but unsigned comparison */
 					if (is_jmp32)
-						PPC_CMPLW(dst_reg,
-							  b2p[TMP_REG_1]);
+						EMIT(PPC_RAW_CMPLW(dst_reg,
+							  b2p[TMP_REG_1]));
 					else
-						PPC_CMPLD(dst_reg,
-							  b2p[TMP_REG_1]);
+						EMIT(PPC_RAW_CMPLD(dst_reg,
+							  b2p[TMP_REG_1]));
 				}
 				break;
 			}
@@ -945,17 +947,17 @@
 				 */
 				if (imm >= -32768 && imm < 32768) {
 					if (is_jmp32)
-						PPC_CMPWI(dst_reg, imm);
+						EMIT(PPC_RAW_CMPWI(dst_reg, imm));
 					else
-						PPC_CMPDI(dst_reg, imm);
+						EMIT(PPC_RAW_CMPDI(dst_reg, imm));
 				} else {
 					PPC_LI32(b2p[TMP_REG_1], imm);
 					if (is_jmp32)
-						PPC_CMPW(dst_reg,
-							 b2p[TMP_REG_1]);
+						EMIT(PPC_RAW_CMPW(dst_reg,
+							 b2p[TMP_REG_1]));
 					else
-						PPC_CMPD(dst_reg,
-							 b2p[TMP_REG_1]);
+						EMIT(PPC_RAW_CMPD(dst_reg,
+							 b2p[TMP_REG_1]));
 				}
 				break;
 			}
@@ -964,19 +966,19 @@
 				/* andi does not sign-extend the immediate */
 				if (imm >= 0 && imm < 32768)
 					/* PPC_ANDI is _only/always_ dot-form */
-					PPC_ANDI(b2p[TMP_REG_1], dst_reg, imm);
+					EMIT(PPC_RAW_ANDI(b2p[TMP_REG_1], dst_reg, imm));
 				else {
 					int tmp_reg = b2p[TMP_REG_1];
 
 					PPC_LI32(tmp_reg, imm);
 					if (BPF_CLASS(code) == BPF_JMP) {
-						PPC_AND_DOT(tmp_reg, dst_reg,
-							    tmp_reg);
+						EMIT(PPC_RAW_AND_DOT(tmp_reg, dst_reg,
+							    tmp_reg));
 					} else {
-						PPC_AND(tmp_reg, dst_reg,
-							tmp_reg);
-						PPC_RLWINM_DOT(tmp_reg, tmp_reg,
-							       0, 0, 31);
+						EMIT(PPC_RAW_AND(tmp_reg, dst_reg,
+							tmp_reg));
+						EMIT(PPC_RAW_RLWINM_DOT(tmp_reg, tmp_reg,
+							       0, 0, 31));
 					}
 				}
 				break;
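
The change running through this file is mechanical: the old PPC_*() emitter macros become EMIT(PPC_RAW_*()), splitting instruction encoding from planting the word in the JIT image. A sketch of the two halves, paraphrased from ppc-opcode.h and the BPF JIT headers (exact macro bodies may differ):

/* PPC_RAW_*() composes a bare 32-bit instruction word, e.g. add: */
#define PPC_RAW_ADD(t, a, b)	(0x7c000214 | ___PPC_RT(t) | ___PPC_RA(a) | ___PPC_RB(b))

/* ...and EMIT() stores it at the current index, roughly: */
#define EMIT(instr)	do {				\
	if (image)					\
		image[ctx->idx] = instr;		\
	ctx->idx++;					\
} while (0)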
diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile
index 53d614e..c02854d 100644
--- a/arch/powerpc/perf/Makefile
+++ b/arch/powerpc/perf/Makefile
@@ -9,7 +9,7 @@
 obj64-$(CONFIG_PPC_PERF_CTRS)	+= ppc970-pmu.o power5-pmu.o \
 				   power5+-pmu.o power6-pmu.o power7-pmu.o \
 				   isa207-common.o power8-pmu.o power9-pmu.o \
-				   generic-compat-pmu.o
+				   generic-compat-pmu.o power10-pmu.o
 obj32-$(CONFIG_PPC_PERF_CTRS)	+= mpc7450-pmu.o
 
 obj-$(CONFIG_PPC_POWERNV)	+= imc-pmu.o
diff --git a/arch/powerpc/perf/callchain.h b/arch/powerpc/perf/callchain.h
index 7a2cb9e..ae24d4a 100644
--- a/arch/powerpc/perf/callchain.h
+++ b/arch/powerpc/perf/callchain.h
@@ -2,7 +2,7 @@
 #ifndef _POWERPC_PERF_CALLCHAIN_H
 #define _POWERPC_PERF_CALLCHAIN_H
 
-int read_user_stack_slow(void __user *ptr, void *buf, int nb);
+int read_user_stack_slow(const void __user *ptr, void *buf, int nb);
 void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
 			    struct pt_regs *regs);
 void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
@@ -16,4 +16,27 @@
 	return (!sp || (sp & mask) || (sp > top));
 }
 
+/*
+ * On 32-bit we just access the address and let hash_page create a
+ * HPTE if necessary, so there is no need to fall back to reading
+ * the page tables.  Since this is called at interrupt level,
+ * do_page_fault() won't treat a DSI as a page fault.
+ */
+static inline int __read_user_stack(const void __user *ptr, void *ret,
+				    size_t size)
+{
+	unsigned long addr = (unsigned long)ptr;
+	int rc;
+
+	if (addr > TASK_SIZE - size || (addr & (size - 1)))
+		return -EFAULT;
+
+	rc = copy_from_user_nofault(ret, ptr, size);
+
+	if (IS_ENABLED(CONFIG_PPC64) && rc)
+		return read_user_stack_slow(ptr, ret, size);
+
+	return rc;
+}
+
 #endif /* _POWERPC_PERF_CALLCHAIN_H */
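
The (addr & (size - 1)) test above assumes size is a power of two, which holds at both call sites (4 and 8). A standalone illustration of the alignment check (not patch content):

#include <stdio.h>

int main(void)
{
	/* for size == 8, any address with one of the low three bits set is rejected */
	unsigned long addrs[] = { 0x1000, 0x1004, 0x1008 };

	for (int i = 0; i < 3; i++)
		printf("%#lx -> %s\n", addrs[i],
		       (addrs[i] & (8UL - 1)) ? "misaligned" : "ok");
	return 0;
}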
diff --git a/arch/powerpc/perf/callchain_32.c b/arch/powerpc/perf/callchain_32.c
index 542e68b..64e4013 100644
--- a/arch/powerpc/perf/callchain_32.c
+++ b/arch/powerpc/perf/callchain_32.c
@@ -30,26 +30,9 @@
 
 #endif /* CONFIG_PPC64 */
 
-/*
- * On 32-bit we just access the address and let hash_page create a
- * HPTE if necessary, so there is no need to fall back to reading
- * the page tables.  Since this is called at interrupt level,
- * do_page_fault() won't treat a DSI as a page fault.
- */
-static int read_user_stack_32(unsigned int __user *ptr, unsigned int *ret)
+static int read_user_stack_32(const unsigned int __user *ptr, unsigned int *ret)
 {
-	int rc;
-
-	if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned int) ||
-	    ((unsigned long)ptr & 3))
-		return -EFAULT;
-
-	rc = copy_from_user_nofault(ret, ptr, sizeof(*ret));
-
-	if (IS_ENABLED(CONFIG_PPC64) && rc)
-		return read_user_stack_slow(ptr, ret, 4);
-
-	return rc;
+	return __read_user_stack(ptr, ret, sizeof(*ret));
 }
 
 /*
diff --git a/arch/powerpc/perf/callchain_64.c b/arch/powerpc/perf/callchain_64.c
index fa2a1b8..fed90e8 100644
--- a/arch/powerpc/perf/callchain_64.c
+++ b/arch/powerpc/perf/callchain_64.c
@@ -23,7 +23,7 @@
  * interrupt context, so if the access faults, we read the page tables
  * to find which page (if any) is mapped and access it directly.
  */
-int read_user_stack_slow(void __user *ptr, void *buf, int nb)
+int read_user_stack_slow(const void __user *ptr, void *buf, int nb)
 {
 
 	unsigned long addr = (unsigned long) ptr;
@@ -44,16 +44,9 @@
 	return -EFAULT;
 }
 
-static int read_user_stack_64(unsigned long __user *ptr, unsigned long *ret)
+static int read_user_stack_64(const unsigned long __user *ptr, unsigned long *ret)
 {
-	if ((unsigned long)ptr > TASK_SIZE - sizeof(unsigned long) ||
-	    ((unsigned long)ptr & 7))
-		return -EFAULT;
-
-	if (!copy_from_user_nofault(ret, ptr, sizeof(*ret)))
-		return 0;
-
-	return read_user_stack_slow(ptr, ret, 8);
+	return __read_user_stack(ptr, ret, sizeof(*ret));
 }
 
 /*
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index 01d7028..78fe349 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -37,12 +37,7 @@
 	struct perf_event *event[MAX_HWEVENTS];
 	u64 events[MAX_HWEVENTS];
 	unsigned int flags[MAX_HWEVENTS];
-	/*
-	 * The order of the MMCR array is:
-	 *  - 64-bit, MMCR0, MMCR1, MMCRA, MMCR2
-	 *  - 32-bit, MMCR0, MMCR1, MMCR2
-	 */
-	unsigned long mmcr[4];
+	struct mmcr_regs mmcr;
 	struct perf_event *limited_counter[MAX_LIMITED_HWCOUNTERS];
 	u8  limited_hwidx[MAX_LIMITED_HWCOUNTERS];
 	u64 alternatives[MAX_HWEVENTS][MAX_EVENT_ALTERNATIVES];
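
cpu_hw_events now carries the PMU control registers in a named struct rather than a positional array. The struct itself is added elsewhere in this series, in arch/powerpc/include/asm/perf_event_server.h; it looks roughly like this (see that header for the authoritative definition):

struct mmcr_regs {
	unsigned long mmcr0;
	unsigned long mmcr1;
	unsigned long mmcr2;
	unsigned long mmcra;
	unsigned long mmcr3;
};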
@@ -77,6 +72,11 @@
 /*
  * 32-bit doesn't have MMCRA but does have an MMCR2,
  * and a few other names are different.
+ * Also 32-bit doesn't have MMCR3, SIER2 and SIER3.
+ * Define them as zero, knowing that any code path accessing
+ * these registers (via mtspr/mfspr) is guarded by a ppmu flag
+ * check for PPMU_ARCH_31, so we will not enter that code path
+ * on 32-bit.
  */
 #ifdef CONFIG_PPC32
 
@@ -90,7 +90,11 @@
 #define MMCR0_PMCC_U6		0
 
 #define SPRN_MMCRA		SPRN_MMCR2
+#define SPRN_MMCR3		0
+#define SPRN_SIER2		0
+#define SPRN_SIER3		0
 #define MMCRA_SAMPLE_ENABLE	0
+#define MMCRA_BHRB_DISABLE     0
 
 static inline unsigned long perf_ip_adjust(struct pt_regs *regs)
 {
@@ -121,7 +125,7 @@
 static void ebb_switch_out(unsigned long mmcr0) { }
 static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
 {
-	return cpuhw->mmcr[0];
+	return cpuhw->mmcr.mmcr0;
 }
 
 static inline void power_pmu_bhrb_enable(struct perf_event *event) {}
@@ -466,8 +470,11 @@
 			 * addresses at this point. Check the privileges before
 			 * exporting it to userspace (avoid exposure of regions
 			 * where we could have speculative execution)
+			 * In case of ISA v3.1, BHRB captures only user-space
+			 * addresses, hence include a check before the filtering code.
 			 */
-			if (is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0)
+			if (!(ppmu->flags & PPMU_ARCH_31) &&
+				is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0)
 				continue;
 
 			/* Branches are read most recent first (ie. mfbhrb 0 is
@@ -586,11 +593,16 @@
 	current->thread.sdar  = mfspr(SPRN_SDAR);
 	current->thread.mmcr0 = mmcr0 & MMCR0_USER_MASK;
 	current->thread.mmcr2 = mfspr(SPRN_MMCR2) & MMCR2_USER_MASK;
+	if (ppmu->flags & PPMU_ARCH_31) {
+		current->thread.mmcr3 = mfspr(SPRN_MMCR3);
+		current->thread.sier2 = mfspr(SPRN_SIER2);
+		current->thread.sier3 = mfspr(SPRN_SIER3);
+	}
 }
 
 static unsigned long ebb_switch_in(bool ebb, struct cpu_hw_events *cpuhw)
 {
-	unsigned long mmcr0 = cpuhw->mmcr[0];
+	unsigned long mmcr0 = cpuhw->mmcr.mmcr0;
 
 	if (!ebb)
 		goto out;
@@ -624,7 +636,13 @@
 	 * unfreeze counters, it should not set exclude_xxx in its events and
 	 * instead manage the MMCR2 entirely by itself.
 	 */
-	mtspr(SPRN_MMCR2, cpuhw->mmcr[3] | current->thread.mmcr2);
+	mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2 | current->thread.mmcr2);
+
+	if (ppmu->flags & PPMU_ARCH_31) {
+		mtspr(SPRN_MMCR3, current->thread.mmcr3);
+		mtspr(SPRN_SIER2, current->thread.sier2);
+		mtspr(SPRN_SIER3, current->thread.sier3);
+	}
 out:
 	return mmcr0;
 }
@@ -845,6 +863,11 @@
 		pr_info("EBBRR: %016lx BESCR: %016lx\n",
 			mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR));
 	}
+
+	if (ppmu->flags & PPMU_ARCH_31) {
+		pr_info("MMCR3: %016lx SIER2: %016lx SIER3: %016lx\n",
+			mfspr(SPRN_MMCR3), mfspr(SPRN_SIER2), mfspr(SPRN_SIER3));
+	}
 #endif
 	pr_info("SIAR:  %016lx SDAR:  %016lx SIER:  %016lx\n",
 		mfspr(SPRN_SIAR), sdar, sier);
@@ -1196,7 +1219,7 @@
 static void power_pmu_disable(struct pmu *pmu)
 {
 	struct cpu_hw_events *cpuhw;
-	unsigned long flags, mmcr0, val;
+	unsigned long flags, mmcr0, val, mmcra;
 
 	if (!ppmu)
 		return;
@@ -1229,12 +1252,24 @@
 		mb();
 		isync();
 
+		val = mmcra = cpuhw->mmcr.mmcra;
+
 		/*
 		 * Disable instruction sampling if it was enabled
 		 */
-		if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
-			mtspr(SPRN_MMCRA,
-			      cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
+		if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE)
+			val &= ~MMCRA_SAMPLE_ENABLE;
+
+		/* Disable BHRB via mmcra (BHRBRD) for p10 */
+		if (ppmu->flags & PPMU_ARCH_31)
+			val |= MMCRA_BHRB_DISABLE;
+
+		/*
+		 * Write SPRN_MMCRA if mmcra has either disabled
+		 * instruction sampling or BHRB.
+		 */
+		if (val != mmcra) {
+			mtspr(SPRN_MMCRA, val);
 			mb();
 			isync();
 		}
@@ -1308,18 +1343,20 @@
 	 * (possibly updated for removal of events).
 	 */
 	if (!cpuhw->n_added) {
-		mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
-		mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
+		mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE);
+		mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1);
+		if (ppmu->flags & PPMU_ARCH_31)
+			mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3);
 		goto out_enable;
 	}
 
 	/*
 	 * Clear all MMCR settings and recompute them for the new set of events.
 	 */
-	memset(cpuhw->mmcr, 0, sizeof(cpuhw->mmcr));
+	memset(&cpuhw->mmcr, 0, sizeof(cpuhw->mmcr));
 
 	if (ppmu->compute_mmcr(cpuhw->events, cpuhw->n_events, hwc_index,
-			       cpuhw->mmcr, cpuhw->event)) {
+			       &cpuhw->mmcr, cpuhw->event)) {
 		/* shouldn't ever get here */
 		printk(KERN_ERR "oops compute_mmcr failed\n");
 		goto out;
@@ -1333,11 +1370,11 @@
 		 */
 		event = cpuhw->event[0];
 		if (event->attr.exclude_user)
-			cpuhw->mmcr[0] |= MMCR0_FCP;
+			cpuhw->mmcr.mmcr0 |= MMCR0_FCP;
 		if (event->attr.exclude_kernel)
-			cpuhw->mmcr[0] |= freeze_events_kernel;
+			cpuhw->mmcr.mmcr0 |= freeze_events_kernel;
 		if (event->attr.exclude_hv)
-			cpuhw->mmcr[0] |= MMCR0_FCHV;
+			cpuhw->mmcr.mmcr0 |= MMCR0_FCHV;
 	}
 
 	/*
@@ -1346,12 +1383,15 @@
 	 * Then unfreeze the events.
 	 */
 	ppc_set_pmu_inuse(1);
-	mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
-	mtspr(SPRN_MMCR1, cpuhw->mmcr[1]);
-	mtspr(SPRN_MMCR0, (cpuhw->mmcr[0] & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
+	mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra & ~MMCRA_SAMPLE_ENABLE);
+	mtspr(SPRN_MMCR1, cpuhw->mmcr.mmcr1);
+	mtspr(SPRN_MMCR0, (cpuhw->mmcr.mmcr0 & ~(MMCR0_PMC1CE | MMCR0_PMCjCE))
 				| MMCR0_FC);
 	if (ppmu->flags & PPMU_ARCH_207S)
-		mtspr(SPRN_MMCR2, cpuhw->mmcr[3]);
+		mtspr(SPRN_MMCR2, cpuhw->mmcr.mmcr2);
+
+	if (ppmu->flags & PPMU_ARCH_31)
+		mtspr(SPRN_MMCR3, cpuhw->mmcr.mmcr3);
 
 	/*
 	 * Read off any pre-existing events that need to move
@@ -1402,7 +1442,7 @@
 		perf_event_update_userpage(event);
 	}
 	cpuhw->n_limited = n_lim;
-	cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE;
+	cpuhw->mmcr.mmcr0 |= MMCR0_PMXE | MMCR0_FCECE;
 
  out_enable:
 	pmao_restore_workaround(ebb);
@@ -1418,9 +1458,9 @@
 	/*
 	 * Enable instruction sampling if necessary
 	 */
-	if (cpuhw->mmcr[2] & MMCRA_SAMPLE_ENABLE) {
+	if (cpuhw->mmcr.mmcra & MMCRA_SAMPLE_ENABLE) {
 		mb();
-		mtspr(SPRN_MMCRA, cpuhw->mmcr[2]);
+		mtspr(SPRN_MMCRA, cpuhw->mmcr.mmcra);
 	}
 
  out:
@@ -1550,7 +1590,7 @@
 				cpuhw->flags[i-1] = cpuhw->flags[i];
 			}
 			--cpuhw->n_events;
-			ppmu->disable_pmc(event->hw.idx - 1, cpuhw->mmcr);
+			ppmu->disable_pmc(event->hw.idx - 1, &cpuhw->mmcr);
 			if (event->hw.idx) {
 				write_pmc(event->hw.idx, 0);
 				event->hw.idx = 0;
@@ -1571,7 +1611,7 @@
 	}
 	if (cpuhw->n_events == 0) {
 		/* disable exceptions if no events are running */
-		cpuhw->mmcr[0] &= ~(MMCR0_PMXE | MMCR0_FCECE);
+		cpuhw->mmcr.mmcr0 &= ~(MMCR0_PMXE | MMCR0_FCECE);
 	}
 
 	if (has_branch_stack(event))
@@ -1795,7 +1835,7 @@
 static int hw_perf_cache_event(u64 config, u64 *eventp)
 {
 	unsigned long type, op, result;
-	int ev;
+	u64 ev;
 
 	if (!ppmu->cache_events)
 		return -EINVAL;
@@ -2246,7 +2286,7 @@
 	 * XXX might want to use MSR.PM to keep the events frozen until
 	 * we get back out of this interrupt.
 	 */
-	write_mmcr0(cpuhw, cpuhw->mmcr[0]);
+	write_mmcr0(cpuhw, cpuhw->mmcr.mmcr0);
 
 	if (nmi)
 		nmi_exit();
@@ -2268,7 +2308,7 @@
 
 	if (ppmu) {
 		memset(cpuhw, 0, sizeof(*cpuhw));
-		cpuhw->mmcr[0] = MMCR0_FC;
+		cpuhw->mmcr.mmcr0 = MMCR0_FC;
 	}
 	return 0;
 }
@@ -2314,6 +2354,8 @@
 		return 0;
 	else if (!init_power9_pmu())
 		return 0;
+	else if (!init_power10_pmu())
+		return 0;
 	else if (!init_ppc970_pmu())
 		return 0;
 	else
diff --git a/arch/powerpc/perf/generic-compat-pmu.c b/arch/powerpc/perf/generic-compat-pmu.c
index 5e5a54d..eb8a6aaf 100644
--- a/arch/powerpc/perf/generic-compat-pmu.c
+++ b/arch/powerpc/perf/generic-compat-pmu.c
@@ -101,7 +101,7 @@
  * 0 means not supported, -1 means nonsensical, other values
  * are event codes.
  */
-static int generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 generic_compat_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[ C(L1D) ] = {
 		[ C(OP_READ) ] = {
 			[ C(RESULT_ACCESS) ] = 0,
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index db213eb..cdb7bfb 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -31,6 +31,8 @@
 /* Whether we have to aggregate result data for some domains. */
 static bool aggregate_result_elements;
 
+static cpumask_t hv_24x7_cpumask;
+
 static bool domain_is_valid(unsigned domain)
 {
 	switch (domain) {
@@ -446,6 +448,12 @@
 	return sprintf(buf, "%s\n", (char *)d->var);
 }
 
+static ssize_t cpumask_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	return cpumap_print_to_pagebuf(true, buf, &hv_24x7_cpumask);
+}
+
 static ssize_t sockets_show(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
@@ -1113,6 +1121,7 @@
 static DEVICE_ATTR_RO(sockets);
 static DEVICE_ATTR_RO(chipspersocket);
 static DEVICE_ATTR_RO(coresperchip);
+static DEVICE_ATTR_RO(cpumask);
 
 static struct bin_attribute *if_bin_attrs[] = {
 	&bin_attr_catalog,
@@ -1126,6 +1135,7 @@
 	&dev_attr_sockets.attr,
 	&dev_attr_chipspersocket.attr,
 	&dev_attr_coresperchip.attr,
+	&dev_attr_cpumask.attr,
 	NULL,
 };
 
@@ -1641,6 +1651,45 @@
 	.capabilities = PERF_PMU_CAP_NO_EXCLUDE,
 };
 
+static int ppc_hv_24x7_cpu_online(unsigned int cpu)
+{
+	if (cpumask_empty(&hv_24x7_cpumask))
+		cpumask_set_cpu(cpu, &hv_24x7_cpumask);
+
+	return 0;
+}
+
+static int ppc_hv_24x7_cpu_offline(unsigned int cpu)
+{
+	int target;
+
+	/* Check if exiting cpu is used for collecting 24x7 events */
+	if (!cpumask_test_and_clear_cpu(cpu, &hv_24x7_cpumask))
+		return 0;
+
+	/* Find a new cpu to collect 24x7 events */
+	target = cpumask_last(cpu_active_mask);
+
+	if (target < 0 || target >= nr_cpu_ids) {
+		pr_err("hv_24x7: CPU hotplug init failed\n");
+		return -1;
+	}
+
+	/* Migrate 24x7 events to the new target */
+	cpumask_set_cpu(target, &hv_24x7_cpumask);
+	perf_pmu_migrate_context(&h_24x7_pmu, cpu, target);
+
+	return 0;
+}
+
+static int hv_24x7_cpu_hotplug_init(void)
+{
+	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE,
+			  "perf/powerpc/hv_24x7:online",
+			  ppc_hv_24x7_cpu_online,
+			  ppc_hv_24x7_cpu_offline);
+}
+
 static int hv_24x7_init(void)
 {
 	int r;
@@ -1685,6 +1734,11 @@
 	if (r)
 		return r;
 
+	/* init cpuhotplug */
+	r = hv_24x7_cpu_hotplug_init();
+	if (r)
+		return r;
+
 	r = perf_pmu_register(&h_24x7_pmu, h_24x7_pmu.name, -1);
 	if (r)
 		return r;
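
The new cpumask attribute lets userspace see which CPU currently collects the 24x7 events. A hedged sketch of reading it (the sysfs path is inferred from the PMU name and may differ):

#include <stdio.h>

int main(void)
{
	char buf[64];
	FILE *f = fopen("/sys/devices/hv_24x7/cpumask", "r");

	if (f && fgets(buf, sizeof(buf), f))
		printf("hv_24x7 events are collected on CPU(s): %s", buf);
	if (f)
		fclose(f);
	return 0;
}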
diff --git a/arch/powerpc/perf/imc-pmu.c b/arch/powerpc/perf/imc-pmu.c
index 0edcfd0..a45d694 100644
--- a/arch/powerpc/perf/imc-pmu.c
+++ b/arch/powerpc/perf/imc-pmu.c
@@ -1288,11 +1288,30 @@
 	header->size = sizeof(*header) + event->header_size;
 	header->misc = 0;
 
-	if (is_kernel_addr(data->ip))
-		header->misc |= PERF_RECORD_MISC_KERNEL;
-	else
-		header->misc |= PERF_RECORD_MISC_USER;
-
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		switch (IMC_TRACE_RECORD_VAL_HVPR(mem->val)) {
+		case 0: /* when MSR HV and PR are not set in the trace-record */
+			header->misc |= PERF_RECORD_MISC_GUEST_KERNEL;
+			break;
+		case 1: /* MSR HV is 0 and PR is 1 */
+			header->misc |= PERF_RECORD_MISC_GUEST_USER;
+			break;
+		case 2: /* MSR HV is 1 and PR is 0 */
+			header->misc |= PERF_RECORD_MISC_HYPERVISOR;
+			break;
+		case 3: /* MSR HV is 1 and PR is 1 */
+			header->misc |= PERF_RECORD_MISC_USER;
+			break;
+		default:
+			pr_info("IMC: Unable to set the flag based on MSR bits\n");
+			break;
+		}
+	} else {
+		if (is_kernel_addr(data->ip))
+			header->misc |= PERF_RECORD_MISC_KERNEL;
+		else
+			header->misc |= PERF_RECORD_MISC_USER;
+	}
 	perf_event_header__init_id(header, data, event);
 
 	return 0;
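
The decode above uses a helper added alongside this change in arch/powerpc/include/asm/imc-pmu.h. Assuming the MSR HV and PR bits occupy the two most significant bits of the trace record's val field, it is roughly:

/* IBM bits 0-1 of the trace record val hold MSR[HV] and MSR[PR] */
#define IMC_TRACE_RECORD_VAL_HVPR(x)	((x) >> 62)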
diff --git a/arch/powerpc/perf/internal.h b/arch/powerpc/perf/internal.h
index f755c64..80bbf72 100644
--- a/arch/powerpc/perf/internal.h
+++ b/arch/powerpc/perf/internal.h
@@ -9,4 +9,5 @@
 extern int init_power7_pmu(void);
 extern int init_power8_pmu(void);
 extern int init_power9_pmu(void);
+extern int init_power10_pmu(void);
 extern int init_generic_compat_pmu(void);
diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c
index 4c86da5..964437a 100644
--- a/arch/powerpc/perf/isa207-common.c
+++ b/arch/powerpc/perf/isa207-common.c
@@ -55,7 +55,9 @@
 {
 	u64 valid_mask = EVENT_VALID_MASK;
 
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		valid_mask = p10_EVENT_VALID_MASK;
+	else if (cpu_has_feature(CPU_FTR_ARCH_300))
 		valid_mask = p9_EVENT_VALID_MASK;
 
 	return !(event & ~valid_mask);
@@ -69,6 +71,14 @@
 	return false;
 }
 
+static unsigned long sdar_mod_val(u64 event)
+{
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		return p10_SDAR_MODE(event);
+
+	return p9_SDAR_MODE(event);
+}
+
 static void mmcra_sdar_mode(u64 event, unsigned long *mmcra)
 {
 	/*
@@ -79,7 +89,7 @@
 	 * MMCRA[SDAR_MODE] will be programmed as "0b01" for continuous sampling
 	 * mode and will be unchanged when setting MMCRA[63] (Marked events).
 	 *
-	 * Incase of Power9:
+	 * In case of Power9/Power10:
 	 * Marked event: MMCRA[SDAR_MODE] will be set to 0b00 ('No Updates'),
 	 *               or if group already have any marked events.
 	 * For rest
@@ -90,8 +100,8 @@
 	if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 		if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE))
 			*mmcra &= MMCRA_SDAR_MODE_NO_UPDATES;
-		else if (p9_SDAR_MODE(event))
-			*mmcra |=  p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT;
+		else if (sdar_mod_val(event))
+			*mmcra |= sdar_mod_val(event) << MMCRA_SDAR_MODE_SHIFT;
 		else
 			*mmcra |= MMCRA_SDAR_MODE_DCACHE;
 	} else
@@ -134,7 +144,11 @@
 	/*
 	 * Check the mantissa upper two bits are not zero, unless the
 	 * exponent is also zero. See the THRESH_CMP_MANTISSA doc.
+	 * Power10: thresh_cmp is replaced by l2_l3 event select.
 	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		return false;
+
 	cmp = (event >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
 	exp = cmp >> 7;
 
@@ -251,7 +265,12 @@
 
 	pmc   = (event >> EVENT_PMC_SHIFT)        & EVENT_PMC_MASK;
 	unit  = (event >> EVENT_UNIT_SHIFT)       & EVENT_UNIT_MASK;
-	cache = (event >> EVENT_CACHE_SEL_SHIFT)  & EVENT_CACHE_SEL_MASK;
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		cache = (event >> EVENT_CACHE_SEL_SHIFT) &
+			p10_EVENT_CACHE_SEL_MASK;
+	else
+		cache = (event >> EVENT_CACHE_SEL_SHIFT) &
+			EVENT_CACHE_SEL_MASK;
 	ebb   = (event >> EVENT_EBB_SHIFT)        & EVENT_EBB_MASK;
 
 	if (pmc) {
@@ -283,7 +302,10 @@
 	}
 
 	if (unit >= 6 && unit <= 9) {
-		if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+		if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) {
+			mask |= CNST_L2L3_GROUP_MASK;
+			value |= CNST_L2L3_GROUP_VAL(event >> p10_L2L3_EVENT_SHIFT);
+		} else if (cpu_has_feature(CPU_FTR_ARCH_300)) {
 			mask  |= CNST_CACHE_GROUP_MASK;
 			value |= CNST_CACHE_GROUP_VAL(event & 0xff);
 
@@ -363,10 +385,11 @@
 }
 
 int isa207_compute_mmcr(u64 event[], int n_ev,
-			       unsigned int hwc[], unsigned long mmcr[],
+			       unsigned int hwc[], struct mmcr_regs *mmcr,
 			       struct perf_event *pevents[])
 {
 	unsigned long mmcra, mmcr1, mmcr2, unit, combine, psel, cache, val;
+	unsigned long mmcr3;
 	unsigned int pmc, pmc_inuse;
 	int i;
 
@@ -379,7 +402,14 @@
 			pmc_inuse |= 1 << pmc;
 	}
 
-	mmcra = mmcr1 = mmcr2 = 0;
+	mmcra = mmcr1 = mmcr2 = mmcr3 = 0;
+
+	/*
+	 * Disable BHRB (by setting the MMCRA BHRBRD bit) unless
+	 * it is explicitly requested.
+	 */
+	if (cpu_has_feature(CPU_FTR_ARCH_31))
+		mmcra |= MMCRA_BHRB_DISABLE;
 
 	/* Second pass: assign PMCs, set all MMCR1 fields */
 	for (i = 0; i < n_ev; ++i) {
@@ -438,8 +468,17 @@
 			mmcra |= val << MMCRA_THR_CTL_SHIFT;
 			val = (event[i] >> EVENT_THR_SEL_SHIFT) & EVENT_THR_SEL_MASK;
 			mmcra |= val << MMCRA_THR_SEL_SHIFT;
-			val = (event[i] >> EVENT_THR_CMP_SHIFT) & EVENT_THR_CMP_MASK;
-			mmcra |= thresh_cmp_val(val);
+			if (!cpu_has_feature(CPU_FTR_ARCH_31)) {
+				val = (event[i] >> EVENT_THR_CMP_SHIFT) &
+					EVENT_THR_CMP_MASK;
+				mmcra |= thresh_cmp_val(val);
+			}
+		}
+
+		if (cpu_has_feature(CPU_FTR_ARCH_31) && (unit == 6)) {
+			val = (event[i] >> p10_L2L3_EVENT_SHIFT) &
+				p10_EVENT_L2L3_SEL_MASK;
+			mmcr2 |= val << p10_L2L3_SEL_SHIFT;
 		}
 
 		if (event[i] & EVENT_WANTS_BHRB) {
@@ -447,6 +486,11 @@
 			mmcra |= val << MMCRA_IFM_SHIFT;
 		}
 
+		/* set MMCRA (BHRBRD) to 0 if there is a user request for BHRB */
+		if (cpu_has_feature(CPU_FTR_ARCH_31) &&
+				(has_branch_stack(pevents[i]) || (event[i] & EVENT_WANTS_BHRB)))
+			mmcra &= ~MMCRA_BHRB_DISABLE;
+
 		if (pevents[i]->attr.exclude_user)
 			mmcr2 |= MMCR2_FCP(pmc);
 
@@ -460,34 +504,43 @@
 				mmcr2 |= MMCR2_FCS(pmc);
 		}
 
+		if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+			if (pmc <= 4) {
+				val = (event[i] >> p10_EVENT_MMCR3_SHIFT) &
+					p10_EVENT_MMCR3_MASK;
+				mmcr3 |= val << MMCR3_SHIFT(pmc);
+			}
+		}
+
 		hwc[i] = pmc - 1;
 	}
 
 	/* Return MMCRx values */
-	mmcr[0] = 0;
+	mmcr->mmcr0 = 0;
 
 	/* pmc_inuse is 1-based */
 	if (pmc_inuse & 2)
-		mmcr[0] = MMCR0_PMC1CE;
+		mmcr->mmcr0 = MMCR0_PMC1CE;
 
 	if (pmc_inuse & 0x7c)
-		mmcr[0] |= MMCR0_PMCjCE;
+		mmcr->mmcr0 |= MMCR0_PMCjCE;
 
 	/* If we're not using PMC 5 or 6, freeze them */
 	if (!(pmc_inuse & 0x60))
-		mmcr[0] |= MMCR0_FC56;
+		mmcr->mmcr0 |= MMCR0_FC56;
 
-	mmcr[1] = mmcr1;
-	mmcr[2] = mmcra;
-	mmcr[3] = mmcr2;
+	mmcr->mmcr1 = mmcr1;
+	mmcr->mmcra = mmcra;
+	mmcr->mmcr2 = mmcr2;
+	mmcr->mmcr3 = mmcr3;
 
 	return 0;
 }
 
-void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
 {
 	if (pmc <= 3)
-		mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1));
+		mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SHIFT(pmc + 1));
 }
 
 static int find_alternative(u64 event, const unsigned int ev_alt[][MAX_ALT], int size)
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index 63fd4f3..044de65 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -87,6 +87,31 @@
 	 EVENT_LINUX_MASK					|	\
 	 EVENT_PSEL_MASK))
 
+/* Constants to support the power10 raw encoding format */
+#define p10_SDAR_MODE_SHIFT		22
+#define p10_SDAR_MODE_MASK		0x3ull
+#define p10_SDAR_MODE(v)		(((v) >> p10_SDAR_MODE_SHIFT) & \
+					p10_SDAR_MODE_MASK)
+#define p10_EVENT_L2L3_SEL_MASK		0x1f
+#define p10_L2L3_SEL_SHIFT		3
+#define p10_L2L3_EVENT_SHIFT		40
+#define p10_EVENT_THRESH_MASK		0xffffull
+#define p10_EVENT_CACHE_SEL_MASK	0x3ull
+#define p10_EVENT_MMCR3_MASK		0x7fffull
+#define p10_EVENT_MMCR3_SHIFT		45
+
+#define p10_EVENT_VALID_MASK		\
+	((p10_SDAR_MODE_MASK   << p10_SDAR_MODE_SHIFT		|	\
+	(p10_EVENT_THRESH_MASK  << EVENT_THRESH_SHIFT)		|	\
+	(EVENT_SAMPLE_MASK     << EVENT_SAMPLE_SHIFT)		|	\
+	(p10_EVENT_CACHE_SEL_MASK  << EVENT_CACHE_SEL_SHIFT)	|	\
+	(EVENT_PMC_MASK        << EVENT_PMC_SHIFT)		|	\
+	(EVENT_UNIT_MASK       << EVENT_UNIT_SHIFT)		|	\
+	(p9_EVENT_COMBINE_MASK << p9_EVENT_COMBINE_SHIFT)	|	\
+	(p10_EVENT_MMCR3_MASK  << p10_EVENT_MMCR3_SHIFT)	|	\
+	(EVENT_MARKED_MASK     << EVENT_MARKED_SHIFT)		|	\
+	 EVENT_LINUX_MASK					|	\
+	EVENT_PSEL_MASK))
 /*
  * Layout of constraint bits:
  *
@@ -135,6 +160,9 @@
 #define CNST_CACHE_PMC4_VAL	(1ull << 54)
 #define CNST_CACHE_PMC4_MASK	CNST_CACHE_PMC4_VAL
 
+#define CNST_L2L3_GROUP_VAL(v)	(((v) & 0x1full) << 55)
+#define CNST_L2L3_GROUP_MASK	CNST_L2L3_GROUP_VAL(0x1f)
+
 /*
  * For NC we are counting up to 4 events. This requires three bits, and we need
  * the fifth event to overflow and set the 4th bit. To achieve that we bias the
@@ -191,7 +219,7 @@
 #define MMCRA_THR_CTR_EXP(v)		(((v) >> MMCRA_THR_CTR_EXP_SHIFT) &\
 						MMCRA_THR_CTR_EXP_MASK)
 
-/* MMCR1 Threshold Compare bit constant for power9 */
+/* MMCRA Threshold Compare bit constant for power9 */
 #define p9_MMCRA_THR_CMP_SHIFT	45
 
 /* Bits in MMCR2 for PowerISA v2.07 */
@@ -202,6 +230,9 @@
 #define MAX_ALT				2
 #define MAX_PMU_COUNTERS		6
 
+/* Bits in MMCR3 for PowerISA v3.10 */
+#define MMCR3_SHIFT(pmc)		(49 - (15 * ((pmc) - 1)))
+
 #define ISA207_SIER_TYPE_SHIFT		15
 #define ISA207_SIER_TYPE_MASK		(0x7ull << ISA207_SIER_TYPE_SHIFT)
 
@@ -217,9 +248,9 @@
 
 int isa207_get_constraint(u64 event, unsigned long *maskp, unsigned long *valp);
 int isa207_compute_mmcr(u64 event[], int n_ev,
-				unsigned int hwc[], unsigned long mmcr[],
+				unsigned int hwc[], struct mmcr_regs *mmcr,
 				struct perf_event *pevents[]);
-void isa207_disable_pmc(unsigned int pmc, unsigned long mmcr[]);
+void isa207_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr);
 int isa207_get_alternatives(u64 event, u64 alt[], int size, unsigned int flags,
 					const unsigned int ev_alt[][MAX_ALT]);
 void isa207_get_mem_data_src(union perf_mem_data_src *dsrc, u32 flags,
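
MMCR3_SHIFT() above gives each of PMC1-4 its own 15-bit event field in MMCR3, matching the 15-bit p10_EVENT_MMCR3_MASK. A quick standalone check (not patch content):

#include <stdio.h>

#define MMCR3_SHIFT(pmc)	(49 - (15 * ((pmc) - 1)))

int main(void)
{
	/* prints shifts 49, 34, 19 and 4 for PMC1..PMC4 */
	for (int pmc = 1; pmc <= 4; pmc++)
		printf("PMC%d -> MMCR3 shift %d\n", pmc, MMCR3_SHIFT(pmc));
	return 0;
}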
diff --git a/arch/powerpc/perf/mpc7450-pmu.c b/arch/powerpc/perf/mpc7450-pmu.c
index 4d5ef92..1919e9d 100644
--- a/arch/powerpc/perf/mpc7450-pmu.c
+++ b/arch/powerpc/perf/mpc7450-pmu.c
@@ -257,7 +257,7 @@
  * Compute MMCR0/1/2 values for a set of events.
  */
 static int mpc7450_compute_mmcr(u64 event[], int n_ev, unsigned int hwc[],
-				unsigned long mmcr[],
+				struct mmcr_regs *mmcr,
 				struct perf_event *pevents[])
 {
 	u8 event_index[N_CLASSES][N_COUNTER];
@@ -321,9 +321,16 @@
 		mmcr0 |= MMCR0_PMCnCE;
 
 	/* Return MMCRx values */
-	mmcr[0] = mmcr0;
-	mmcr[1] = mmcr1;
-	mmcr[2] = mmcr2;
+	mmcr->mmcr0 = mmcr0;
+	mmcr->mmcr1 = mmcr1;
+	mmcr->mmcr2 = mmcr2;
+	/*
+	 * 32-bit doesn't have an MMCRA and defines SPRN_MMCRA as
+	 * SPRN_MMCR2. So assign the mmcr2 value to the mmcra field of
+	 * cpu_hw_events, to ensure that any write to SPRN_MMCRA
+	 * actually uses the mmcr2 value.
+	 */
+	mmcr->mmcra = mmcr2;
 	return 0;
 }
 
@@ -331,12 +338,12 @@
  * Disable counting by a PMC.
  * Note that the pmc argument is 0-based here, not 1-based.
  */
-static void mpc7450_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void mpc7450_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
 {
 	if (pmc <= 1)
-		mmcr[0] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
+		mmcr->mmcr0 &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
 	else
-		mmcr[1] &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
+		mmcr->mmcr1 &= ~(pmcsel_mask[pmc] << pmcsel_shift[pmc]);
 }
 
 static int mpc7450_generic_events[] = {
@@ -354,7 +361,7 @@
  * 0 means not supported, -1 means nonsensical, other values
  * are event codes.
  */
-static int mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 mpc7450_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0,		0x225	},
 		[C(OP_WRITE)] = {	0,		0x227	},
diff --git a/arch/powerpc/perf/power10-events-list.h b/arch/powerpc/perf/power10-events-list.h
new file mode 100644
index 0000000..60c1b81
--- /dev/null
+++ b/arch/powerpc/perf/power10-events-list.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Performance counter support for POWER10 processors.
+ *
+ * Copyright 2020 Madhavan Srinivasan, IBM Corporation.
+ * Copyright 2020 Athira Rajeev, IBM Corporation.
+ */
+
+/*
+ * Power10 event codes.
+ */
+EVENT(PM_RUN_CYC,				0x600f4);
+EVENT(PM_DISP_STALL_CYC,			0x100f8);
+EVENT(PM_EXEC_STALL,				0x30008);
+EVENT(PM_RUN_INST_CMPL,				0x500fa);
+EVENT(PM_BR_CMPL,                               0x4d05e);
+EVENT(PM_BR_MPRED_CMPL,                         0x400f6);
+
+/* All L1 D cache load references counted at finish, gated by reject */
+EVENT(PM_LD_REF_L1,				0x100fc);
+/* Load Missed L1 */
+EVENT(PM_LD_MISS_L1,				0x3e054);
+/* Store Missed L1 */
+EVENT(PM_ST_MISS_L1,				0x300f0);
+/* L1 cache data prefetches */
+EVENT(PM_LD_PREFETCH_CACHE_LINE_MISS,		0x1002c);
+/* Demand iCache Miss */
+EVENT(PM_L1_ICACHE_MISS,			0x200fc);
+/* Instruction fetches from L1 */
+EVENT(PM_INST_FROM_L1,				0x04080);
+/* Instruction demand sectors written into IL1 */
+EVENT(PM_INST_FROM_L1MISS,			0x03f00000001c040);
+/* Instruction prefetch written into IL1 */
+EVENT(PM_IC_PREF_REQ,				0x040a0);
+/* The data cache was reloaded from local core's L3 due to a demand load */
+EVENT(PM_DATA_FROM_L3,				0x01340000001c040);
+/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+EVENT(PM_DATA_FROM_L3MISS,			0x300fe);
+/* Data PTEG reload */
+EVENT(PM_DTLB_MISS,				0x300fc);
+/* ITLB Reloaded */
+EVENT(PM_ITLB_MISS,				0x400fc);
+
+EVENT(PM_RUN_CYC_ALT,				0x0001e);
+EVENT(PM_RUN_INST_CMPL_ALT,			0x00002);
+
+/*
+ * Memory Access Events
+ *
+ * The primary PMU event used here is PM_MRK_INST_CMPL (0x401e0).
+ * To enable capturing of memory profiling, these MMCRA bits
+ * need to be programmed in the corresponding raw event format
+ * encoding.
+ *
+ * The MMCRA bit encodings needed are:
+ *     SM (Sampling Mode)
+ *     EM (Eligibility for Random Sampling)
+ *     TECE (Threshold Event Counter Event)
+ *     TS (Threshold Start Event)
+ *     TE (Threshold End Event)
+ *
+ * Corresponding Raw Encoding bits:
+ *     sample [EM,SM]
+ *     thresh_sel (TECE)
+ *     thresh start (TS)
+ *     thresh end (TE)
+ */
+
+EVENT(MEM_LOADS,				0x34340401e0);
+EVENT(MEM_STORES,				0x343c0401e0);
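
These codes are what userspace supplies as a raw perf config. An illustrative sketch using PM_RUN_CYC with the standard perf_event_open() calling convention (not patch content):

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Count PM_RUN_CYC (0x600f4) on the current task; returns an fd or -1. */
static int open_pm_run_cyc(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_RAW;
	attr.config = 0x600f4;	/* PM_RUN_CYC */

	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}

int main(void)
{
	return open_pm_run_cyc() < 0;
}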
diff --git a/arch/powerpc/perf/power10-pmu.c b/arch/powerpc/perf/power10-pmu.c
new file mode 100644
index 0000000..f7cff7f
--- /dev/null
+++ b/arch/powerpc/perf/power10-pmu.c
@@ -0,0 +1,419 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Performance counter support for POWER10 processors.
+ *
+ * Copyright 2020 Madhavan Srinivasan, IBM Corporation.
+ * Copyright 2020 Athira Rajeev, IBM Corporation.
+ */
+
+#define pr_fmt(fmt)	"power10-pmu: " fmt
+
+#include "isa207-common.h"
+#include "internal.h"
+
+/*
+ * Raw event encoding for Power10:
+ *
+ *        60        56        52        48        44        40        36        32
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *   | | [ ]   [ src_match ] [  src_mask ]   | [ ] [ l2l3_sel ]  [  thresh_ctl   ]
+ *   | |  |                                  |  |                         |
+ *   | |  *- IFM (Linux)                     |  |        thresh start/stop -*
+ *   | *- BHRB (Linux)                       |  src_sel
+ *   *- EBB (Linux)                          *invert_bit
+ *
+ *        28        24        20        16        12         8         4         0
+ * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
+ *   [   ] [  sample ]   [ ] [ ]   [ pmc ]   [unit ]   [ ]   m   [    pmcxsel    ]
+ *     |        |        |    |                        |     |
+ *     |        |        |    |                        |     *- mark
+ *     |        |        |    *- L1/L2/L3 cache_sel    |
+ *     |        |        sdar_mode                     |
+ *     |        *- sampling mode for marked events     *- combine
+ *     |
+ *     *- thresh_sel
+ *
+ * Below uses IBM bit numbering.
+ *
+ * MMCR1[x:y] = unit    (PMCxUNIT)
+ * MMCR1[24]   = pmc1combine[0]
+ * MMCR1[25]   = pmc1combine[1]
+ * MMCR1[26]   = pmc2combine[0]
+ * MMCR1[27]   = pmc2combine[1]
+ * MMCR1[28]   = pmc3combine[0]
+ * MMCR1[29]   = pmc3combine[1]
+ * MMCR1[30]   = pmc4combine[0]
+ * MMCR1[31]   = pmc4combine[1]
+ *
+ * if pmc == 3 and unit == 0 and pmcxsel[0:6] == 0b0101011
+ *	MMCR1[20:27] = thresh_ctl
+ * else if pmc == 4 and unit == 0xf and pmcxsel[0:6] == 0b0101001
+ *	MMCR1[20:27] = thresh_ctl
+ * else
+ *	MMCRA[48:55] = thresh_ctl   (THRESH START/END)
+ *
+ * if thresh_sel:
+ *	MMCRA[45:47] = thresh_sel
+ *
+ * if l2l3_sel:
+ * MMCR2[56:60] = l2l3_sel[0:4]
+ *
+ * MMCR1[16] = cache_sel[0]
+ * MMCR1[17] = cache_sel[1]
+ *
+ * if mark:
+ *	MMCRA[63]    = 1		(SAMPLE_ENABLE)
+ *	MMCRA[57:59] = sample[0:2]	(RAND_SAMP_ELIG)
+ *	MMCRA[61:62] = sample[3:4]	(RAND_SAMP_MODE)
+ *
+ * if EBB and BHRB:
+ *	MMCRA[32:33] = IFM
+ *
+ * MMCRA[SDAR_MODE]  = sdar_mode[0:1]
+ */
+
+/*
+ * Some power10 event codes.
+ */
+#define EVENT(_name, _code)     enum{_name = _code}
+
+#include "power10-events-list.h"
+
+#undef EVENT
+
+/* MMCRA IFM bits - POWER10 */
+#define POWER10_MMCRA_IFM1		0x0000000040000000UL
+#define POWER10_MMCRA_IFM2		0x0000000080000000UL
+#define POWER10_MMCRA_IFM3		0x00000000C0000000UL
+#define POWER10_MMCRA_BHRB_MASK		0x00000000C0000000UL
+
+/* Table of alternatives, sorted by column 0 */
+static const unsigned int power10_event_alternatives[][MAX_ALT] = {
+	{ PM_RUN_CYC_ALT,		PM_RUN_CYC },
+	{ PM_RUN_INST_CMPL_ALT,		PM_RUN_INST_CMPL },
+};
+
+static int power10_get_alternatives(u64 event, unsigned int flags, u64 alt[])
+{
+	int num_alt = 0;
+
+	num_alt = isa207_get_alternatives(event, alt,
+					  ARRAY_SIZE(power10_event_alternatives), flags,
+					  power10_event_alternatives);
+
+	return num_alt;
+}
+
+GENERIC_EVENT_ATTR(cpu-cycles,			PM_RUN_CYC);
+GENERIC_EVENT_ATTR(instructions,		PM_RUN_INST_CMPL);
+GENERIC_EVENT_ATTR(branch-instructions,		PM_BR_CMPL);
+GENERIC_EVENT_ATTR(branch-misses,		PM_BR_MPRED_CMPL);
+GENERIC_EVENT_ATTR(cache-references,		PM_LD_REF_L1);
+GENERIC_EVENT_ATTR(cache-misses,		PM_LD_MISS_L1);
+GENERIC_EVENT_ATTR(mem-loads,			MEM_LOADS);
+GENERIC_EVENT_ATTR(mem-stores,			MEM_STORES);
+
+CACHE_EVENT_ATTR(L1-dcache-load-misses,		PM_LD_MISS_L1);
+CACHE_EVENT_ATTR(L1-dcache-loads,		PM_LD_REF_L1);
+CACHE_EVENT_ATTR(L1-dcache-prefetches,		PM_LD_PREFETCH_CACHE_LINE_MISS);
+CACHE_EVENT_ATTR(L1-dcache-store-misses,	PM_ST_MISS_L1);
+CACHE_EVENT_ATTR(L1-icache-load-misses,		PM_L1_ICACHE_MISS);
+CACHE_EVENT_ATTR(L1-icache-loads,		PM_INST_FROM_L1);
+CACHE_EVENT_ATTR(L1-icache-prefetches,		PM_IC_PREF_REQ);
+CACHE_EVENT_ATTR(LLC-load-misses,		PM_DATA_FROM_L3MISS);
+CACHE_EVENT_ATTR(LLC-loads,			PM_DATA_FROM_L3);
+CACHE_EVENT_ATTR(branch-load-misses,		PM_BR_MPRED_CMPL);
+CACHE_EVENT_ATTR(branch-loads,			PM_BR_CMPL);
+CACHE_EVENT_ATTR(dTLB-load-misses,		PM_DTLB_MISS);
+CACHE_EVENT_ATTR(iTLB-load-misses,		PM_ITLB_MISS);
+
+static struct attribute *power10_events_attr[] = {
+	GENERIC_EVENT_PTR(PM_RUN_CYC),
+	GENERIC_EVENT_PTR(PM_RUN_INST_CMPL),
+	GENERIC_EVENT_PTR(PM_BR_CMPL),
+	GENERIC_EVENT_PTR(PM_BR_MPRED_CMPL),
+	GENERIC_EVENT_PTR(PM_LD_REF_L1),
+	GENERIC_EVENT_PTR(PM_LD_MISS_L1),
+	GENERIC_EVENT_PTR(MEM_LOADS),
+	GENERIC_EVENT_PTR(MEM_STORES),
+	CACHE_EVENT_PTR(PM_LD_MISS_L1),
+	CACHE_EVENT_PTR(PM_LD_REF_L1),
+	CACHE_EVENT_PTR(PM_LD_PREFETCH_CACHE_LINE_MISS),
+	CACHE_EVENT_PTR(PM_ST_MISS_L1),
+	CACHE_EVENT_PTR(PM_L1_ICACHE_MISS),
+	CACHE_EVENT_PTR(PM_INST_FROM_L1),
+	CACHE_EVENT_PTR(PM_IC_PREF_REQ),
+	CACHE_EVENT_PTR(PM_DATA_FROM_L3MISS),
+	CACHE_EVENT_PTR(PM_DATA_FROM_L3),
+	CACHE_EVENT_PTR(PM_BR_MPRED_CMPL),
+	CACHE_EVENT_PTR(PM_BR_CMPL),
+	CACHE_EVENT_PTR(PM_DTLB_MISS),
+	CACHE_EVENT_PTR(PM_ITLB_MISS),
+	NULL
+};
+
+static struct attribute_group power10_pmu_events_group = {
+	.name = "events",
+	.attrs = power10_events_attr,
+};
+
+PMU_FORMAT_ATTR(event,          "config:0-59");
+PMU_FORMAT_ATTR(pmcxsel,        "config:0-7");
+PMU_FORMAT_ATTR(mark,           "config:8");
+PMU_FORMAT_ATTR(combine,        "config:10-11");
+PMU_FORMAT_ATTR(unit,           "config:12-15");
+PMU_FORMAT_ATTR(pmc,            "config:16-19");
+PMU_FORMAT_ATTR(cache_sel,      "config:20-21");
+PMU_FORMAT_ATTR(sdar_mode,      "config:22-23");
+PMU_FORMAT_ATTR(sample_mode,    "config:24-28");
+PMU_FORMAT_ATTR(thresh_sel,     "config:29-31");
+PMU_FORMAT_ATTR(thresh_stop,    "config:32-35");
+PMU_FORMAT_ATTR(thresh_start,   "config:36-39");
+PMU_FORMAT_ATTR(l2l3_sel,       "config:40-44");
+PMU_FORMAT_ATTR(src_sel,        "config:45-46");
+PMU_FORMAT_ATTR(invert_bit,     "config:47");
+PMU_FORMAT_ATTR(src_mask,       "config:48-53");
+PMU_FORMAT_ATTR(src_match,      "config:54-59");
+
+static struct attribute *power10_pmu_format_attr[] = {
+	&format_attr_event.attr,
+	&format_attr_pmcxsel.attr,
+	&format_attr_mark.attr,
+	&format_attr_combine.attr,
+	&format_attr_unit.attr,
+	&format_attr_pmc.attr,
+	&format_attr_cache_sel.attr,
+	&format_attr_sdar_mode.attr,
+	&format_attr_sample_mode.attr,
+	&format_attr_thresh_sel.attr,
+	&format_attr_thresh_stop.attr,
+	&format_attr_thresh_start.attr,
+	&format_attr_l2l3_sel.attr,
+	&format_attr_src_sel.attr,
+	&format_attr_invert_bit.attr,
+	&format_attr_src_mask.attr,
+	&format_attr_src_match.attr,
+	NULL,
+};
+
+static struct attribute_group power10_pmu_format_group = {
+	.name = "format",
+	.attrs = power10_pmu_format_attr,
+};
+
+static const struct attribute_group *power10_pmu_attr_groups[] = {
+	&power10_pmu_format_group,
+	&power10_pmu_events_group,
+	NULL,
+};
+
+static int power10_generic_events[] = {
+	[PERF_COUNT_HW_CPU_CYCLES] =			PM_RUN_CYC,
+	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_RUN_INST_CMPL,
+	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BR_CMPL,
+	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_CMPL,
+	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
+	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_MISS_L1,
+};
+
+static u64 power10_bhrb_filter_map(u64 branch_sample_type)
+{
+	u64 pmu_bhrb_filter = 0;
+
+	/*
+	 * BHRB and regular PMU events share the same privilege state
+	 * filter configuration. BHRB is always recorded along with a
+	 * regular PMU event. As the privilege state filter is handled
+	 * in the basic PMC configuration of the accompanying regular
+	 * PMU event, we ignore any separate BHRB-specific request.
+	 */
+
+	/* No branch filter requested */
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY)
+		return pmu_bhrb_filter;
+
+	/* Invalid branch filter options - HW does not support */
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_RETURN)
+		return -1;
+
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_IND_CALL) {
+		pmu_bhrb_filter |= POWER10_MMCRA_IFM2;
+		return pmu_bhrb_filter;
+	}
+
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_COND) {
+		pmu_bhrb_filter |= POWER10_MMCRA_IFM3;
+		return pmu_bhrb_filter;
+	}
+
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_CALL)
+		return -1;
+
+	if (branch_sample_type & PERF_SAMPLE_BRANCH_ANY_CALL) {
+		pmu_bhrb_filter |= POWER10_MMCRA_IFM1;
+		return pmu_bhrb_filter;
+	}
+
+	/* Everything else is unsupported */
+	return -1;
+}
+
+static void power10_config_bhrb(u64 pmu_bhrb_filter)
+{
+	pmu_bhrb_filter &= POWER10_MMCRA_BHRB_MASK;
+
+	/* Enable BHRB filter in PMU */
+	mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter));
+}
+
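
For orientation, the filter map above is driven by the branch_sample_type that
userspace requests. A hedged sketch of a request that lands in the
POWER10_MMCRA_IFM2 case (the helper name is illustrative and error handling is
elided):

	#include <linux/perf_event.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	/* Sample indirect calls; power10_bhrb_filter_map() above encodes
	 * PERF_SAMPLE_BRANCH_IND_CALL as POWER10_MMCRA_IFM2.
	 */
	static int open_ind_call_sampler(void)
	{
		struct perf_event_attr attr = {
			.type = PERF_TYPE_HARDWARE,
			.size = sizeof(attr),
			.config = PERF_COUNT_HW_CPU_CYCLES,
			.sample_period = 100000,
			.sample_type = PERF_SAMPLE_BRANCH_STACK,
			.branch_sample_type = PERF_SAMPLE_BRANCH_IND_CALL,
		};

		return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
	}
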
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static u64 power10_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[C(L1D)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = PM_LD_REF_L1,
+			[C(RESULT_MISS)] = PM_LD_MISS_L1,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = 0,
+			[C(RESULT_MISS)] = PM_ST_MISS_L1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = PM_LD_PREFETCH_CACHE_LINE_MISS,
+			[C(RESULT_MISS)] = 0,
+		},
+	},
+	[C(L1I)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = PM_INST_FROM_L1,
+			[C(RESULT_MISS)] = PM_L1_ICACHE_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = PM_INST_FROM_L1MISS,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = PM_IC_PREF_REQ,
+			[C(RESULT_MISS)] = 0,
+		},
+	},
+	[C(LL)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = PM_DATA_FROM_L3,
+			[C(RESULT_MISS)] = PM_DATA_FROM_L3MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = 0,
+		},
+	},
+	 [C(DTLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0,
+			[C(RESULT_MISS)] = PM_DTLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+	},
+	[C(ITLB)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = 0,
+			[C(RESULT_MISS)] = PM_ITLB_MISS,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+	},
+	[C(BPU)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = PM_BR_CMPL,
+			[C(RESULT_MISS)] = PM_BR_MPRED_CMPL,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+	},
+	[C(NODE)] = {
+		[C(OP_READ)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_WRITE)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+		[C(OP_PREFETCH)] = {
+			[C(RESULT_ACCESS)] = -1,
+			[C(RESULT_MISS)] = -1,
+		},
+	},
+};
+
+#undef C
+
+static struct power_pmu power10_pmu = {
+	.name			= "POWER10",
+	.n_counter		= MAX_PMU_COUNTERS,
+	.add_fields		= ISA207_ADD_FIELDS,
+	.test_adder		= ISA207_TEST_ADDER,
+	.group_constraint_mask	= CNST_CACHE_PMC4_MASK,
+	.group_constraint_val	= CNST_CACHE_PMC4_VAL,
+	.compute_mmcr		= isa207_compute_mmcr,
+	.config_bhrb		= power10_config_bhrb,
+	.bhrb_filter_map	= power10_bhrb_filter_map,
+	.get_constraint		= isa207_get_constraint,
+	.get_alternatives	= power10_get_alternatives,
+	.get_mem_data_src	= isa207_get_mem_data_src,
+	.get_mem_weight		= isa207_get_mem_weight,
+	.disable_pmc		= isa207_disable_pmc,
+	.flags			= PPMU_HAS_SIER | PPMU_ARCH_207S |
+				  PPMU_ARCH_31,
+	.n_generic		= ARRAY_SIZE(power10_generic_events),
+	.generic_events		= power10_generic_events,
+	.cache_events		= &power10_cache_events,
+	.attr_groups		= power10_pmu_attr_groups,
+	.bhrb_nr		= 32,
+};
+
+int init_power10_pmu(void)
+{
+	int rc;
+
+	/* Comes from cpu_specs[] */
+	if (!cur_cpu_spec->oprofile_cpu_type ||
+	    strcmp(cur_cpu_spec->oprofile_cpu_type, "ppc64/power10"))
+		return -ENODEV;
+
+	rc = register_power_pmu(&power10_pmu);
+	if (rc)
+		return rc;
+
+	/* Tell userspace that EBB is supported */
+	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
+
+	return 0;
+}
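
As a reader's aid, a minimal sketch of how a generalized cache-events table of
this shape is consumed. The in-tree decoding lives in core-book3s.c; this
helper is hypothetical, but the bit layout follows the perf ABI (cache type in
bits 0-7, op in bits 8-15, result in bits 16-23):

	/* Hedged sketch, not the in-tree lookup. */
	static int sketch_cache_event_to_code(u64 config, u64 *codep)
	{
		unsigned int type   = config & 0xff;
		unsigned int op     = (config >> 8) & 0xff;
		unsigned int result = (config >> 16) & 0xff;
		u64 code;

		if (type >= PERF_COUNT_HW_CACHE_MAX ||
		    op >= PERF_COUNT_HW_CACHE_OP_MAX ||
		    result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
			return -EINVAL;

		code = power10_cache_events[type][op][result];
		if (code == 0)		/* not supported on this CPU */
			return -EOPNOTSUPP;
		if (code == (u64)-1)	/* nonsensical combination */
			return -EINVAL;

		*codep = code;
		return 0;
	}
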
diff --git a/arch/powerpc/perf/power5+-pmu.c b/arch/powerpc/perf/power5+-pmu.c
index f857454..a62b2cd 100644
--- a/arch/powerpc/perf/power5+-pmu.c
+++ b/arch/powerpc/perf/power5+-pmu.c
@@ -448,7 +448,8 @@
 }
 
 static int power5p_compute_mmcr(u64 event[], int n_ev,
-				unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
+				unsigned int hwc[], struct mmcr_regs *mmcr,
+				struct perf_event *pevents[])
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = 0;
@@ -586,20 +587,20 @@
 	}
 
 	/* Return MMCRx values */
-	mmcr[0] = 0;
+	mmcr->mmcr0 = 0;
 	if (pmc_inuse & 1)
-		mmcr[0] = MMCR0_PMC1CE;
+		mmcr->mmcr0 = MMCR0_PMC1CE;
 	if (pmc_inuse & 0x3e)
-		mmcr[0] |= MMCR0_PMCjCE;
-	mmcr[1] = mmcr1;
-	mmcr[2] = mmcra;
+		mmcr->mmcr0 |= MMCR0_PMCjCE;
+	mmcr->mmcr1 = mmcr1;
+	mmcr->mmcra = mmcra;
 	return 0;
 }
 
-static void power5p_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void power5p_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
 {
 	if (pmc <= 3)
-		mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
+		mmcr->mmcr1 &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
 }
 
 static int power5p_generic_events[] = {
@@ -618,7 +619,7 @@
  * 0 means not supported, -1 means nonsensical, other values
  * are event codes.
  */
-static int power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power5p_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0x1c10a8,	0x3c1088	},
 		[C(OP_WRITE)] = {	0x2c10a8,	0xc10c3		},
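
The mmcr[] to mmcr_regs conversion repeated in this and the following PMU
drivers swaps magic array indices (mmcr[0], mmcr[1], mmcr[2]) for named
fields. The struct's shape, as inferred from its uses in these hunks (the
series defines it in arch/powerpc/include/asm/perf_event_server.h; MMCR3
exists only on ISA v3.1 parts):

	struct mmcr_regs {
		unsigned long mmcr0;
		unsigned long mmcr1;
		unsigned long mmcr2;
		unsigned long mmcra;
		unsigned long mmcr3;	/* new in ISA v3.1 (POWER10) */
	};
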
diff --git a/arch/powerpc/perf/power5-pmu.c b/arch/powerpc/perf/power5-pmu.c
index da52eca..8732b58 100644
--- a/arch/powerpc/perf/power5-pmu.c
+++ b/arch/powerpc/perf/power5-pmu.c
@@ -379,7 +379,8 @@
 }
 
 static int power5_compute_mmcr(u64 event[], int n_ev,
-			       unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
+			       unsigned int hwc[], struct mmcr_regs *mmcr,
+			       struct perf_event *pevents[])
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
@@ -528,20 +529,20 @@
 	}
 
 	/* Return MMCRx values */
-	mmcr[0] = 0;
+	mmcr->mmcr0 = 0;
 	if (pmc_inuse & 1)
-		mmcr[0] = MMCR0_PMC1CE;
+		mmcr->mmcr0 = MMCR0_PMC1CE;
 	if (pmc_inuse & 0x3e)
-		mmcr[0] |= MMCR0_PMCjCE;
-	mmcr[1] = mmcr1;
-	mmcr[2] = mmcra;
+		mmcr->mmcr0 |= MMCR0_PMCjCE;
+	mmcr->mmcr1 = mmcr1;
+	mmcr->mmcra = mmcra;
 	return 0;
 }
 
-static void power5_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void power5_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
 {
 	if (pmc <= 3)
-		mmcr[1] &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
+		mmcr->mmcr1 &= ~(0x7fUL << MMCR1_PMCSEL_SH(pmc));
 }
 
 static int power5_generic_events[] = {
@@ -560,7 +561,7 @@
  * 0 means not supported, -1 means nonsensical, other values
  * are event codes.
  */
-static int power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power5_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0x4c1090,	0x3c1088	},
 		[C(OP_WRITE)] = {	0x3c1090,	0xc10c3		},
diff --git a/arch/powerpc/perf/power6-pmu.c b/arch/powerpc/perf/power6-pmu.c
index 3929cac..0e318cf 100644
--- a/arch/powerpc/perf/power6-pmu.c
+++ b/arch/powerpc/perf/power6-pmu.c
@@ -171,7 +171,7 @@
  * Assign PMC numbers and compute MMCR1 value for a set of events
  */
 static int p6_compute_mmcr(u64 event[], int n_ev,
-			   unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
+			   unsigned int hwc[], struct mmcr_regs *mmcr, struct perf_event *pevents[])
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
@@ -243,13 +243,13 @@
 		if (pmc < 4)
 			mmcr1 |= (unsigned long)psel << MMCR1_PMCSEL_SH(pmc);
 	}
-	mmcr[0] = 0;
+	mmcr->mmcr0 = 0;
 	if (pmc_inuse & 1)
-		mmcr[0] = MMCR0_PMC1CE;
+		mmcr->mmcr0 = MMCR0_PMC1CE;
 	if (pmc_inuse & 0xe)
-		mmcr[0] |= MMCR0_PMCjCE;
-	mmcr[1] = mmcr1;
-	mmcr[2] = mmcra;
+		mmcr->mmcr0 |= MMCR0_PMCjCE;
+	mmcr->mmcr1 = mmcr1;
+	mmcr->mmcra = mmcra;
 	return 0;
 }
 
@@ -457,11 +457,11 @@
 	return nalt;
 }
 
-static void p6_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void p6_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
 {
 	/* Set PMCxSEL to 0 to disable PMCx */
 	if (pmc <= 3)
-		mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
+		mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
 }
 
 static int power6_generic_events[] = {
@@ -481,7 +481,7 @@
  * are event codes.
  * The "DTLB" and "ITLB" events relate to the DERAT and IERAT.
  */
-static int power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power6_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0x280030,	0x80080		},
 		[C(OP_WRITE)] = {	0x180032,	0x80088		},
diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c
index a137813..5e0bf09 100644
--- a/arch/powerpc/perf/power7-pmu.c
+++ b/arch/powerpc/perf/power7-pmu.c
@@ -242,7 +242,8 @@
 }
 
 static int power7_compute_mmcr(u64 event[], int n_ev,
-			       unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
+			       unsigned int hwc[], struct mmcr_regs *mmcr,
+			       struct perf_event *pevents[])
 {
 	unsigned long mmcr1 = 0;
 	unsigned long mmcra = MMCRA_SDAR_DCACHE_MISS | MMCRA_SDAR_ERAT_MISS;
@@ -298,20 +299,20 @@
 	}
 
 	/* Return MMCRx values */
-	mmcr[0] = 0;
+	mmcr->mmcr0 = 0;
 	if (pmc_inuse & 1)
-		mmcr[0] = MMCR0_PMC1CE;
+		mmcr->mmcr0 = MMCR0_PMC1CE;
 	if (pmc_inuse & 0x3e)
-		mmcr[0] |= MMCR0_PMCjCE;
-	mmcr[1] = mmcr1;
-	mmcr[2] = mmcra;
+		mmcr->mmcr0 |= MMCR0_PMCjCE;
+	mmcr->mmcr1 = mmcr1;
+	mmcr->mmcra = mmcra;
 	return 0;
 }
 
-static void power7_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void power7_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
 {
 	if (pmc <= 3)
-		mmcr[1] &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
+		mmcr->mmcr1 &= ~(0xffUL << MMCR1_PMCSEL_SH(pmc));
 }
 
 static int power7_generic_events[] = {
@@ -332,7 +333,7 @@
  * 0 means not supported, -1 means nonsensical, other values
  * are event codes.
  */
-static int power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power7_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0xc880,		0x400f0	},
 		[C(OP_WRITE)] = {	0,		0x300f0	},
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 3a5fcc2..5282e84 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -253,7 +253,7 @@
  * 0 means not supported, -1 means nonsensical, other values
  * are event codes.
  */
-static int power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[ C(L1D) ] = {
 		[ C(OP_READ) ] = {
 			[ C(RESULT_ACCESS) ] = PM_LD_REF_L1,
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 08c3ef7..05dae38 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -310,7 +310,7 @@
  * 0 means not supported, -1 means nonsensical, other values
  * are event codes.
  */
-static int power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 power9_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[ C(L1D) ] = {
 		[ C(OP_READ) ] = {
 			[ C(RESULT_ACCESS) ] = PM_LD_REF_L1,
diff --git a/arch/powerpc/perf/ppc970-pmu.c b/arch/powerpc/perf/ppc970-pmu.c
index 4035d93..d35223f 100644
--- a/arch/powerpc/perf/ppc970-pmu.c
+++ b/arch/powerpc/perf/ppc970-pmu.c
@@ -253,7 +253,8 @@
 }
 
 static int p970_compute_mmcr(u64 event[], int n_ev,
-			     unsigned int hwc[], unsigned long mmcr[], struct perf_event *pevents[])
+			     unsigned int hwc[], struct mmcr_regs *mmcr,
+			     struct perf_event *pevents[])
 {
 	unsigned long mmcr0 = 0, mmcr1 = 0, mmcra = 0;
 	unsigned int pmc, unit, byte, psel;
@@ -393,27 +394,26 @@
 	mmcra |= 0x2000;	/* mark only one IOP per PPC instruction */
 
 	/* Return MMCRx values */
-	mmcr[0] = mmcr0;
-	mmcr[1] = mmcr1;
-	mmcr[2] = mmcra;
+	mmcr->mmcr0 = mmcr0;
+	mmcr->mmcr1 = mmcr1;
+	mmcr->mmcra = mmcra;
 	return 0;
 }
 
-static void p970_disable_pmc(unsigned int pmc, unsigned long mmcr[])
+static void p970_disable_pmc(unsigned int pmc, struct mmcr_regs *mmcr)
 {
-	int shift, i;
+	int shift;
 
-	if (pmc <= 1) {
-		shift = MMCR0_PMC1SEL_SH - 7 * pmc;
-		i = 0;
-	} else {
-		shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
-		i = 1;
-	}
 	/*
 	 * Setting the PMCxSEL field to 0x08 disables PMC x.
 	 */
-	mmcr[i] = (mmcr[i] & ~(0x1fUL << shift)) | (0x08UL << shift);
+	if (pmc <= 1) {
+		shift = MMCR0_PMC1SEL_SH - 7 * pmc;
+		mmcr->mmcr0 = (mmcr->mmcr0 & ~(0x1fUL << shift)) | (0x08UL << shift);
+	} else {
+		shift = MMCR1_PMC3SEL_SH - 5 * (pmc - 2);
+		mmcr->mmcr1 = (mmcr->mmcr1 & ~(0x1fUL << shift)) | (0x08UL << shift);
+	}
 }
 
 static int ppc970_generic_events[] = {
@@ -432,7 +432,7 @@
  * 0 means not supported, -1 means nonsensical, other values
  * are event codes.
  */
-static int ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+static u64 ppc970_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
 	[C(L1D)] = {		/* 	RESULT_ACCESS	RESULT_MISS */
 		[C(OP_READ)] = {	0x8810,		0x3810	},
 		[C(OP_WRITE)] = {	0x7810,		0x813	},
diff --git a/arch/powerpc/platforms/52xx/lite5200_sleep.S b/arch/powerpc/platforms/52xx/lite5200_sleep.S
index 70083649..11475c5 100644
--- a/arch/powerpc/platforms/52xx/lite5200_sleep.S
+++ b/arch/powerpc/platforms/52xx/lite5200_sleep.S
@@ -56,7 +56,7 @@
 	/*
 	 * save stuff BDI overwrites
 	 * 0xf0 (0xe0->0x100 gets overwritten when BDI connected;
-	 *   even when CONFIG_BDI* is disabled and MMU XLAT commented; heisenbug?))
+	 *   even when CONFIG_BDI_SWITCH is disabled and MMU XLAT commented; heisenbug?))
 	 * WARNING: self-refresh doesn't seem to work when BDI2000 is connected,
 	 *   possibly because BDI sets SDRAM registers before wakeup code does
 	 */
diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig
index fa3d29d..b77cbb0 100644
--- a/arch/powerpc/platforms/85xx/Kconfig
+++ b/arch/powerpc/platforms/85xx/Kconfig
@@ -160,7 +160,7 @@
 	  computers from Extreme Engineering Solutions (X-ES) based on
 	  Freescale MPC85xx processors.
 	  Manufacturer: Extreme Engineering Solutions, Inc.
-	  URL: <http://www.xes-inc.com/>
+	  URL: <https://www.xes-inc.com/>
 
 config STX_GP3
 	bool "Silicon Turnkey Express GP3"
diff --git a/arch/powerpc/platforms/cell/Kconfig b/arch/powerpc/platforms/cell/Kconfig
index 0f7c824..f2ff359 100644
--- a/arch/powerpc/platforms/cell/Kconfig
+++ b/arch/powerpc/platforms/cell/Kconfig
@@ -44,6 +44,7 @@
 	tristate "SPU file system"
 	default m
 	depends on PPC_CELL
+	depends on COREDUMP
 	select SPU_BASE
 	help
 	  The SPU file system is used to access Synergistic Processing
diff --git a/arch/powerpc/platforms/cell/spufs/coredump.c b/arch/powerpc/platforms/cell/spufs/coredump.c
index 3b75e8f..026c181 100644
--- a/arch/powerpc/platforms/cell/spufs/coredump.c
+++ b/arch/powerpc/platforms/cell/spufs/coredump.c
@@ -66,13 +66,20 @@
  */
 static struct spu_context *coredump_next_context(int *fd)
 {
+	struct spu_context *ctx;
 	struct file *file;
 	int n = iterate_fd(current->files, *fd, match_context, NULL);
 	if (!n)
 		return NULL;
 	*fd = n - 1;
+
+	rcu_read_lock();
 	file = fcheck(*fd);
-	return SPUFS_I(file_inode(file))->i_ctx;
+	ctx = SPUFS_I(file_inode(file))->i_ctx;
+	get_spu_context(ctx);
+	rcu_read_unlock();
+
+	return ctx;
 }
 
 int spufs_coredump_extra_notes_size(void)
@@ -83,17 +90,23 @@
 	fd = 0;
 	while ((ctx = coredump_next_context(&fd)) != NULL) {
 		rc = spu_acquire_saved(ctx);
-		if (rc)
+		if (rc) {
+			put_spu_context(ctx);
 			break;
+		}
+
 		rc = spufs_ctx_note_size(ctx, fd);
 		spu_release_saved(ctx);
-		if (rc < 0)
+		if (rc < 0) {
+			put_spu_context(ctx);
 			break;
+		}
 
 		size += rc;
 
 		/* start searching the next fd next time */
 		fd++;
+		put_spu_context(ctx);
 	}
 
 	return size;
@@ -105,7 +118,7 @@
 	size_t sz = spufs_coredump_read[i].size;
 	char fullname[80];
 	struct elf_note en;
-	size_t ret;
+	int ret;
 
 	sprintf(fullname, "SPU/%d/%s", dfd, spufs_coredump_read[i].name);
 	en.n_namesz = strlen(fullname) + 1;
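
The coredump change above follows the standard pattern for objects reached
through an RCU-protected table (here the fd table, via fcheck()): resolve the
pointer and pin it with a reference while still inside the RCU read-side
critical section, then drop that reference on every exit path. A generic
sketch of the pattern, with hypothetical names standing in for fcheck() and
spu_context:

	struct obj {
		refcount_t ref;
		/* ... payload ... */
	};

	extern struct obj *lookup_slot(int slot);	/* RCU-protected lookup */

	static struct obj *obj_get_from_table(int slot)
	{
		struct obj *o;

		rcu_read_lock();
		o = lookup_slot(slot);
		if (o)
			refcount_inc(&o->ref);	/* pin before leaving RCU */
		rcu_read_unlock();

		return o;	/* caller pairs this with a put */
	}
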
diff --git a/arch/powerpc/platforms/pasemi/misc.c b/arch/powerpc/platforms/pasemi/misc.c
index 1cd4ca1..1bf65d0 100644
--- a/arch/powerpc/platforms/pasemi/misc.c
+++ b/arch/powerpc/platforms/pasemi/misc.c
@@ -56,8 +56,7 @@
 		if (!adap_node)
 			continue;
 
-		node = NULL;
-		while ((node = of_get_next_child(adap_node, node))) {
+		for_each_child_of_node(adap_node, node) {
 			struct i2c_board_info info = {};
 			const u32 *addr;
 			int len;
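
This hunk and the powermac ones below are the same mechanical conversion: the
open-coded of_get_next_child() cursor loop becomes the helper macro. For
reference, the macro expands to essentially the loop it replaces (definition
in include/linux/of.h), so child-node refcounting is unchanged:

	#define for_each_child_of_node(parent, child) \
		for (child = of_get_next_child(parent, NULL); child != NULL; \
		     child = of_get_next_child(parent, child))
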
diff --git a/arch/powerpc/platforms/powermac/feature.c b/arch/powerpc/platforms/powermac/feature.c
index 181caa3..5c77b9a 100644
--- a/arch/powerpc/platforms/powermac/feature.c
+++ b/arch/powerpc/platforms/powermac/feature.c
@@ -1465,7 +1465,7 @@
 	case 2:
 		if (macio->type == macio_shasta)
 			break;
-		/* fall through */
+		fallthrough;
 	default:
 		return -ENODEV;
 	}
diff --git a/arch/powerpc/platforms/powermac/low_i2c.c b/arch/powerpc/platforms/powermac/low_i2c.c
index bf4be4b..f77a59b 100644
--- a/arch/powerpc/platforms/powermac/low_i2c.c
+++ b/arch/powerpc/platforms/powermac/low_i2c.c
@@ -629,8 +629,7 @@
 			for (i = 0; i < chans; i++)
 				kw_i2c_add(host, np, np, i);
 		} else {
-			for (child = NULL;
-			     (child = of_get_next_child(np, child)) != NULL;) {
+			for_each_child_of_node(np, child) {
 				const u32 *reg = of_get_property(child,
 						"reg", NULL);
 				if (reg == NULL)
@@ -1193,8 +1192,7 @@
 	 * platform function instance
 	 */
 	list_for_each_entry(bus, &pmac_i2c_busses, link) {
-		for (np = NULL;
-		     (np = of_get_next_child(bus->busnode, np)) != NULL;) {
+		for_each_child_of_node(bus->busnode, np) {
 			struct whitelist_ent *p;
 			/* If multibus, check if device is on that bus */
 			if (bus->flags & pmac_i2c_multibus)
diff --git a/arch/powerpc/platforms/powermac/pfunc_base.c b/arch/powerpc/platforms/powermac/pfunc_base.c
index 62311e8..f542250 100644
--- a/arch/powerpc/platforms/powermac/pfunc_base.c
+++ b/arch/powerpc/platforms/powermac/pfunc_base.c
@@ -114,7 +114,7 @@
 	 * Ok, got one, we don't need anything special to track them down, so
 	 * we just create them all
 	 */
-	for (gp = NULL; (gp = of_get_next_child(gparent, gp)) != NULL;) {
+	for_each_child_of_node(gparent, gp) {
 		const u32 *reg = of_get_property(gp, "reg", NULL);
 		unsigned long offset;
 		if (reg == NULL)
@@ -133,7 +133,7 @@
 	    macio->of_node);
 
 	/* And now we run all the init ones */
-	for (gp = NULL; (gp = of_get_next_child(gparent, gp)) != NULL;)
+	for_each_child_of_node(gparent, gp)
 		pmf_do_functions(gp, NULL, 0, PMF_FLAGS_ON_INIT, NULL);
 
 	/* Note: We do not at this point implement the "at sleep" or "at wake"
diff --git a/arch/powerpc/platforms/powermac/udbg_scc.c b/arch/powerpc/platforms/powermac/udbg_scc.c
index 6b61a18..f286bdf 100644
--- a/arch/powerpc/platforms/powermac/udbg_scc.c
+++ b/arch/powerpc/platforms/powermac/udbg_scc.c
@@ -80,7 +80,7 @@
 	path = of_get_property(of_chosen, "linux,stdout-path", NULL);
 	if (path != NULL)
 		stdout = of_find_node_by_path(path);
-	for (ch = NULL; (ch = of_get_next_child(escc, ch)) != NULL;) {
+	for_each_child_of_node(escc, ch) {
 		if (ch == stdout)
 			ch_def = of_node_get(ch);
 		if (of_node_name_eq(ch, "ch-a"))
diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile
index fe3f0fb..2eb6ae1 100644
--- a/arch/powerpc/platforms/powernv/Makefile
+++ b/arch/powerpc/platforms/powernv/Makefile
@@ -11,6 +11,7 @@
 obj-$(CONFIG_PRESERVE_FA_DUMP)	+= opal-fadump.o
 obj-$(CONFIG_OPAL_CORE)	+= opal-core.o
 obj-$(CONFIG_PCI)	+= pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o
+obj-$(CONFIG_PCI_IOV)   += pci-sriov.o
 obj-$(CONFIG_CXL_BASE)	+= pci-cxl.o
 obj-$(CONFIG_EEH)	+= eeh-powernv.o
 obj-$(CONFIG_MEMORY_FAILURE)	+= opal-memory-errors.o
diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c b/arch/powerpc/platforms/powernv/eeh-powernv.c
index 79409e0..9af8c3b 100644
--- a/arch/powerpc/platforms/powernv/eeh-powernv.c
+++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
@@ -338,6 +338,28 @@
 	return 0;
 }
 
+static struct eeh_pe *pnv_eeh_get_upstream_pe(struct pci_dev *pdev)
+{
+	struct pci_controller *hose = pdev->bus->sysdata;
+	struct pnv_phb *phb = hose->private_data;
+	struct pci_dev *parent = pdev->bus->self;
+
+#ifdef CONFIG_PCI_IOV
+	/* for VFs we use the PF's PE as the upstream PE */
+	if (pdev->is_virtfn)
+		parent = pdev->physfn;
+#endif
+
+	/* otherwise use the PE of our parent bridge */
+	if (parent) {
+		struct pnv_ioda_pe *ioda_pe = pnv_ioda_get_pe(parent);
+
+		return eeh_pe_get(phb->hose, ioda_pe->pe_number, 0);
+	}
+
+	return NULL;
+}
+
 /**
  * pnv_eeh_probe - Do probe on PCI device
  * @pdev: pci_dev to probe
@@ -350,6 +372,7 @@
 	struct pci_controller *hose = pdn->phb;
 	struct pnv_phb *phb = hose->private_data;
 	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
+	struct eeh_pe *upstream_pe;
 	uint32_t pcie_flags;
 	int ret;
 	int config_addr = (pdn->busno << 8) | (pdn->devfn);
@@ -372,19 +395,18 @@
 	}
 
 	/* Skip for PCI-ISA bridge */
-	if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
+	if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
 		return NULL;
 
 	eeh_edev_dbg(edev, "Probing device\n");
 
 	/* Initialize eeh device */
-	edev->class_code = pdn->class_code;
 	edev->mode	&= 0xFFFFFF00;
 	edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
 	edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
 	edev->af_cap   = pnv_eeh_find_cap(pdn, PCI_CAP_ID_AF);
 	edev->aer_cap  = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
-	if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
+	if ((pdev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
 		edev->mode |= EEH_DEV_BRIDGE;
 		if (edev->pcie_cap) {
 			pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
@@ -399,8 +421,10 @@
 
 	edev->pe_config_addr = phb->ioda.pe_rmap[config_addr];
 
+	upstream_pe = pnv_eeh_get_upstream_pe(pdev);
+
 	/* Create PE */
-	ret = eeh_add_to_parent_pe(edev);
+	ret = eeh_pe_tree_insert(edev, upstream_pe);
 	if (ret) {
 		eeh_edev_warn(edev, "Failed to add device to PE (code %d)\n", ret);
 		return NULL;
@@ -535,18 +559,6 @@
 	return 0;
 }
 
-/**
- * pnv_eeh_get_pe_addr - Retrieve PE address
- * @pe: EEH PE
- *
- * Retrieve the PE address according to the given tranditional
- * PCI BDF (Bus/Device/Function) address.
- */
-static int pnv_eeh_get_pe_addr(struct eeh_pe *pe)
-{
-	return pe->addr;
-}
-
 static void pnv_eeh_get_phb_diag(struct eeh_pe *pe)
 {
 	struct pnv_phb *phb = pe->phb->private_data;
@@ -850,32 +862,32 @@
 	case EEH_RESET_HOT:
 		/* Don't report linkDown event */
 		if (aer) {
-			eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK,
+			eeh_ops->read_config(edev, aer + PCI_ERR_UNCOR_MASK,
 					     4, &ctrl);
 			ctrl |= PCI_ERR_UNC_SURPDN;
-			eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK,
+			eeh_ops->write_config(edev, aer + PCI_ERR_UNCOR_MASK,
 					      4, ctrl);
 		}
 
-		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl);
+		eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl);
 		ctrl |= PCI_BRIDGE_CTL_BUS_RESET;
-		eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl);
+		eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl);
 
 		msleep(EEH_PE_RST_HOLD_TIME);
 		break;
 	case EEH_RESET_DEACTIVATE:
-		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl);
+		eeh_ops->read_config(edev, PCI_BRIDGE_CONTROL, 2, &ctrl);
 		ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET;
-		eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl);
+		eeh_ops->write_config(edev, PCI_BRIDGE_CONTROL, 2, ctrl);
 
 		msleep(EEH_PE_RST_SETTLE_TIME);
 
 		/* Continue reporting linkDown event */
 		if (aer) {
-			eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK,
+			eeh_ops->read_config(edev, aer + PCI_ERR_UNCOR_MASK,
 					     4, &ctrl);
 			ctrl &= ~PCI_ERR_UNC_SURPDN;
-			eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK,
+			eeh_ops->write_config(edev, aer + PCI_ERR_UNCOR_MASK,
 					      4, ctrl);
 		}
 
@@ -944,11 +956,12 @@
 static void pnv_eeh_wait_for_pending(struct pci_dn *pdn, const char *type,
 				     int pos, u16 mask)
 {
+	struct eeh_dev *edev = pdn->edev;
 	int i, status = 0;
 
 	/* Wait for Transaction Pending bit to be cleared */
 	for (i = 0; i < 4; i++) {
-		eeh_ops->read_config(pdn, pos, 2, &status);
+		eeh_ops->read_config(edev, pos, 2, &status);
 		if (!(status & mask))
 			return;
 
@@ -969,7 +982,7 @@
 	if (WARN_ON(!edev->pcie_cap))
 		return -ENOTTY;
 
-	eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP, 4, &reg);
+	eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCAP, 4, &reg);
 	if (!(reg & PCI_EXP_DEVCAP_FLR))
 		return -ENOTTY;
 
@@ -979,18 +992,18 @@
 		pnv_eeh_wait_for_pending(pdn, "",
 					 edev->pcie_cap + PCI_EXP_DEVSTA,
 					 PCI_EXP_DEVSTA_TRPND);
-		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+		eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,
 				     4, &reg);
 		reg |= PCI_EXP_DEVCTL_BCR_FLR;
-		eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+		eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,
 				      4, reg);
 		msleep(EEH_PE_RST_HOLD_TIME);
 		break;
 	case EEH_RESET_DEACTIVATE:
-		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+		eeh_ops->read_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,
 				     4, &reg);
 		reg &= ~PCI_EXP_DEVCTL_BCR_FLR;
-		eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
+		eeh_ops->write_config(edev, edev->pcie_cap + PCI_EXP_DEVCTL,
 				      4, reg);
 		msleep(EEH_PE_RST_SETTLE_TIME);
 		break;
@@ -1007,7 +1020,7 @@
 	if (WARN_ON(!edev->af_cap))
 		return -ENOTTY;
 
-	eeh_ops->read_config(pdn, edev->af_cap + PCI_AF_CAP, 1, &cap);
+	eeh_ops->read_config(edev, edev->af_cap + PCI_AF_CAP, 1, &cap);
 	if (!(cap & PCI_AF_CAP_TP) || !(cap & PCI_AF_CAP_FLR))
 		return -ENOTTY;
 
@@ -1022,12 +1035,12 @@
 		pnv_eeh_wait_for_pending(pdn, "AF",
 					 edev->af_cap + PCI_AF_CTRL,
 					 PCI_AF_STATUS_TP << 8);
-		eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL,
+		eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL,
 				      1, PCI_AF_CTRL_FLR);
 		msleep(EEH_PE_RST_HOLD_TIME);
 		break;
 	case EEH_RESET_DEACTIVATE:
-		eeh_ops->write_config(pdn, edev->af_cap + PCI_AF_CTRL, 1, 0);
+		eeh_ops->write_config(edev, edev->af_cap + PCI_AF_CTRL, 1, 0);
 		msleep(EEH_PE_RST_SETTLE_TIME);
 		break;
 	}
@@ -1261,9 +1274,11 @@
 	return false;
 }
 
-static int pnv_eeh_read_config(struct pci_dn *pdn,
+static int pnv_eeh_read_config(struct eeh_dev *edev,
 			       int where, int size, u32 *val)
 {
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
 	if (!pdn)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
@@ -1275,9 +1290,11 @@
 	return pnv_pci_cfg_read(pdn, where, size, val);
 }
 
-static int pnv_eeh_write_config(struct pci_dn *pdn,
+static int pnv_eeh_write_config(struct eeh_dev *edev,
 				int where, int size, u32 val)
 {
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
 	if (!pdn)
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
@@ -1631,34 +1648,24 @@
 	return ret;
 }
 
-static int pnv_eeh_restore_config(struct pci_dn *pdn)
+static int pnv_eeh_restore_config(struct eeh_dev *edev)
 {
-	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
 	struct pnv_phb *phb;
 	s64 ret = 0;
-	int config_addr = (pdn->busno << 8) | (pdn->devfn);
 
 	if (!edev)
 		return -EEXIST;
 
-	/*
-	 * We have to restore the PCI config space after reset since the
-	 * firmware can't see SRIOV VFs.
-	 *
-	 * FIXME: The MPS, error routing rules, timeout setting are worthy
-	 * to be exported by firmware in extendible way.
-	 */
-	if (edev->physfn) {
-		ret = eeh_restore_vf_config(pdn);
-	} else {
-		phb = pdn->phb->private_data;
-		ret = opal_pci_reinit(phb->opal_id,
-				      OPAL_REINIT_PCI_DEV, config_addr);
-	}
+	if (edev->physfn)
+		return 0;
+
+	phb = edev->controller->private_data;
+	ret = opal_pci_reinit(phb->opal_id,
+			      OPAL_REINIT_PCI_DEV, edev->bdfn);
 
 	if (ret) {
 		pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
-			__func__, config_addr, ret);
+			__func__, edev->bdfn, ret);
 		return -EIO;
 	}
 
@@ -1670,7 +1677,6 @@
 	.init                   = pnv_eeh_init,
 	.probe			= pnv_eeh_probe,
 	.set_option             = pnv_eeh_set_option,
-	.get_pe_addr            = pnv_eeh_get_pe_addr,
 	.get_state              = pnv_eeh_get_state,
 	.reset                  = pnv_eeh_reset,
 	.get_log                = pnv_eeh_get_log,
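
Much of the churn in this file comes from the eeh_ops config-space accessors
switching their first argument from struct pci_dn to struct eeh_dev. After
the series the two changed callbacks are assumed to look like this (other
members of struct eeh_ops elided):

	struct eeh_ops {
		/* ... */
		int (*read_config)(struct eeh_dev *edev, int where,
				   int size, u32 *val);
		int (*write_config)(struct eeh_dev *edev, int where,
				    int size, u32 val);
		/* ... */
	};
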
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 2dd4673..77513a80 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -48,7 +48,7 @@
  * First stop state levels when SPR and TB loss can occur.
  */
 static u64 pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
-static u64 pnv_first_spr_loss_level = MAX_STOP_STATE + 1;
+static u64 deep_spr_loss_state = MAX_STOP_STATE + 1;
 
 /*
  * psscr value and mask of the deepest stop idle state.
@@ -73,9 +73,6 @@
 	 */
 	uint64_t lpcr_val	= mfspr(SPRN_LPCR);
 	uint64_t hid0_val	= mfspr(SPRN_HID0);
-	uint64_t hid1_val	= mfspr(SPRN_HID1);
-	uint64_t hid4_val	= mfspr(SPRN_HID4);
-	uint64_t hid5_val	= mfspr(SPRN_HID5);
 	uint64_t hmeer_val	= mfspr(SPRN_HMEER);
 	uint64_t msr_val = MSR_IDLE;
 	uint64_t psscr_val = pnv_deepest_stop_psscr_val;
@@ -117,6 +114,9 @@
 
 			/* Only p8 needs to set extra HID registers */
 			if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+				uint64_t hid1_val = mfspr(SPRN_HID1);
+				uint64_t hid4_val = mfspr(SPRN_HID4);
+				uint64_t hid5_val = mfspr(SPRN_HID5);
 
 				rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
 				if (rc != 0)
@@ -611,6 +611,7 @@
 	unsigned long srr1;
 	unsigned long pls;
 	unsigned long mmcr0 = 0;
+	unsigned long mmcra = 0;
 	struct p9_sprs sprs = {}; /* avoid false used-uninitialised */
 	bool sprs_saved = false;
 
@@ -657,7 +658,22 @@
 		  */
 		mmcr0		= mfspr(SPRN_MMCR0);
 	}
-	if ((psscr & PSSCR_RL_MASK) >= pnv_first_spr_loss_level) {
+
+	if (cpu_has_feature(CPU_FTR_ARCH_31)) {
+		/*
+		 * POWER10 uses MMCRA[BHRBRD] as the BHRB disable bit.
+		 * If the user hasn't asked for the BHRB to be
+		 * written, the value of MMCRA[BHRBRD] is 1.
+		 * On wakeup from stop, MMCRA[BHRBRD] will be 0,
+		 * since it is a privileged resource and will be lost.
+		 * Thus, if we do not save and restore MMCRA[BHRBRD],
+		 * the hardware will needlessly write to the BHRB
+		 * in problem mode.
+		 */
+		mmcra		= mfspr(SPRN_MMCRA);
+	}
+
+	if ((psscr & PSSCR_RL_MASK) >= deep_spr_loss_state) {
 		sprs.lpcr	= mfspr(SPRN_LPCR);
 		sprs.hfscr	= mfspr(SPRN_HFSCR);
 		sprs.fscr	= mfspr(SPRN_FSCR);
@@ -700,8 +716,6 @@
 	WARN_ON_ONCE(mfmsr() & (MSR_IR|MSR_DR));
 
 	if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
-		unsigned long mmcra;
-
 		/*
 		 * We don't need an isync after the mtsprs here because the
 		 * upcoming mtmsrd is execution synchronizing.
@@ -721,6 +735,10 @@
 			mtspr(SPRN_MMCR0, mmcr0);
 		}
 
+		/* Reload MMCRA to restore BHRB disable bit for POWER10 */
+		if (cpu_has_feature(CPU_FTR_ARCH_31))
+			mtspr(SPRN_MMCRA, mmcra);
+
 		/*
 		 * DD2.2 and earlier need to set then clear bit 60 in MMCRA
 		 * to ensure the PMU starts running.
@@ -741,7 +759,7 @@
 	 * just always test PSSCR for SPR/TB state loss.
 	 */
 	pls = (psscr & PSSCR_PLS) >> PSSCR_PLS_SHIFT;
-	if (likely(pls < pnv_first_spr_loss_level)) {
+	if (likely(pls < deep_spr_loss_state)) {
 		if (sprs_saved)
 			atomic_stop_thread_idle();
 		goto out;
@@ -1088,7 +1106,7 @@
 	 * the deepest loss-less (OPAL_PM_STOP_INST_FAST) stop state.
 	 */
 	pnv_first_tb_loss_level = MAX_STOP_STATE + 1;
-	pnv_first_spr_loss_level = MAX_STOP_STATE + 1;
+	deep_spr_loss_state = MAX_STOP_STATE + 1;
 	for (i = 0; i < nr_pnv_idle_states; i++) {
 		int err;
 		struct pnv_idle_states_t *state = &pnv_idle_states[i];
@@ -1099,8 +1117,8 @@
 			pnv_first_tb_loss_level = psscr_rl;
 
 		if ((state->flags & OPAL_PM_LOSE_FULL_CONTEXT) &&
-		     (pnv_first_spr_loss_level > psscr_rl))
-			pnv_first_spr_loss_level = psscr_rl;
+		     (deep_spr_loss_state > psscr_rl))
+			deep_spr_loss_state = psscr_rl;
 
 		/*
 		 * The idle code does not deal with TB loss occurring
@@ -1111,8 +1129,8 @@
 		 * compatibility.
 		 */
 		if ((state->flags & OPAL_PM_TIMEBASE_STOP) &&
-		     (pnv_first_spr_loss_level > psscr_rl))
-			pnv_first_spr_loss_level = psscr_rl;
+		     (deep_spr_loss_state > psscr_rl))
+			deep_spr_loss_state = psscr_rl;
 
 		err = validate_psscr_val_mask(&state->psscr_val,
 					      &state->psscr_mask,
@@ -1158,7 +1176,7 @@
 	}
 
 	pr_info("cpuidle-powernv: First stop level that may lose SPRs = 0x%llx\n",
-		pnv_first_spr_loss_level);
+		deep_spr_loss_state);
 
 	pr_info("cpuidle-powernv: First stop level that may lose timebase = 0x%llx\n",
 		pnv_first_tb_loss_level);
@@ -1205,7 +1223,7 @@
 		return;
 	}
 
-	if (cpu_has_feature(CPU_FTR_ARCH_300))
+	if (pvr_version_is(PVR_POWER9))
 		pnv_power9_idle_init();
 
 	for (i = 0; i < nr_pnv_idle_states; i++)
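
Distilling the MMCRA handling added above: POWER10's BHRB disable bit lives in
a privileged SPR whose contents deep stop loses, so the idle path now pairs a
save before entering stop with a restore on the lossy wakeup path. In sketch
form (names taken from the hunks above, surrounding detail elided):

	unsigned long mmcra = 0;

	if (cpu_has_feature(CPU_FTR_ARCH_31))	/* POWER10 and later */
		mmcra = mfspr(SPRN_MMCRA);	/* save before entering stop */

	/* ... enter stop, wake up ... */

	if ((srr1 & SRR1_WAKESTATE) != SRR1_WS_NOLOSS) {
		/* SPR state may have been lost; reload the BHRB disable bit */
		if (cpu_has_feature(CPU_FTR_ARCH_31))
			mtspr(SPRN_MMCRA, mmcra);
	}
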
diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c
index 1656e89..c094fdf 100644
--- a/arch/powerpc/platforms/powernv/opal-async.c
+++ b/arch/powerpc/platforms/powernv/opal-async.c
@@ -104,7 +104,7 @@
 	 */
 	case ASYNC_TOKEN_DISPATCHED:
 		opal_async_tokens[token].state = ASYNC_TOKEN_ABANDONED;
-		/* Fall through */
+		fallthrough;
 	default:
 		rc = 1;
 	}
diff --git a/arch/powerpc/platforms/powernv/pci-ioda-tce.c b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
index f923359..5218f5d 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda-tce.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda-tce.c
@@ -166,7 +166,7 @@
 	if (!ptce) {
 		ptce = pnv_tce(tbl, false, idx, alloc);
 		if (!ptce)
-			return alloc ? H_HARDWARE : H_TOO_HARD;
+			return -ENOMEM;
 	}
 
 	if (newtce & TCE_PCI_WRITE)
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 73a63efc..c9c25fb 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -115,32 +115,13 @@
 
 early_param("ppc_pci_reset_phbs", pci_reset_phbs_setup);
 
-static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
-{
-	/*
-	 * WARNING: We cannot rely on the resource flags. The Linux PCI
-	 * allocation code sometimes decides to put a 64-bit prefetchable
-	 * BAR in the 32-bit window, so we have to compare the addresses.
-	 *
-	 * For simplicity we only test resource start.
-	 */
-	return (r->start >= phb->ioda.m64_base &&
-		r->start < (phb->ioda.m64_base + phb->ioda.m64_size));
-}
-
-static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags)
-{
-	unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
-
-	return (resource_flags & flags) == flags;
-}
-
 static struct pnv_ioda_pe *pnv_ioda_init_pe(struct pnv_phb *phb, int pe_no)
 {
 	s64 rc;
 
 	phb->ioda.pe_array[pe_no].phb = phb;
 	phb->ioda.pe_array[pe_no].pe_number = pe_no;
+	phb->ioda.pe_array[pe_no].dma_setup_done = false;
 
 	/*
 	 * Clear the PE frozen state as it might be put into frozen state
@@ -164,26 +145,48 @@
 		return;
 	}
 
+	mutex_lock(&phb->ioda.pe_alloc_mutex);
 	if (test_and_set_bit(pe_no, phb->ioda.pe_alloc))
 		pr_debug("%s: PE %x was reserved on PHB#%x\n",
 			 __func__, pe_no, phb->hose->global_number);
+	mutex_unlock(&phb->ioda.pe_alloc_mutex);
 
 	pnv_ioda_init_pe(phb, pe_no);
 }
 
-static struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb)
+struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count)
 {
-	long pe;
+	struct pnv_ioda_pe *ret = NULL;
+	int run = 0, pe, i;
 
+	mutex_lock(&phb->ioda.pe_alloc_mutex);
+
+	/* scan backwards for a run of @count cleared bits */
 	for (pe = phb->ioda.total_pe_num - 1; pe >= 0; pe--) {
-		if (!test_and_set_bit(pe, phb->ioda.pe_alloc))
-			return pnv_ioda_init_pe(phb, pe);
-	}
+		if (test_bit(pe, phb->ioda.pe_alloc)) {
+			run = 0;
+			continue;
+		}
 
-	return NULL;
+		run++;
+		if (run == count)
+			break;
+	}
+	if (run != count)
+		goto out;
+
+	for (i = pe; i < pe + count; i++) {
+		set_bit(i, phb->ioda.pe_alloc);
+		pnv_ioda_init_pe(phb, i);
+	}
+	ret = &phb->ioda.pe_array[pe];
+
+out:
+	mutex_unlock(&phb->ioda.pe_alloc_mutex);
+	return ret;
 }
 
-static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
+void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
 {
 	struct pnv_phb *phb = pe->phb;
 	unsigned int pe_num = pe->pe_number;
@@ -192,7 +195,10 @@
 	WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */
 	kfree(pe->npucomp);
 	memset(pe, 0, sizeof(struct pnv_ioda_pe));
+
+	mutex_lock(&phb->ioda.pe_alloc_mutex);
 	clear_bit(pe_num, phb->ioda.pe_alloc);
+	mutex_unlock(&phb->ioda.pe_alloc_mutex);
 }
 
 /* The default M64 BAR is shared by all PEs */
@@ -252,8 +258,7 @@
 static void pnv_ioda_reserve_dev_m64_pe(struct pci_dev *pdev,
 					 unsigned long *pe_bitmap)
 {
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
 	struct resource *r;
 	resource_size_t base, sgsz, start, end;
 	int segno, i;
@@ -311,6 +316,28 @@
 		}
 	}
 
+	for (index = 0; index < phb->ioda.total_pe_num; index++) {
+		int64_t rc;
+
+		/*
+		 * P7IOC supports M64DT, which helps map an M64 segment
+		 * to one particular PE#. However, PHB3 has a fixed mapping
+		 * between M64 segments and PE#s. In order to have the same
+		 * logic for P7IOC and PHB3, we enforce a fixed mapping
+		 * between M64 segments and PE#s on P7IOC.
+		 */
+		rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+				index, OPAL_M64_WINDOW_TYPE,
+				index / PNV_IODA1_M64_SEGS,
+				index % PNV_IODA1_M64_SEGS);
+		if (rc != OPAL_SUCCESS) {
+			pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n",
+				__func__, rc, phb->hose->global_number,
+				index);
+			goto fail;
+		}
+	}
+
 	/*
 	 * Exclude the segments for reserved and root bus PE, which
 	 * are the first or last two PEs.
@@ -351,8 +378,7 @@
 
 static struct pnv_ioda_pe *pnv_ioda_pick_m64_pe(struct pci_bus *bus, bool all)
 {
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
 	struct pnv_ioda_pe *master_pe, *pe;
 	unsigned long size, *pe_alloc;
 	int i;
@@ -403,26 +429,6 @@
 			pe->master = master_pe;
 			list_add_tail(&pe->list, &master_pe->slaves);
 		}
-
-		/*
-		 * P7IOC supports M64DT, which helps mapping M64 segment
-		 * to one particular PE#. However, PHB3 has fixed mapping
-		 * between M64 segment and PE#. In order to have same logic
-		 * for P7IOC and PHB3, we enforce fixed mapping between M64
-		 * segment and PE# on P7IOC.
-		 */
-		if (phb->type == PNV_PHB_IODA1) {
-			int64_t rc;
-
-			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
-					pe->pe_number, OPAL_M64_WINDOW_TYPE,
-					pe->pe_number / PNV_IODA1_M64_SEGS,
-					pe->pe_number % PNV_IODA1_M64_SEGS);
-			if (rc != OPAL_SUCCESS)
-				pr_warn("%s: Error %lld mapping M64 for PHB#%x-PE#%x\n",
-					__func__, rc, phb->hose->global_number,
-					pe->pe_number);
-		}
 	}
 
 	kfree(pe_alloc);
@@ -673,8 +679,7 @@
 
 struct pnv_ioda_pe *pnv_ioda_get_pe(struct pci_dev *dev)
 {
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
 	struct pci_dn *pdn = pci_get_pdn(dev);
 
 	if (!pdn)
@@ -816,7 +821,7 @@
 		pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc);
 }
 
-static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
+int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 {
 	struct pci_dev *parent;
 	uint8_t bcomp, dcomp, fcomp;
@@ -887,7 +892,7 @@
 	return 0;
 }
 
-static int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
+int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 {
 	struct pci_dev *parent;
 	uint8_t bcomp, dcomp, fcomp;
@@ -982,95 +987,9 @@
 	return 0;
 }
 
-#ifdef CONFIG_PCI_IOV
-static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
-{
-	struct pci_dn *pdn = pci_get_pdn(dev);
-	int i;
-	struct resource *res, res2;
-	resource_size_t size;
-	u16 num_vfs;
-
-	if (!dev->is_physfn)
-		return -EINVAL;
-
-	/*
-	 * "offset" is in VFs.  The M64 windows are sized so that when they
-	 * are segmented, each segment is the same size as the IOV BAR.
-	 * Each segment is in a separate PE, and the high order bits of the
-	 * address are the PE number.  Therefore, each VF's BAR is in a
-	 * separate PE, and changing the IOV BAR start address changes the
-	 * range of PEs the VFs are in.
-	 */
-	num_vfs = pdn->num_vfs;
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
-		res = &dev->resource[i + PCI_IOV_RESOURCES];
-		if (!res->flags || !res->parent)
-			continue;
-
-		/*
-		 * The actual IOV BAR range is determined by the start address
-		 * and the actual size for num_vfs VFs BAR.  This check is to
-		 * make sure that after shifting, the range will not overlap
-		 * with another device.
-		 */
-		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
-		res2.flags = res->flags;
-		res2.start = res->start + (size * offset);
-		res2.end = res2.start + (size * num_vfs) - 1;
-
-		if (res2.end > res->end) {
-			dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n",
-				i, &res2, res, num_vfs, offset);
-			return -EBUSY;
-		}
-	}
-
-	/*
-	 * Since M64 BAR shares segments among all possible 256 PEs,
-	 * we have to shift the beginning of PF IOV BAR to make it start from
-	 * the segment which belongs to the PE number assigned to the first VF.
-	 * This creates a "hole" in the /proc/iomem which could be used for
-	 * allocating other resources so we reserve this area below and
-	 * release when IOV is released.
-	 */
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
-		res = &dev->resource[i + PCI_IOV_RESOURCES];
-		if (!res->flags || !res->parent)
-			continue;
-
-		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
-		res2 = *res;
-		res->start += size * offset;
-
-		dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
-			 i, &res2, res, (offset > 0) ? "En" : "Dis",
-			 num_vfs, offset);
-
-		if (offset < 0) {
-			devm_release_resource(&dev->dev, &pdn->holes[i]);
-			memset(&pdn->holes[i], 0, sizeof(pdn->holes[i]));
-		}
-
-		pci_update_resource(dev, i + PCI_IOV_RESOURCES);
-
-		if (offset > 0) {
-			pdn->holes[i].start = res2.start;
-			pdn->holes[i].end = res2.start + size * offset - 1;
-			pdn->holes[i].flags = IORESOURCE_BUS;
-			pdn->holes[i].name = "pnv_iov_reserved";
-			devm_request_resource(&dev->dev, res->parent,
-					&pdn->holes[i]);
-		}
-	}
-	return 0;
-}
-#endif /* CONFIG_PCI_IOV */
-
 static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev)
 {
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
 	struct pci_dn *pdn = pci_get_pdn(dev);
 	struct pnv_ioda_pe *pe;
 
@@ -1082,7 +1001,7 @@
 	if (pdn->pe_number != IODA_INVALID_PE)
 		return NULL;
 
-	pe = pnv_ioda_alloc_pe(phb);
+	pe = pnv_ioda_alloc_pe(phb, 1);
 	if (!pe) {
 		pr_warn("%s: Not enough PE# available, disabling device\n",
 			pci_name(dev));
@@ -1129,8 +1048,7 @@
  */
 static struct pnv_ioda_pe *pnv_ioda_setup_bus_PE(struct pci_bus *bus, bool all)
 {
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
 	struct pnv_ioda_pe *pe = NULL;
 	unsigned int pe_num;
 
@@ -1154,7 +1072,7 @@
 
 	/* The PE number isn't pinned by M64 */
 	if (!pe)
-		pe = pnv_ioda_alloc_pe(phb);
+		pe = pnv_ioda_alloc_pe(phb, 1);
 
 	if (!pe) {
 		pr_warn("%s: Not enough PE# available for PCI bus %04x:%02x\n",
@@ -1196,8 +1114,7 @@
 	struct pnv_ioda_pe *pe;
 	struct pci_dev *gpu_pdev;
 	struct pci_dn *npu_pdn;
-	struct pci_controller *hose = pci_bus_to_host(npu_pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(npu_pdev->bus);
 
 	/*
 	 * Intentionally leak a reference on the npu device (for
@@ -1297,446 +1214,12 @@
 #endif
 }
 
-#ifdef CONFIG_PCI_IOV
-static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
-{
-	struct pci_bus        *bus;
-	struct pci_controller *hose;
-	struct pnv_phb        *phb;
-	struct pci_dn         *pdn;
-	int                    i, j;
-	int                    m64_bars;
-
-	bus = pdev->bus;
-	hose = pci_bus_to_host(bus);
-	phb = hose->private_data;
-	pdn = pci_get_pdn(pdev);
-
-	if (pdn->m64_single_mode)
-		m64_bars = num_vfs;
-	else
-		m64_bars = 1;
-
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
-		for (j = 0; j < m64_bars; j++) {
-			if (pdn->m64_map[j][i] == IODA_INVALID_M64)
-				continue;
-			opal_pci_phb_mmio_enable(phb->opal_id,
-				OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 0);
-			clear_bit(pdn->m64_map[j][i], &phb->ioda.m64_bar_alloc);
-			pdn->m64_map[j][i] = IODA_INVALID_M64;
-		}
-
-	kfree(pdn->m64_map);
-	return 0;
-}
-
-static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
-{
-	struct pci_bus        *bus;
-	struct pci_controller *hose;
-	struct pnv_phb        *phb;
-	struct pci_dn         *pdn;
-	unsigned int           win;
-	struct resource       *res;
-	int                    i, j;
-	int64_t                rc;
-	int                    total_vfs;
-	resource_size_t        size, start;
-	int                    pe_num;
-	int                    m64_bars;
-
-	bus = pdev->bus;
-	hose = pci_bus_to_host(bus);
-	phb = hose->private_data;
-	pdn = pci_get_pdn(pdev);
-	total_vfs = pci_sriov_get_totalvfs(pdev);
-
-	if (pdn->m64_single_mode)
-		m64_bars = num_vfs;
-	else
-		m64_bars = 1;
-
-	pdn->m64_map = kmalloc_array(m64_bars,
-				     sizeof(*pdn->m64_map),
-				     GFP_KERNEL);
-	if (!pdn->m64_map)
-		return -ENOMEM;
-	/* Initialize the m64_map to IODA_INVALID_M64 */
-	for (i = 0; i < m64_bars ; i++)
-		for (j = 0; j < PCI_SRIOV_NUM_BARS; j++)
-			pdn->m64_map[i][j] = IODA_INVALID_M64;
-
-
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
-		res = &pdev->resource[i + PCI_IOV_RESOURCES];
-		if (!res->flags || !res->parent)
-			continue;
-
-		for (j = 0; j < m64_bars; j++) {
-			do {
-				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
-						phb->ioda.m64_bar_idx + 1, 0);
-
-				if (win >= phb->ioda.m64_bar_idx + 1)
-					goto m64_failed;
-			} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
-
-			pdn->m64_map[j][i] = win;
-
-			if (pdn->m64_single_mode) {
-				size = pci_iov_resource_size(pdev,
-							PCI_IOV_RESOURCES + i);
-				start = res->start + size * j;
-			} else {
-				size = resource_size(res);
-				start = res->start;
-			}
-
-			/* Map the M64 here */
-			if (pdn->m64_single_mode) {
-				pe_num = pdn->pe_num_map[j];
-				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
-						pe_num, OPAL_M64_WINDOW_TYPE,
-						pdn->m64_map[j][i], 0);
-			}
-
-			rc = opal_pci_set_phb_mem_window(phb->opal_id,
-						 OPAL_M64_WINDOW_TYPE,
-						 pdn->m64_map[j][i],
-						 start,
-						 0, /* unused */
-						 size);
-
-
-			if (rc != OPAL_SUCCESS) {
-				dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n",
-					win, rc);
-				goto m64_failed;
-			}
-
-			if (pdn->m64_single_mode)
-				rc = opal_pci_phb_mmio_enable(phb->opal_id,
-				     OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 2);
-			else
-				rc = opal_pci_phb_mmio_enable(phb->opal_id,
-				     OPAL_M64_WINDOW_TYPE, pdn->m64_map[j][i], 1);
-
-			if (rc != OPAL_SUCCESS) {
-				dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
-					win, rc);
-				goto m64_failed;
-			}
-		}
-	}
-	return 0;
-
-m64_failed:
-	pnv_pci_vf_release_m64(pdev, num_vfs);
-	return -EBUSY;
-}
-
-static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
-		int num);
-
-static void pnv_pci_ioda2_release_dma_pe(struct pci_dev *dev, struct pnv_ioda_pe *pe)
-{
-	struct iommu_table    *tbl;
-	int64_t               rc;
-
-	tbl = pe->table_group.tables[0];
-	rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
-	if (rc)
-		pe_warn(pe, "OPAL error %lld release DMA window\n", rc);
-
-	pnv_pci_ioda2_set_bypass(pe, false);
-	if (pe->table_group.group) {
-		iommu_group_put(pe->table_group.group);
-		BUG_ON(pe->table_group.group);
-	}
-	iommu_tce_table_put(tbl);
-}
-
-static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
-{
-	struct pci_bus        *bus;
-	struct pci_controller *hose;
-	struct pnv_phb        *phb;
-	struct pnv_ioda_pe    *pe, *pe_n;
-	struct pci_dn         *pdn;
-
-	bus = pdev->bus;
-	hose = pci_bus_to_host(bus);
-	phb = hose->private_data;
-	pdn = pci_get_pdn(pdev);
-
-	if (!pdev->is_physfn)
-		return;
-
-	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
-		if (pe->parent_dev != pdev)
-			continue;
-
-		pnv_pci_ioda2_release_dma_pe(pdev, pe);
-
-		/* Remove from list */
-		mutex_lock(&phb->ioda.pe_list_mutex);
-		list_del(&pe->list);
-		mutex_unlock(&phb->ioda.pe_list_mutex);
-
-		pnv_ioda_deconfigure_pe(phb, pe);
-
-		pnv_ioda_free_pe(pe);
-	}
-}
-
-void pnv_pci_sriov_disable(struct pci_dev *pdev)
-{
-	struct pci_bus        *bus;
-	struct pci_controller *hose;
-	struct pnv_phb        *phb;
-	struct pnv_ioda_pe    *pe;
-	struct pci_dn         *pdn;
-	u16                    num_vfs, i;
-
-	bus = pdev->bus;
-	hose = pci_bus_to_host(bus);
-	phb = hose->private_data;
-	pdn = pci_get_pdn(pdev);
-	num_vfs = pdn->num_vfs;
-
-	/* Release VF PEs */
-	pnv_ioda_release_vf_PE(pdev);
-
-	if (phb->type == PNV_PHB_IODA2) {
-		if (!pdn->m64_single_mode)
-			pnv_pci_vf_resource_shift(pdev, -*pdn->pe_num_map);
-
-		/* Release M64 windows */
-		pnv_pci_vf_release_m64(pdev, num_vfs);
-
-		/* Release PE numbers */
-		if (pdn->m64_single_mode) {
-			for (i = 0; i < num_vfs; i++) {
-				if (pdn->pe_num_map[i] == IODA_INVALID_PE)
-					continue;
-
-				pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
-				pnv_ioda_free_pe(pe);
-			}
-		} else
-			bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
-		/* Releasing pe_num_map */
-		kfree(pdn->pe_num_map);
-	}
-}
-
-static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
 				       struct pnv_ioda_pe *pe);
-static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
-{
-	struct pci_bus        *bus;
-	struct pci_controller *hose;
-	struct pnv_phb        *phb;
-	struct pnv_ioda_pe    *pe;
-	int                    pe_num;
-	u16                    vf_index;
-	struct pci_dn         *pdn;
-
-	bus = pdev->bus;
-	hose = pci_bus_to_host(bus);
-	phb = hose->private_data;
-	pdn = pci_get_pdn(pdev);
-
-	if (!pdev->is_physfn)
-		return;
-
-	/* Reserve PE for each VF */
-	for (vf_index = 0; vf_index < num_vfs; vf_index++) {
-		int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index);
-		int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
-		struct pci_dn *vf_pdn;
-
-		if (pdn->m64_single_mode)
-			pe_num = pdn->pe_num_map[vf_index];
-		else
-			pe_num = *pdn->pe_num_map + vf_index;
-
-		pe = &phb->ioda.pe_array[pe_num];
-		pe->pe_number = pe_num;
-		pe->phb = phb;
-		pe->flags = PNV_IODA_PE_VF;
-		pe->pbus = NULL;
-		pe->parent_dev = pdev;
-		pe->mve_number = -1;
-		pe->rid = (vf_bus << 8) | vf_devfn;
-
-		pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
-			hose->global_number, pdev->bus->number,
-			PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);
-
-		if (pnv_ioda_configure_pe(phb, pe)) {
-			/* XXX What do we do here ? */
-			pnv_ioda_free_pe(pe);
-			pe->pdev = NULL;
-			continue;
-		}
-
-		/* Put PE to the list */
-		mutex_lock(&phb->ioda.pe_list_mutex);
-		list_add_tail(&pe->list, &phb->ioda.pe_list);
-		mutex_unlock(&phb->ioda.pe_list_mutex);
-
-		/* associate this pe to it's pdn */
-		list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) {
-			if (vf_pdn->busno == vf_bus &&
-			    vf_pdn->devfn == vf_devfn) {
-				vf_pdn->pe_number = pe_num;
-				break;
-			}
-		}
-
-		pnv_pci_ioda2_setup_dma_pe(phb, pe);
-	}
-}
-
-int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
-{
-	struct pci_bus        *bus;
-	struct pci_controller *hose;
-	struct pnv_phb        *phb;
-	struct pnv_ioda_pe    *pe;
-	struct pci_dn         *pdn;
-	int                    ret;
-	u16                    i;
-
-	bus = pdev->bus;
-	hose = pci_bus_to_host(bus);
-	phb = hose->private_data;
-	pdn = pci_get_pdn(pdev);
-
-	if (phb->type == PNV_PHB_IODA2) {
-		if (!pdn->vfs_expanded) {
-			dev_info(&pdev->dev, "don't support this SRIOV device"
-				" with non 64bit-prefetchable IOV BAR\n");
-			return -ENOSPC;
-		}
-
-		/*
-		 * When M64 BARs functions in Single PE mode, the number of VFs
-		 * could be enabled must be less than the number of M64 BARs.
-		 */
-		if (pdn->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) {
-			dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n");
-			return -EBUSY;
-		}
-
-		/* Allocating pe_num_map */
-		if (pdn->m64_single_mode)
-			pdn->pe_num_map = kmalloc_array(num_vfs,
-							sizeof(*pdn->pe_num_map),
-							GFP_KERNEL);
-		else
-			pdn->pe_num_map = kmalloc(sizeof(*pdn->pe_num_map), GFP_KERNEL);
-
-		if (!pdn->pe_num_map)
-			return -ENOMEM;
-
-		if (pdn->m64_single_mode)
-			for (i = 0; i < num_vfs; i++)
-				pdn->pe_num_map[i] = IODA_INVALID_PE;
-
-		/* Calculate available PE for required VFs */
-		if (pdn->m64_single_mode) {
-			for (i = 0; i < num_vfs; i++) {
-				pe = pnv_ioda_alloc_pe(phb);
-				if (!pe) {
-					ret = -EBUSY;
-					goto m64_failed;
-				}
-
-				pdn->pe_num_map[i] = pe->pe_number;
-			}
-		} else {
-			mutex_lock(&phb->ioda.pe_alloc_mutex);
-			*pdn->pe_num_map = bitmap_find_next_zero_area(
-				phb->ioda.pe_alloc, phb->ioda.total_pe_num,
-				0, num_vfs, 0);
-			if (*pdn->pe_num_map >= phb->ioda.total_pe_num) {
-				mutex_unlock(&phb->ioda.pe_alloc_mutex);
-				dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
-				kfree(pdn->pe_num_map);
-				return -EBUSY;
-			}
-			bitmap_set(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
-			mutex_unlock(&phb->ioda.pe_alloc_mutex);
-		}
-		pdn->num_vfs = num_vfs;
-
-		/* Assign M64 window accordingly */
-		ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
-		if (ret) {
-			dev_info(&pdev->dev, "Not enough M64 window resources\n");
-			goto m64_failed;
-		}
-
-		/*
-		 * When using one M64 BAR to map one IOV BAR, we need to shift
-		 * the IOV BAR according to the PE# allocated to the VFs.
-		 * Otherwise, the PE# for the VF will conflict with others.
-		 */
-		if (!pdn->m64_single_mode) {
-			ret = pnv_pci_vf_resource_shift(pdev, *pdn->pe_num_map);
-			if (ret)
-				goto m64_failed;
-		}
-	}
-
-	/* Setup VF PEs */
-	pnv_ioda_setup_vf_PE(pdev, num_vfs);
-
-	return 0;
-
-m64_failed:
-	if (pdn->m64_single_mode) {
-		for (i = 0; i < num_vfs; i++) {
-			if (pdn->pe_num_map[i] == IODA_INVALID_PE)
-				continue;
-
-			pe = &phb->ioda.pe_array[pdn->pe_num_map[i]];
-			pnv_ioda_free_pe(pe);
-		}
-	} else
-		bitmap_clear(phb->ioda.pe_alloc, *pdn->pe_num_map, num_vfs);
-
-	/* Releasing pe_num_map */
-	kfree(pdn->pe_num_map);
-
-	return ret;
-}
-
-int pnv_pcibios_sriov_disable(struct pci_dev *pdev)
-{
-	pnv_pci_sriov_disable(pdev);
-
-	/* Release PCI data */
-	remove_sriov_vf_pdns(pdev);
-	return 0;
-}
-
-int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
-{
-	/* Allocate PCI data */
-	add_sriov_vf_pdns(pdev);
-
-	return pnv_pci_sriov_enable(pdev, num_vfs);
-}
-#endif /* CONFIG_PCI_IOV */
 
 static void pnv_pci_ioda_dma_dev_setup(struct pci_dev *pdev)
 {
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
 	struct pci_dn *pdn = pci_get_pdn(pdev);
 	struct pnv_ioda_pe *pe;
 
@@ -1762,6 +1245,24 @@
 		pci_info(pdev, "Added to existing PE#%x\n", pe->pe_number);
 	}
 
+	/*
+	 * We assume that bridges *probably* don't need to do any DMA so we can
+	 * skip allocating a TCE table, etc. unless we get a non-bridge device.
+	 */
+	if (!pe->dma_setup_done && !pci_is_bridge(pdev)) {
+		switch (phb->type) {
+		case PNV_PHB_IODA1:
+			pnv_pci_ioda1_setup_dma_pe(phb, pe);
+			break;
+		case PNV_PHB_IODA2:
+			pnv_pci_ioda2_setup_dma_pe(phb, pe);
+			break;
+		default:
+			pr_warn("%s: No DMA for PHB#%x (type %d)\n",
+				__func__, phb->hose->global_number, phb->type);
+		}
+	}
+
 	if (pdn)
 		pdn->pe_number = pe->pe_number;
 	pe->device_count++;
@@ -1847,8 +1348,7 @@
 static bool pnv_pci_ioda_iommu_bypass_supported(struct pci_dev *pdev,
 		u64 dma_mask)
 {
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
 	struct pci_dn *pdn = pci_get_pdn(pdev);
 	struct pnv_ioda_pe *pe;
 
@@ -1885,19 +1385,6 @@
 	return false;
 }
 
-static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
-{
-	struct pci_dev *dev;
-
-	list_for_each_entry(dev, &bus->devices, bus_list) {
-		set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
-		dev->dev.archdata.dma_offset = pe->tce_bypass_base;
-
-		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
-			pnv_ioda_setup_bus_dma(pe, dev->subordinate);
-	}
-}
-
 static inline __be64 __iomem *pnv_ioda_get_inval_reg(struct pnv_phb *phb,
 						     bool real_mode)
 {
@@ -2285,6 +1772,7 @@
 	pe->table_group.tce32_size = tbl->it_size << tbl->it_page_shift;
 	iommu_init_table(tbl, phb->hose->node, 0, 0);
 
+	pe->dma_setup_done = true;
 	return;
  fail:
 	/* XXX Failure: Try to fallback to 64-bit only ? */
@@ -2474,7 +1962,6 @@
 	return 0;
 }
 
-#if defined(CONFIG_IOMMU_API) || defined(CONFIG_PCI_IOV)
 static long pnv_pci_ioda2_unset_window(struct iommu_table_group *table_group,
 		int num)
 {
@@ -2498,7 +1985,6 @@
 
 	return ret;
 }
-#endif
 
 #ifdef CONFIG_IOMMU_API
 unsigned long pnv_pci_ioda2_get_table_size(__u32 page_shift,
@@ -2547,6 +2033,19 @@
 	return ret;
 }
 
+static void pnv_ioda_setup_bus_dma(struct pnv_ioda_pe *pe, struct pci_bus *bus)
+{
+	struct pci_dev *dev;
+
+	list_for_each_entry(dev, &bus->devices, bus_list) {
+		set_iommu_table_base(&dev->dev, pe->table_group.tables[0]);
+		dev->dev.archdata.dma_offset = pe->tce_bypass_base;
+
+		if ((pe->flags & PNV_IODA_PE_BUS_ALL) && dev->subordinate)
+			pnv_ioda_setup_bus_dma(pe, dev->subordinate);
+	}
+}
+
 static void pnv_ioda2_take_ownership(struct iommu_table_group *table_group)
 {
 	struct pnv_ioda_pe *pe = container_of(table_group, struct pnv_ioda_pe,
@@ -2583,14 +2082,11 @@
 };
 #endif
 
-static void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
-				       struct pnv_ioda_pe *pe)
+void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb,
+				struct pnv_ioda_pe *pe)
 {
 	int64_t rc;
 
-	if (!pnv_pci_ioda_pe_dma_weight(pe))
-		return;
-
 	/* TVE #1 is selected by PCI address bit 59 */
 	pe->tce_bypass_base = 1ull << 59;
 
@@ -2615,6 +2111,7 @@
 	iommu_register_group(&pe->table_group, phb->hose->global_number,
 			     pe->pe_number);
 #endif
+	pe->dma_setup_done = true;
 }
 
 int64_t pnv_opal_pci_msi_eoi(struct irq_chip *chip, unsigned int hw_irq)
@@ -2763,118 +2260,6 @@
 		count, phb->msi_base);
 }
 
-#ifdef CONFIG_PCI_IOV
-static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
-{
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
-	const resource_size_t gate = phb->ioda.m64_segsize >> 2;
-	struct resource *res;
-	int i;
-	resource_size_t size, total_vf_bar_sz;
-	struct pci_dn *pdn;
-	int mul, total_vfs;
-
-	pdn = pci_get_pdn(pdev);
-	pdn->vfs_expanded = 0;
-	pdn->m64_single_mode = false;
-
-	total_vfs = pci_sriov_get_totalvfs(pdev);
-	mul = phb->ioda.total_pe_num;
-	total_vf_bar_sz = 0;
-
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
-		res = &pdev->resource[i + PCI_IOV_RESOURCES];
-		if (!res->flags || res->parent)
-			continue;
-		if (!pnv_pci_is_m64_flags(res->flags)) {
-			dev_warn(&pdev->dev, "Don't support SR-IOV with"
-					" non M64 VF BAR%d: %pR. \n",
-				 i, res);
-			goto truncate_iov;
-		}
-
-		total_vf_bar_sz += pci_iov_resource_size(pdev,
-				i + PCI_IOV_RESOURCES);
-
-		/*
-		 * If bigger than quarter of M64 segment size, just round up
-		 * power of two.
-		 *
-		 * Generally, one M64 BAR maps one IOV BAR. To avoid conflict
-		 * with other devices, IOV BAR size is expanded to be
-		 * (total_pe * VF_BAR_size).  When VF_BAR_size is half of M64
-		 * segment size , the expanded size would equal to half of the
-		 * whole M64 space size, which will exhaust the M64 Space and
-		 * limit the system flexibility.  This is a design decision to
-		 * set the boundary to quarter of the M64 segment size.
-		 */
-		if (total_vf_bar_sz > gate) {
-			mul = roundup_pow_of_two(total_vfs);
-			dev_info(&pdev->dev,
-				"VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
-				total_vf_bar_sz, gate, mul);
-			pdn->m64_single_mode = true;
-			break;
-		}
-	}
-
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
-		res = &pdev->resource[i + PCI_IOV_RESOURCES];
-		if (!res->flags || res->parent)
-			continue;
-
-		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
-		/*
-		 * On PHB3, the minimum size alignment of M64 BAR in single
-		 * mode is 32MB.
-		 */
-		if (pdn->m64_single_mode && (size < SZ_32M))
-			goto truncate_iov;
-		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
-		res->end = res->start + size * mul - 1;
-		dev_dbg(&pdev->dev, "                       %pR\n", res);
-		dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
-			 i, res, mul);
-	}
-	pdn->vfs_expanded = mul;
-
-	return;
-
-truncate_iov:
-	/* To save MMIO space, IOV BAR is truncated. */
-	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
-		res = &pdev->resource[i + PCI_IOV_RESOURCES];
-		res->flags = 0;
-		res->end = res->start - 1;
-	}
-}
-
-static void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev)
-{
-	if (WARN_ON(pci_dev_is_added(pdev)))
-		return;
-
-	if (pdev->is_virtfn) {
-		struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev);
-
-		/*
-		 * VF PEs are single-device PEs so their pdev pointer needs to
-		 * be set. The pdev doesn't exist when the PE is allocated (in
-		 * (pcibios_sriov_enable()) so we fix it up here.
-		 */
-		pe->pdev = pdev;
-		WARN_ON(!(pe->flags & PNV_IODA_PE_VF));
-	} else if (pdev->is_physfn) {
-		/*
-		 * For PFs adjust their allocated IOV resources to match what
-		 * the PHB can support using it's M64 BAR table.
-		 */
-		pnv_pci_ioda_fixup_iov_resources(pdev);
-	}
-}
-#endif /* CONFIG_PCI_IOV */
-
 static void pnv_ioda_setup_pe_res(struct pnv_ioda_pe *pe,
 				  struct resource *res)
 {
@@ -3101,10 +2486,9 @@
 static resource_size_t pnv_pci_window_alignment(struct pci_bus *bus,
 						unsigned long type)
 {
-	struct pci_dev *bridge;
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
 	int num_pci_bridges = 0;
+	struct pci_dev *bridge;
 
 	bridge = bus->self;
 	while (bridge) {
@@ -3190,8 +2574,6 @@
 
 static void pnv_pci_configure_bus(struct pci_bus *bus)
 {
-	struct pci_controller *hose = pci_bus_to_host(bus);
-	struct pnv_phb *phb = hose->private_data;
 	struct pci_dev *bridge = bus->self;
 	struct pnv_ioda_pe *pe;
 	bool all = (bridge && pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE);
@@ -3215,17 +2597,6 @@
 		return;
 
 	pnv_ioda_setup_pe_seg(pe);
-	switch (phb->type) {
-	case PNV_PHB_IODA1:
-		pnv_pci_ioda1_setup_dma_pe(phb, pe);
-		break;
-	case PNV_PHB_IODA2:
-		pnv_pci_ioda2_setup_dma_pe(phb, pe);
-		break;
-	default:
-		pr_warn("%s: No DMA for PHB#%x (type %d)\n",
-			__func__, phb->hose->global_number, phb->type);
-	}
 }
 
 static resource_size_t pnv_pci_default_alignment(void)
@@ -3233,49 +2604,12 @@
 	return PAGE_SIZE;
 }
 
-#ifdef CONFIG_PCI_IOV
-static resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
-						      int resno)
-{
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
-	struct pci_dn *pdn = pci_get_pdn(pdev);
-	resource_size_t align;
-
-	/*
-	 * On PowerNV platform, IOV BAR is mapped by M64 BAR to enable the
-	 * SR-IOV. While from hardware perspective, the range mapped by M64
-	 * BAR should be size aligned.
-	 *
-	 * When IOV BAR is mapped with M64 BAR in Single PE mode, the extra
-	 * powernv-specific hardware restriction is gone. But if just use the
-	 * VF BAR size as the alignment, PF BAR / VF BAR may be allocated with
-	 * in one segment of M64 #15, which introduces the PE conflict between
-	 * PF and VF. Based on this, the minimum alignment of an IOV BAR is
-	 * m64_segsize.
-	 *
-	 * This function returns the total IOV BAR size if M64 BAR is in
-	 * Shared PE mode or just VF BAR size if not.
-	 * If the M64 BAR is in Single PE mode, return the VF BAR size or
-	 * M64 segment size if IOV BAR size is less.
-	 */
-	align = pci_iov_resource_size(pdev, resno);
-	if (!pdn->vfs_expanded)
-		return align;
-	if (pdn->m64_single_mode)
-		return max(align, (resource_size_t)phb->ioda.m64_segsize);
-
-	return pdn->vfs_expanded * align;
-}
-#endif /* CONFIG_PCI_IOV */
-
 /* Prevent enabling devices for which we couldn't properly
  * assign a PE
  */
 static bool pnv_pci_enable_device_hook(struct pci_dev *dev)
 {
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
 	struct pci_dn *pdn;
 
 	/* The function is probably called while the PEs have
@@ -3346,11 +2680,10 @@
 
 static void pnv_pci_ioda1_release_pe_dma(struct pnv_ioda_pe *pe)
 {
-	unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
 	struct iommu_table *tbl = pe->table_group.tables[0];
 	int64_t rc;
 
-	if (!weight)
+	if (!pe->dma_setup_done)
 		return;
 
 	rc = pnv_pci_ioda1_unset_window(&pe->table_group, 0);
@@ -3367,22 +2700,17 @@
 	iommu_tce_table_put(tbl);
 }
 
-static void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
+void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe)
 {
 	struct iommu_table *tbl = pe->table_group.tables[0];
-	unsigned int weight = pnv_pci_ioda_pe_dma_weight(pe);
-#ifdef CONFIG_IOMMU_API
 	int64_t rc;
-#endif
 
-	if (!weight)
+	if (!pe->dma_setup_done)
 		return;
 
-#ifdef CONFIG_IOMMU_API
 	rc = pnv_pci_ioda2_unset_window(&pe->table_group, 0);
 	if (rc)
 		pe_warn(pe, "OPAL error %lld release DMA window\n", rc);
-#endif
 
 	pnv_pci_ioda2_set_bypass(pe, false);
 	if (pe->table_group.group) {
@@ -3405,14 +2733,8 @@
 		if (map[idx] != pe->pe_number)
 			continue;
 
-		if (win == OPAL_M64_WINDOW_TYPE)
-			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
-					phb->ioda.reserved_pe_idx, win,
-					idx / PNV_IODA1_M64_SEGS,
-					idx % PNV_IODA1_M64_SEGS);
-		else
-			rc = opal_pci_map_pe_mmio_window(phb->opal_id,
-					phb->ioda.reserved_pe_idx, win, 0, idx);
+		rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+				phb->ioda.reserved_pe_idx, win, 0, idx);
 
 		if (rc != OPAL_SUCCESS)
 			pe_warn(pe, "Error %lld unmapping (%d) segment#%d\n",
@@ -3431,8 +2753,7 @@
 				     phb->ioda.io_segmap);
 		pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE,
 				     phb->ioda.m32_segmap);
-		pnv_ioda_free_pe_seg(pe, OPAL_M64_WINDOW_TYPE,
-				     phb->ioda.m64_segmap);
+		/* M64 is pre-configured by pnv_ioda1_init_m64() */
 	} else if (phb->type == PNV_PHB_IODA2) {
 		pnv_ioda_free_pe_seg(pe, OPAL_M32_WINDOW_TYPE,
 				     phb->ioda.m32_segmap);
@@ -3488,17 +2809,27 @@
 
 static void pnv_pci_release_device(struct pci_dev *pdev)
 {
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
 	struct pci_dn *pdn = pci_get_pdn(pdev);
 	struct pnv_ioda_pe *pe;
 
+	/* The VF PE state is torn down when sriov_disable() is called */
 	if (pdev->is_virtfn)
 		return;
 
 	if (!pdn || pdn->pe_number == IODA_INVALID_PE)
 		return;
 
+#ifdef CONFIG_PCI_IOV
+	/*
+	 * FIXME: Try to move this to sriov_disable(). It's here because we
+	 * allocate the iov state at probe time, when we need to fiddle with
+	 * the IOV resources.
+	 */
+	if (pdev->is_physfn)
+		kfree(pdev->dev.archdata.iov_data);
+#endif
+
 	/*
 	 * PCI hotplug can happen as part of EEH error recovery. The @pdn
 	 * isn't removed and added afterwards in this scenario. We should
@@ -3534,8 +2865,7 @@
 
 static void pnv_pci_ioda_dma_bus_setup(struct pci_bus *bus)
 {
-	struct pci_controller *hose = bus->sysdata;
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(bus);
 	struct pnv_ioda_pe *pe;
 
 	list_for_each_entry(pe, &phb->ioda.pe_list, list) {
@@ -3760,7 +3090,7 @@
 		pnv_ioda_reserve_pe(phb, phb->ioda.root_pe_idx);
 	} else {
 		/* otherwise just allocate one */
-		root_pe = pnv_ioda_alloc_pe(phb);
+		root_pe = pnv_ioda_alloc_pe(phb, 1);
 		phb->ioda.root_pe_idx = root_pe->pe_number;
 	}
 
@@ -3873,8 +3203,7 @@
 
 static void pnv_npu2_opencapi_cfg_size_fixup(struct pci_dev *dev)
 {
-	struct pci_controller *hose = pci_bus_to_host(dev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
 
 	if (!machine_is(powernv))
 		return;
diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
new file mode 100644
index 0000000..c4434f2
--- /dev/null
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -0,0 +1,766 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/kernel.h>
+#include <linux/ioport.h>
+#include <linux/bitmap.h>
+#include <linux/pci.h>
+
+#include <asm/opal.h>
+
+#include "pci.h"
+
+/* for pci_dev_is_added() */
+#include "../../../../drivers/pci/pci.h"
+
+/*
+ * The majority of the complexity in supporting SR-IOV on PowerNV comes from
+ * the need to put the MMIO space for each VF into a separate PE. Internally
+ * the PHB maps MMIO addresses to a specific PE using the "Memory BAR Table".
+ * The MBT historically only applied to the 64bit MMIO window of the PHB
+ * so it's common to see it referred to as the "M64BT".
+ *
+ * An MBT entry stores the mapped range as an <base>,<mask> pair. This forces
+ * the address range that we want to map to be power-of-two sized and aligned.
+ * For conventional PCI devices this isn't really an issue since PCI device BARs
+ * have the same requirement.
+ *
+ * For a SR-IOV BAR things are a little more awkward since size and alignment
+ * are not coupled. The alignment is set based on the per-VF BAR size, but
+ * the total BAR area is: number-of-vfs * per-vf-size. The number of VFs
+ * isn't necessarily a power of two, so neither is the total size. To fix that
+ * we need to finesse (read: hack) the Linux BAR allocator so that it will
+ * allocate the SR-IOV BARs in a way that lets us map them using the MBT.
+ *
+ * The changes to size and alignment that we need to do depend on the "mode"
+ * of MBT entry that we use. We only support SR-IOV on PHB3 (IODA2) and above,
+ * so as a baseline we can assume that we have the following BAR modes
+ * available:
+ *
+ *   NB: $PE_COUNT is the number of PEs that the PHB supports.
+ *
+ * a) A segmented BAR that splits the mapped range into $PE_COUNT equally sized
+ *    segments. The n'th segment is mapped to the n'th PE.
+ * b) An un-segmented BAR that maps the whole address range to a specific PE.
+ *
+ *
+ * We prefer to use mode a) since it only requires one MBT entry per SR-IOV
+ * BAR. For comparison, b) requires one entry per-VF per-BAR, or
+ * (num-vfs * num-sriov-bars) in total. To use a) we need the size of each segment
+ * to equal the size of the per-VF BAR area. So:
+ *
+ *	new_size = per-vf-size * number-of-PEs
+ *
+ * The alignment for the SR-IOV BAR also needs to be changed from per-vf-size
+ * to "new_size", calculated above. Implementing this is a convoluted process
+ * which requires several hooks in the PCI core:
+ *
+ * 1. In pcibios_add_device() we call pnv_pci_ioda_fixup_iov().
+ *
+ *    At this point the device has been probed and the device's BARs are sized,
+ *    but no resource allocations have been done. The SR-IOV BARs are sized
+ *    based on the maximum number of VFs supported by the device and we need
+ *    to increase that to new_size.
+ *
+ * 2. Later, when Linux actually assigns resources it tries to make the resource
+ *    allocations for each PCI bus as compact as possible. As a part of that it
+ *    sorts the BARs on a bus by their required alignment, which is calculated
+ *    using pci_resource_alignment().
+ *
+ *    For IOV resources this goes:
+ *    pci_resource_alignment()
+ *        pci_sriov_resource_alignment()
+ *            pcibios_sriov_resource_alignment()
+ *                pnv_pci_iov_resource_alignment()
+ *
+ *    Our hook overrides the default alignment, equal to the per-vf-size, with
+ *    new_size computed above.
+ *
+ * 3. When userspace enables VFs for a device:
+ *
+ *    sriov_enable()
+ *       pcibios_sriov_enable()
+ *           pnv_pcibios_sriov_enable()
+ *
+ *    This is where we actually allocate PE numbers for each VF and setup the
+ *    MBT mapping for each SR-IOV BAR. In steps 1) and 2) we setup an "arena"
+ *    where each MBT segment is equal in size to the VF BAR so we can shift
+ *    around the actual SR-IOV BAR location within this arena. We need this
+ *    ability because the PE space is shared by all devices on the same PHB.
+ *    When using mode a) described above, segment 0 maps to PE#0, which might
+ *    already be in use by another device on the PHB.
+ *
+ *    As a result we need to allocate a contiguous range of PE numbers, then shift
+ *    the address programmed into the SR-IOV BAR of the PF so that the address
+ *    of VF0 matches up with the segment corresponding to the first allocated
+ *    PE number. This is handled in pnv_pci_vf_resource_shift().
+ *
+ *    Once all that is done we return to the PCI core which then enables VFs,
+ *    scans them and creates pci_devs for each. The init process for a VF is
+ *    largely the same as a normal device, but the VF is inserted into the IODA
+ *    PE that we allocated for it rather than the PE associated with the bus.
+ *
+ * 4. When userspace disables VFs we unwind the above in
+ *    pnv_pcibios_sriov_disable(). Fortunately this is relatively simple since
+ *    we don't need to validate anything, just tear down the mappings and
+ *    move the SR-IOV resource back to its "proper" location.
+ *
+ * That's how mode a) works. In theory mode b) (single PE mapping) is less work
+ * since we can map each individual VF with a separate BAR. However, there are
+ * a few limitations:
+ *
+ * 1) For IODA2 mode b) has a minimum alignment requirement of 32MB. This makes
+ *    it only usable for devices with very large per-VF BARs. Such devices are
+ *    similar to Big Foot. They definitely exist, but I've never seen one.
+ *
+ * 2) The number of MBT entries that we have is limited. PHB3 and PHB4 only
+ *    have 16 total and some are needed for other uses. Most SR-IOV capable
+ *    network cards can support more than 16 VFs on each port.
+ *
+ * We use b) when using a) would use more than 1/4 of the entire 64 bit MMIO
+ * window of the PHB.
+ *
+ * PHB4 (IODA3) added a few new features that would be useful for SR-IOV. It
+ * allowed the MBT to map 32bit MMIO space in addition to 64bit which allows
+ * us to support SR-IOV BARs in the 32bit MMIO window. This is useful since
+ * the Linux BAR allocation will place any BAR marked as non-prefetchable into
+ * the non-prefetchable bridge window, which is 32bit only. It also added two
+ * new modes:
+ *
+ * c) A segmented BAR similar to a), but each segment can be individually
+ *    mapped to any PE. This matches how the 32bit MMIO window worked on
+ *    IODA1&2.
+ *
+ * d) A segmented BAR with 8, 64, or 128 segments. This works similarly to a),
+ *    but with fewer segments and configurable base PE.
+ *
+ *    i.e. The n'th segment maps to the (n + base)'th PE.
+ *
+ *    The base PE is also required to be a multiple of the window size.
+ *
+ * Unfortunately, the OPAL API doesn't currently (as of skiboot v6.6) allow us
+ * to exploit any of the IODA3 features.
+ */
+
+static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
+{
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+	struct resource *res;
+	int i;
+	resource_size_t vf_bar_sz;
+	struct pnv_iov_data *iov;
+	int mul;
+
+	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
+	if (!iov)
+		goto disable_iov;
+	pdev->dev.archdata.iov_data = iov;
+	mul = phb->ioda.total_pe_num;
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &pdev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || res->parent)
+			continue;
+		if (!pnv_pci_is_m64_flags(res->flags)) {
+			dev_warn(&pdev->dev, "Don't support SR-IOV with non M64 VF BAR%d: %pR. \n",
+				 i, res);
+			goto disable_iov;
+		}
+
+		vf_bar_sz = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
+
+		/*
+		 * Generally, one segmented M64 BAR maps one IOV BAR. However,
+		 * if a VF BAR is too large we end up wasting a lot of space.
+		 * If each VF needs more than 1/4 of the default m64 segment
+		 * then each VF BAR should be mapped in single-PE mode to reduce
+		 * the amount of space required. This does however limit the
+		 * number of VFs we can support.
+		 *
+		 * The 1/4 limit is arbitrary and can be tweaked.
+		 */
+		if (vf_bar_sz > (phb->ioda.m64_segsize >> 2)) {
+			/*
+			 * On PHB3, the minimum size alignment of M64 BAR in
+			 * single mode is 32MB. If this VF BAR is smaller than
+			 * 32MB, but still too large for a segmented window
+			 * then we can't map it and need to disable SR-IOV for
+			 * this device.
+			 */
+			if (vf_bar_sz < SZ_32M) {
+				pci_err(pdev, "VF BAR%d: %pR can't be mapped in single PE mode\n",
+					i, res);
+				goto disable_iov;
+			}
+
+			iov->m64_single_mode[i] = true;
+			continue;
+		}
+
+		/*
+		 * This BAR can be mapped with one segmented window, so adjust
+		 * the resource size to accommodate.
+		 */
+		pci_dbg(pdev, " Fixing VF BAR%d: %pR to\n", i, res);
+		res->end = res->start + vf_bar_sz * mul - 1;
+		pci_dbg(pdev, "                       %pR\n", res);
+
+		pci_info(pdev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
+			 i, res, mul);
+
+		iov->need_shift = true;
+	}
+
+	return;
+
+disable_iov:
+	/* Save ourselves some MMIO space by disabling the unusable BARs */
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &pdev->resource[i + PCI_IOV_RESOURCES];
+		res->flags = 0;
+		res->end = res->start - 1;
+	}
+
+	pdev->dev.archdata.iov_data = NULL;
+	kfree(iov);
+}
+
+void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev)
+{
+	if (WARN_ON(pci_dev_is_added(pdev)))
+		return;
+
+	if (pdev->is_virtfn) {
+		struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev);
+
+		/*
+		 * VF PEs are single-device PEs so their pdev pointer needs to
+		 * be set. The pdev doesn't exist when the PE is allocated (in
+		 * (pcibios_sriov_enable()) so we fix it up here.
+		 */
+		pe->pdev = pdev;
+		WARN_ON(!(pe->flags & PNV_IODA_PE_VF));
+	} else if (pdev->is_physfn) {
+		/*
+		 * For PFs adjust their allocated IOV resources to match what
+		 * the PHB can support using its M64 BAR table.
+		 */
+		pnv_pci_ioda_fixup_iov_resources(pdev);
+	}
+}
+
+resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
+						      int resno)
+{
+	resource_size_t align = pci_iov_resource_size(pdev, resno);
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
+	struct pnv_iov_data *iov = pnv_iov_get(pdev);
+
+	/*
+	 * iov can be null if we have an SR-IOV device with an IOV BAR that
+	 * can't be placed in the m64 space (i.e. the BAR is 32bit or
+	 * non-prefetchable).
+	 * In that case we don't allow VFs to be enabled since one of their
+	 * BARs would not be placed in the correct PE.
+	 */
+	if (!iov)
+		return align;
+
+	/*
+	 * If we're using single mode then we can just use the native VF BAR
+	 * alignment. We validated that it's possible to use a single PE
+	 * window above when we did the fixup.
+	 */
+	if (iov->m64_single_mode[resno - PCI_IOV_RESOURCES])
+		return align;
+
+	/*
+	 * On the PowerNV platform the IOV BAR is mapped by an M64 BAR to
+	 * enable SR-IOV. From the hardware perspective, the range mapped by
+	 * an M64 BAR must be size aligned.
+	 *
+	 * This function returns the total IOV BAR size if M64 BAR is in
+	 * Shared PE mode or just VF BAR size if not.
+	 * If the M64 BAR is in Single PE mode, return the VF BAR size or
+	 * M64 segment size if IOV BAR size is less.
+	 */
+	return phb->ioda.total_pe_num * align;
+}
+
+static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pnv_iov_data   *iov;
+	struct pnv_phb        *phb;
+	int window_id;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+	iov = pnv_iov_get(pdev);
+
+	for_each_set_bit(window_id, iov->used_m64_bar_mask, MAX_M64_BARS) {
+		opal_pci_phb_mmio_enable(phb->opal_id,
+					 OPAL_M64_WINDOW_TYPE,
+					 window_id,
+					 0);
+
+		clear_bit(window_id, &phb->ioda.m64_bar_alloc);
+	}
+
+	return 0;
+}
+
+
+/*
+ * PHB3 and beyond support segmented windows. The window's address range
+ * is subdivided into phb->ioda.total_pe_num segments and there's a 1-1
+ * mapping between PEs and segments.
+ */
+static int64_t pnv_ioda_map_m64_segmented(struct pnv_phb *phb,
+					  int window_id,
+					  resource_size_t start,
+					  resource_size_t size)
+{
+	int64_t rc;
+
+	rc = opal_pci_set_phb_mem_window(phb->opal_id,
+					 OPAL_M64_WINDOW_TYPE,
+					 window_id,
+					 start,
+					 0, /* unused */
+					 size);
+	if (rc)
+		goto out;
+
+	rc = opal_pci_phb_mmio_enable(phb->opal_id,
+				      OPAL_M64_WINDOW_TYPE,
+				      window_id,
+				      OPAL_ENABLE_M64_SPLIT);
+out:
+	if (rc)
+		pr_err("Failed to map M64 window #%d: %lld\n", window_id, rc);
+
+	return rc;
+}
+
+static int64_t pnv_ioda_map_m64_single(struct pnv_phb *phb,
+				       int pe_num,
+				       int window_id,
+				       resource_size_t start,
+				       resource_size_t size)
+{
+	int64_t rc;
+
+	/*
+	 * The API for setting up m64 mmio windows seems to have been designed
+	 * with P7-IOC in mind. For that chip each M64 BAR (window) had a fixed
+	 * split of 8 equally sized segments, each of which could be
+	 * individually assigned to a PE.
+	 *
+	 * The problem with this is that the API doesn't have any way to
+	 * communicate the number of segments we want on a BAR. This wasn't
+	 * a problem for p7-ioc since you didn't have a choice, but the
+	 * single PE windows added in PHB3 don't map cleanly to this API.
+	 *
+	 * As a result we've got this slightly awkward process where we
+	 * call opal_pci_map_pe_mmio_window() to put the window into single
+	 * PE mode, and set the PE for the window before setting the address
+	 * bounds. We need to do it this way because the single PE windows
+	 * on PHB3 have different alignment requirements.
+	 */
+	rc = opal_pci_map_pe_mmio_window(phb->opal_id,
+					 pe_num,
+					 OPAL_M64_WINDOW_TYPE,
+					 window_id,
+					 0);
+	if (rc)
+		goto out;
+
+	/*
+	 * NB: In single PE mode the window needs to be aligned to 32MB
+	 */
+	rc = opal_pci_set_phb_mem_window(phb->opal_id,
+					 OPAL_M64_WINDOW_TYPE,
+					 window_id,
+					 start,
+					 0, /* ignored by FW, m64 is 1-1 */
+					 size);
+	if (rc)
+		goto out;
+
+	/*
+	 * Now actually enable it. We specified the BAR should be in "non-split"
+	 * mode so FW will validate that the BAR is in single PE mode.
+	 */
+	rc = opal_pci_phb_mmio_enable(phb->opal_id,
+				      OPAL_M64_WINDOW_TYPE,
+				      window_id,
+				      OPAL_ENABLE_M64_NON_SPLIT);
+out:
+	if (rc)
+		pr_err("Error mapping single PE BAR\n");
+
+	return rc;
+}
+
+static int pnv_pci_alloc_m64_bar(struct pnv_phb *phb, struct pnv_iov_data *iov)
+{
+	int win;
+
+	do {
+		win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
+				phb->ioda.m64_bar_idx + 1, 0);
+
+		if (win >= phb->ioda.m64_bar_idx + 1)
+			return -1;
+	} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));
+
+	set_bit(win, iov->used_m64_bar_mask);
+
+	return win;
+}
+
+static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pnv_iov_data   *iov;
+	struct pnv_phb        *phb;
+	int                    win;
+	struct resource       *res;
+	int                    i, j;
+	int64_t                rc;
+	resource_size_t        size, start;
+	int                    base_pe_num;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+	iov = pnv_iov_get(pdev);
+
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &pdev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || !res->parent)
+			continue;
+
+		/* don't need single mode? map everything in one go! */
+		if (!iov->m64_single_mode[i]) {
+			win = pnv_pci_alloc_m64_bar(phb, iov);
+			if (win < 0)
+				goto m64_failed;
+
+			size = resource_size(res);
+			start = res->start;
+
+			rc = pnv_ioda_map_m64_segmented(phb, win, start, size);
+			if (rc)
+				goto m64_failed;
+
+			continue;
+		}
+
+		/* otherwise map each VF with single PE BARs */
+		size = pci_iov_resource_size(pdev, PCI_IOV_RESOURCES + i);
+		base_pe_num = iov->vf_pe_arr[0].pe_number;
+
+		for (j = 0; j < num_vfs; j++) {
+			win = pnv_pci_alloc_m64_bar(phb, iov);
+			if (win < 0)
+				goto m64_failed;
+
+			start = res->start + size * j;
+			rc = pnv_ioda_map_m64_single(phb, win,
+						     base_pe_num + j,
+						     start,
+						     size);
+			if (rc)
+				goto m64_failed;
+		}
+	}
+	return 0;
+
+m64_failed:
+	pnv_pci_vf_release_m64(pdev, num_vfs);
+	return -EBUSY;
+}
+
+static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
+{
+	struct pnv_phb        *phb;
+	struct pnv_ioda_pe    *pe, *pe_n;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+
+	if (!pdev->is_physfn)
+		return;
+
+	/* FIXME: Use pnv_ioda_release_pe()? */
+	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
+		if (pe->parent_dev != pdev)
+			continue;
+
+		pnv_pci_ioda2_release_pe_dma(pe);
+
+		/* Remove from list */
+		mutex_lock(&phb->ioda.pe_list_mutex);
+		list_del(&pe->list);
+		mutex_unlock(&phb->ioda.pe_list_mutex);
+
+		pnv_ioda_deconfigure_pe(phb, pe);
+
+		pnv_ioda_free_pe(pe);
+	}
+}
+
+static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
+{
+	struct resource *res, res2;
+	struct pnv_iov_data *iov;
+	resource_size_t size;
+	u16 num_vfs;
+	int i;
+
+	if (!dev->is_physfn)
+		return -EINVAL;
+	iov = pnv_iov_get(dev);
+
+	/*
+	 * "offset" is in VFs.  The M64 windows are sized so that when they
+	 * are segmented, each segment is the same size as the IOV BAR.
+	 * Each segment is in a separate PE, and the high order bits of the
+	 * address are the PE number.  Therefore, each VF's BAR is in a
+	 * separate PE, and changing the IOV BAR start address changes the
+	 * range of PEs the VFs are in.
+	 */
+	num_vfs = iov->num_vfs;
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &dev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || !res->parent)
+			continue;
+		if (iov->m64_single_mode[i])
+			continue;
+
+		/*
+		 * The actual IOV BAR range is determined by the start address
+		 * and the actual size for num_vfs VFs BAR.  This check is to
+		 * make sure that after shifting, the range will not overlap
+		 * with another device.
+		 */
+		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
+		res2.flags = res->flags;
+		res2.start = res->start + (size * offset);
+		res2.end = res2.start + (size * num_vfs) - 1;
+
+		if (res2.end > res->end) {
+			dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n",
+				i, &res2, res, num_vfs, offset);
+			return -EBUSY;
+		}
+	}
+
+	/*
+	 * Since M64 BAR shares segments among all possible 256 PEs,
+	 * we have to shift the beginning of PF IOV BAR to make it start from
+	 * the segment which belongs to the PE number assigned to the first VF.
+	 * This creates a "hole" in the /proc/iomem which could be used for
+	 * allocating other resources so we reserve this area below and
+	 * release when IOV is released.
+	 */
+	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+		res = &dev->resource[i + PCI_IOV_RESOURCES];
+		if (!res->flags || !res->parent)
+			continue;
+		if (iov->m64_single_mode[i])
+			continue;
+
+		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
+		res2 = *res;
+		res->start += size * offset;
+
+		dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
+			 i, &res2, res, (offset > 0) ? "En" : "Dis",
+			 num_vfs, offset);
+
+		if (offset < 0) {
+			devm_release_resource(&dev->dev, &iov->holes[i]);
+			memset(&iov->holes[i], 0, sizeof(iov->holes[i]));
+		}
+
+		pci_update_resource(dev, i + PCI_IOV_RESOURCES);
+
+		if (offset > 0) {
+			iov->holes[i].start = res2.start;
+			iov->holes[i].end = res2.start + size * offset - 1;
+			iov->holes[i].flags = IORESOURCE_BUS;
+			iov->holes[i].name = "pnv_iov_reserved";
+			devm_request_resource(&dev->dev, res->parent,
+					&iov->holes[i]);
+		}
+	}
+	return 0;
+}
+
+static void pnv_pci_sriov_disable(struct pci_dev *pdev)
+{
+	u16                    num_vfs, base_pe;
+	struct pnv_iov_data   *iov;
+
+	iov = pnv_iov_get(pdev);
+	if (WARN_ON(!iov))
+		return;
+
+	num_vfs = iov->num_vfs;
+	base_pe = iov->vf_pe_arr[0].pe_number;
+
+	/* Release VF PEs */
+	pnv_ioda_release_vf_PE(pdev);
+
+	/* Un-shift the IOV BARs if we need to */
+	if (iov->need_shift)
+		pnv_pci_vf_resource_shift(pdev, -base_pe);
+
+	/* Release M64 windows */
+	pnv_pci_vf_release_m64(pdev, num_vfs);
+}
+
+static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pnv_phb        *phb;
+	struct pnv_ioda_pe    *pe;
+	int                    pe_num;
+	u16                    vf_index;
+	struct pnv_iov_data   *iov;
+	struct pci_dn         *pdn;
+
+	if (!pdev->is_physfn)
+		return;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+	pdn = pci_get_pdn(pdev);
+	iov = pnv_iov_get(pdev);
+
+	/* Reserve PE for each VF */
+	for (vf_index = 0; vf_index < num_vfs; vf_index++) {
+		int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index);
+		int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
+		struct pci_dn *vf_pdn;
+
+		pe = &iov->vf_pe_arr[vf_index];
+		pe->phb = phb;
+		pe->flags = PNV_IODA_PE_VF;
+		pe->pbus = NULL;
+		pe->parent_dev = pdev;
+		pe->mve_number = -1;
+		pe->rid = (vf_bus << 8) | vf_devfn;
+
+		pe_num = pe->pe_number;
+		pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
+			pci_domain_nr(pdev->bus), pdev->bus->number,
+			PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);
+
+		if (pnv_ioda_configure_pe(phb, pe)) {
+			/* XXX What do we do here ? */
+			pnv_ioda_free_pe(pe);
+			pe->pdev = NULL;
+			continue;
+		}
+
+		/* Put PE to the list */
+		mutex_lock(&phb->ioda.pe_list_mutex);
+		list_add_tail(&pe->list, &phb->ioda.pe_list);
+		mutex_unlock(&phb->ioda.pe_list_mutex);
+
+		/* associate this pe to its pdn */
+		list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) {
+			if (vf_pdn->busno == vf_bus &&
+			    vf_pdn->devfn == vf_devfn) {
+				vf_pdn->pe_number = pe_num;
+				break;
+			}
+		}
+
+		pnv_pci_ioda2_setup_dma_pe(phb, pe);
+	}
+}
+
+static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+	struct pnv_ioda_pe    *base_pe;
+	struct pnv_iov_data   *iov;
+	struct pnv_phb        *phb;
+	int                    ret;
+	u16                    i;
+
+	phb = pci_bus_to_pnvhb(pdev->bus);
+	iov = pnv_iov_get(pdev);
+
+	/*
+	 * There are calls to IODA2 PE setup code littered throughout. We could
+	 * probably fix that, but we'd still have problems due to the
+	 * restrictions inherent to IODA1 PHBs.
+	 *
+	 * NB: We class IODA3 as IODA2 since they're very similar.
+	 */
+	if (phb->type != PNV_PHB_IODA2) {
+		pci_err(pdev, "SR-IOV is not supported on this PHB\n");
+		return -ENXIO;
+	}
+
+	if (!iov) {
+		dev_info(&pdev->dev, "don't support this SRIOV device with non 64bit-prefetchable IOV BAR\n");
+		return -ENOSPC;
+	}
+
+	/* allocate a contiguous block of PEs for our VFs */
+	base_pe = pnv_ioda_alloc_pe(phb, num_vfs);
+	if (!base_pe) {
+		pci_err(pdev, "Unable to allocate PEs for %d VFs\n", num_vfs);
+		return -EBUSY;
+	}
+
+	iov->vf_pe_arr = base_pe;
+	iov->num_vfs = num_vfs;
+
+	/* Assign M64 window accordingly */
+	ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
+	if (ret) {
+		dev_info(&pdev->dev, "Not enough M64 window resources\n");
+		goto m64_failed;
+	}
+
+	/*
+	 * When using one M64 BAR to map one IOV BAR, we need to shift
+	 * the IOV BAR according to the PE# allocated to the VFs.
+	 * Otherwise, the PE# for the VF will conflict with others.
+	 */
+	if (iov->need_shift) {
+		ret = pnv_pci_vf_resource_shift(pdev, base_pe->pe_number);
+		if (ret)
+			goto shift_failed;
+	}
+
+	/* Setup VF PEs */
+	pnv_ioda_setup_vf_PE(pdev, num_vfs);
+
+	return 0;
+
+shift_failed:
+	pnv_pci_vf_release_m64(pdev, num_vfs);
+
+m64_failed:
+	for (i = 0; i < num_vfs; i++)
+		pnv_ioda_free_pe(&iov->vf_pe_arr[i]);
+
+	return ret;
+}
+
+int pnv_pcibios_sriov_disable(struct pci_dev *pdev)
+{
+	pnv_pci_sriov_disable(pdev);
+
+	/* Release PCI data */
+	remove_sriov_vf_pdns(pdev);
+	return 0;
+}
+
+int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
+{
+	/* Allocate PCI data */
+	add_sriov_vf_pdns(pdev);
+
+	return pnv_pci_sriov_enable(pdev, num_vfs);
+}
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index 091fe1c..9b9bca1 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -162,8 +162,7 @@
 
 int pnv_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type)
 {
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
 	struct msi_desc *entry;
 	struct msi_msg msg;
 	int hwirq;
@@ -211,8 +210,7 @@
 
 void pnv_teardown_msi_irqs(struct pci_dev *pdev)
 {
-	struct pci_controller *hose = pci_bus_to_host(pdev->bus);
-	struct pnv_phb *phb = hose->private_data;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
 	struct msi_desc *entry;
 	irq_hw_number_t hwirq;
 
@@ -824,10 +822,9 @@
 
 int pnv_pci_set_tunnel_bar(struct pci_dev *dev, u64 addr, int enable)
 {
-	__be64 val;
-	struct pci_controller *hose;
-	struct pnv_phb *phb;
+	struct pnv_phb *phb = pci_bus_to_pnvhb(dev->bus);
 	u64 tunnel_bar;
+	__be64 val;
 	int rc;
 
 	if (!opal_check_token(OPAL_PCI_GET_PBCQ_TUNNEL_BAR))
@@ -835,9 +832,6 @@
 	if (!opal_check_token(OPAL_PCI_SET_PBCQ_TUNNEL_BAR))
 		return -ENXIO;
 
-	hose = pci_bus_to_host(dev->bus);
-	phb = hose->private_data;
-
 	mutex_lock(&tunnel_mutex);
 	rc = opal_pci_get_pbcq_tunnel_bar(phb->opal_id, &val);
 	if (rc != OPAL_SUCCESS) {
diff --git a/arch/powerpc/platforms/powernv/pci.h b/arch/powerpc/platforms/powernv/pci.h
index 51c254f2..739a0b3 100644
--- a/arch/powerpc/platforms/powernv/pci.h
+++ b/arch/powerpc/platforms/powernv/pci.h
@@ -87,7 +87,14 @@
 	bool			tce_bypass_enabled;
 	uint64_t		tce_bypass_base;
 
-	/* MSIs. MVE index is identical for for 32 and 64 bit MSI
+	/*
+	 * Used to track whether we've done DMA setup for this PE or not. We
+	 * want to defer allocating TCE tables, etc until we've added a
+	 * non-bridge device to the PE.
+	 */
+	bool			dma_setup_done;
+
+	/* MSIs. MVE index is identical for 32 and 64 bit MSI
 	 * and -1 if not supported. (It's actually identical to the
 	 * PE number)
 	 */
@@ -147,6 +154,7 @@
 		unsigned long		m64_size;
 		unsigned long		m64_segsize;
 		unsigned long		m64_base;
+#define MAX_M64_BARS 64
 		unsigned long		m64_bar_alloc;
 
 		/* IO ports */
@@ -187,6 +195,89 @@
 	u8			*diag_data;
 };
 
+
+/* IODA PE management */
+
+static inline bool pnv_pci_is_m64(struct pnv_phb *phb, struct resource *r)
+{
+	/*
+	 * WARNING: We cannot rely on the resource flags. The Linux PCI
+	 * allocation code sometimes decides to put a 64-bit prefetchable
+	 * BAR in the 32-bit window, so we have to compare the addresses.
+	 *
+	 * For simplicity we only test resource start.
+	 */
+	return (r->start >= phb->ioda.m64_base &&
+		r->start < (phb->ioda.m64_base + phb->ioda.m64_size));
+}
+
+static inline bool pnv_pci_is_m64_flags(unsigned long resource_flags)
+{
+	unsigned long flags = (IORESOURCE_MEM_64 | IORESOURCE_PREFETCH);
+
+	return (resource_flags & flags) == flags;
+}
+
+int pnv_ioda_configure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
+int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
+
+void pnv_pci_ioda2_setup_dma_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe);
+void pnv_pci_ioda2_release_pe_dma(struct pnv_ioda_pe *pe);
+
+struct pnv_ioda_pe *pnv_ioda_alloc_pe(struct pnv_phb *phb, int count);
+void pnv_ioda_free_pe(struct pnv_ioda_pe *pe);
+
+#ifdef CONFIG_PCI_IOV
+/*
+ * For SR-IOV we want to put each VF's MMIO resource into a separate PE.
+ * This requires a bit of acrobatics with the MMIO -> PE configuration
+ * and this structure is used to keep track of it all.
+ */
+struct pnv_iov_data {
+	/* number of VFs enabled */
+	u16     num_vfs;
+
+	/* pointer to the array of VF PEs. num_vfs long */
+	struct pnv_ioda_pe *vf_pe_arr;
+
+	/* Did we map the VF BAR with single-PE IODA BARs? */
+	bool    m64_single_mode[PCI_SRIOV_NUM_BARS];
+
+	/*
+	 * True if we're using any segmented windows. In that case we need
+	 * to shift the start of the IOV resource to the segment corresponding
+	 * to the allocated PE.
+	 */
+	bool    need_shift;
+
+	/*
+	 * Bit mask used to track which m64 windows are used to map the
+	 * SR-IOV BARs for this device.
+	 */
+	DECLARE_BITMAP(used_m64_bar_mask, MAX_M64_BARS);
+
+	/*
+	 * If we map the SR-IOV BARs with a segmented window then
+	 * parts of that window will be "claimed" by other PEs.
+	 *
+	 * "holes" here is used to reserve the leading portion
+	 * of the window that is used by other (non VF) PEs.
+	 */
+	struct resource holes[PCI_SRIOV_NUM_BARS];
+};
+
+static inline struct pnv_iov_data *pnv_iov_get(struct pci_dev *pdev)
+{
+	return pdev->dev.archdata.iov_data;
+}
+
+void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev);
+resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev, int resno);
+
+int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs);
+int pnv_pcibios_sriov_disable(struct pci_dev *pdev);
+#endif /* CONFIG_PCI_IOV */
+
 extern struct pci_ops pnv_pci_ops;
 
 void pnv_pci_dump_phb_diag_data(struct pci_controller *hose,
@@ -260,4 +351,14 @@
 
 extern unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb);
 
+static inline struct pnv_phb *pci_bus_to_pnvhb(struct pci_bus *bus)
+{
+	struct pci_controller *hose = bus->sysdata;
+
+	if (hose)
+		return hose->private_data;
+
+	return NULL;
+}
+
 #endif /* __POWERNV_PCI_H */
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 3bc188d..7fcb886 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -399,7 +399,15 @@
 #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
 static unsigned long pnv_memory_block_size(void)
 {
-	return 256UL * 1024 * 1024;
+	/*
+	 * We map the kernel linear region with 1GB large pages on radix. For
+	 * memory hot unplug to work our memory block size must be at least
+	 * this size.
+	 */
+	if (radix_enabled())
+		return radix_mem_block_size;
+	else
+		return 256UL * 1024 * 1024;
 }
 #endif
 
diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig
index 24c1836..5e037df 100644
--- a/arch/powerpc/platforms/pseries/Kconfig
+++ b/arch/powerpc/platforms/pseries/Kconfig
@@ -25,15 +25,22 @@
 	select SWIOTLB
 	default y
 
+config PARAVIRT_SPINLOCKS
+	bool
+
 config PPC_SPLPAR
-	depends on PPC_PSERIES
 	bool "Support for shared-processor logical partitions"
+	depends on PPC_PSERIES
+	select PARAVIRT_SPINLOCKS if PPC_QUEUED_SPINLOCKS
+	default y
 	help
 	  Enabling this option will make the kernel run more efficiently
 	  on logically-partitioned pSeries systems which use shared
 	  processors, that is, which share physical processors between
 	  two or more partitions.
 
+	  Say Y if you are unsure.
+
 config DTL
 	bool "Dispatch Trace Log"
 	depends on PPC_SPLPAR && DEBUG_FS
diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c b/arch/powerpc/platforms/pseries/eeh_pseries.c
index ace117f..cb2d9a9 100644
--- a/arch/powerpc/platforms/pseries/eeh_pseries.c
+++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
@@ -24,6 +24,7 @@
 #include <linux/sched.h>
 #include <linux/seq_file.h>
 #include <linux/spinlock.h>
+#include <linux/crash_dump.h>
 
 #include <asm/eeh.h>
 #include <asm/eeh_event.h>
@@ -32,6 +33,8 @@
 #include <asm/ppc-pci.h>
 #include <asm/rtas.h>
 
+static int pseries_eeh_get_pe_addr(struct pci_dn *pdn);
+
 /* RTAS tokens */
 static int ibm_set_eeh_option;
 static int ibm_set_slot_reset;
@@ -52,8 +55,6 @@
 	dev_dbg(&pdev->dev, "EEH: Setting up device\n");
 #ifdef CONFIG_PCI_IOV
 	if (pdev->is_virtfn) {
-		struct pci_dn *physfn_pdn;
-
 		pdn->device_id  =  pdev->device;
 		pdn->vendor_id  =  pdev->vendor;
 		pdn->class_code =  pdev->class;
@@ -63,23 +64,172 @@
 		 * completion from platform.
 		 */
 		pdn->last_allow_rc =  0;
-		physfn_pdn      =  pci_get_pdn(pdev->physfn);
-		pdn->pe_number  =  physfn_pdn->pe_num_map[pdn->vf_index];
 	}
 #endif
 	pseries_eeh_init_edev(pdn);
 #ifdef CONFIG_PCI_IOV
 	if (pdev->is_virtfn) {
+		/*
+		 * FIXME: This really should be handled by choosing the right
+		 *        parent PE in pseries_eeh_init_edev().
+		 */
+		struct eeh_pe *physfn_pe = pci_dev_to_eeh_dev(pdev->physfn)->pe;
 		struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
 
 		edev->pe_config_addr =  (pdn->busno << 16) | (pdn->devfn << 8);
-		eeh_rmv_from_parent_pe(edev); /* Remove as it is adding to bus pe */
-		eeh_add_to_parent_pe(edev);   /* Add as VF PE type */
+		eeh_pe_tree_remove(edev); /* Remove as it is adding to bus pe */
+		eeh_pe_tree_insert(edev, physfn_pe);   /* Add as VF PE type */
 	}
 #endif
 	eeh_probe_device(pdev);
 }
 
+
+/**
+ * pseries_eeh_get_config_addr - Retrieve config address
+ *
+ * Retrieve the associated config address. There are two RTAS
+ * function calls dedicated to the purpose: we try the newer one
+ * first and fall back to the older one. Callers should make sure
+ * the config address has been determined from the FDT node before
+ * calling this function.
+ *
+ * Note that a zeroed return value means an invalid PE config
+ * address.
+ */
+static int pseries_eeh_get_config_addr(struct pci_controller *phb, int config_addr)
+{
+	int ret = 0;
+	int rets[3];
+
+	if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
+		/*
+		 * First of all, we need to make sure there has one PE
+		 * associated with the device. Otherwise, PE address is
+		 * meaningless.
+		 */
+		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid), 1);
+		if (ret || (rets[0] == 0))
+			return 0;
+
+		/* Retrieve the associated PE config address */
+		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid), 0);
+		if (ret) {
+			pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
+				__func__, phb->global_number, config_addr);
+			return 0;
+		}
+
+		return rets[0];
+	}
+
+	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
+		ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid), 0);
+		if (ret) {
+			pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
+				__func__, phb->global_number, config_addr);
+			return 0;
+		}
+
+		return rets[0];
+	}
+
+	return ret;
+}
+
+/**
+ * pseries_eeh_phb_reset - Reset the specified PHB
+ * @phb: PCI controller
+ * @config_addr: the associated config address
+ * @option: reset option
+ *
+ * Reset the specified PHB/PE
+ */
+static int pseries_eeh_phb_reset(struct pci_controller *phb, int config_addr, int option)
+{
+	int ret;
+
+	/* Reset PE through RTAS call */
+	ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+			config_addr, BUID_HI(phb->buid),
+			BUID_LO(phb->buid), option);
+
+	/* If fundamental-reset not supported, try hot-reset */
+	if (option == EEH_RESET_FUNDAMENTAL &&
+	    ret == -8) {
+		option = EEH_RESET_HOT;
+		ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid), option);
+	}
+
+	/* We need reset hold or settlement delay */
+	if (option == EEH_RESET_FUNDAMENTAL ||
+	    option == EEH_RESET_HOT)
+		msleep(EEH_PE_RST_HOLD_TIME);
+	else
+		msleep(EEH_PE_RST_SETTLE_TIME);
+
+	return ret;
+}
+
+/**
+ * pseries_eeh_phb_configure_bridge - Configure PCI bridges in the indicated PE
+ * @phb: PCI controller
+ * @config_addr: the associated config address
+ *
+ * The function will be called to reconfigure the bridges included
+ * in the specified PE so that the malfunctioning PE can be
+ * recovered.
+ */
+static int pseries_eeh_phb_configure_bridge(struct pci_controller *phb, int config_addr)
+{
+	int ret;
+	/* Waiting 0.2s maximum before skipping configuration */
+	int max_wait = 200;
+
+	while (max_wait > 0) {
+		ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
+				config_addr, BUID_HI(phb->buid),
+				BUID_LO(phb->buid));
+
+		if (!ret)
+			return ret;
+		if (ret < 0)
+			break;
+
+		/*
+		 * If RTAS returns a delay value that's above 100ms, cut it
+		 * down to 100ms in case firmware made a mistake.  For more
+		 * on how these delay values work see rtas_busy_delay_time
+		 */
+		if (ret > RTAS_EXTENDED_DELAY_MIN+2 &&
+		    ret <= RTAS_EXTENDED_DELAY_MAX)
+			ret = RTAS_EXTENDED_DELAY_MIN+2;
+
+		max_wait -= rtas_busy_delay_time(ret);
+
+		if (max_wait < 0)
+			break;
+
+		rtas_busy_delay(ret);
+	}
+
+	pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n",
+		__func__, phb->global_number, config_addr, ret);
+	/* PAPR defines -3 as "Parameter Error" for this function: */
+	if (ret == -3)
+		return -EINVAL;
+	else
+		return -EIO;
+}
+
 /*
  * Buffer for reporting slot-error-detail rtas calls. It's here
  * in BSS, and not dynamically alloced, so that it ends up in
@@ -96,6 +246,10 @@
  */
 static int pseries_eeh_init(void)
 {
+	struct pci_controller *phb;
+	struct pci_dn *pdn;
+	int addr, config_addr;
+
 	/* figure out EEH RTAS function call tokens */
 	ibm_set_eeh_option		= rtas_token("ibm,set-eeh-option");
 	ibm_set_slot_reset		= rtas_token("ibm,set-slot-reset");
@@ -148,6 +302,22 @@
 	/* Set EEH machine dependent code */
 	ppc_md.pcibios_bus_add_device = pseries_pcibios_bus_add_device;
 
+	if (is_kdump_kernel() || reset_devices) {
+		pr_info("Issue PHB reset ...\n");
+		list_for_each_entry(phb, &hose_list, list_node) {
+			pdn = list_first_entry(&PCI_DN(phb->dn)->child_list, struct pci_dn, list);
+			addr = (pdn->busno << 16) | (pdn->devfn << 8);
+			config_addr = pseries_eeh_get_config_addr(phb, addr);
+			/* invalid PE config addr */
+			if (config_addr == 0)
+				continue;
+
+			pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_FUNDAMENTAL);
+			pseries_eeh_phb_reset(phb, config_addr, EEH_RESET_DEACTIVATE);
+			pseries_eeh_phb_configure_bridge(phb, config_addr);
+		}
+	}
+
 	return 0;
 }
 
@@ -221,6 +391,43 @@
 }
 
 /**
+ * pseries_eeh_pe_get_parent - Retrieve the parent PE
+ * @edev: EEH device
+ *
+ * The PEs in the system are organized as a hierarchy tree. This
+ * function retrieves the parent PE of the given EEH device by
+ * walking up that tree.
+ */
+static struct eeh_pe *pseries_eeh_pe_get_parent(struct eeh_dev *edev)
+{
+	struct eeh_dev *parent;
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
+	/*
+	 * It's possible that an indirect ancestor EEH device
+	 * already has an associated PE while the direct
+	 * parent EEH device doesn't have one yet.
+	 */
+	if (edev->physfn)
+		pdn = pci_get_pdn(edev->physfn);
+	else
+		pdn = pdn ? pdn->parent : NULL;
+	while (pdn) {
+		/* We're poking out of PCI territory */
+		parent = pdn_to_eeh_dev(pdn);
+		if (!parent)
+			return NULL;
+
+		if (parent->pe)
+			return parent->pe;
+
+		pdn = pdn->parent;
+	}
+
+	return NULL;
+}
+
+/**
  * pseries_eeh_init_edev - initialise the eeh_dev and eeh_pe for a pci_dn
  *
  * @pdn: PCI device node
@@ -275,12 +482,11 @@
 	 * correctly reflects that current device is root port
 	 * or PCIe switch downstream port.
 	 */
-	edev->class_code = pdn->class_code;
 	edev->pcix_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_PCIX);
 	edev->pcie_cap = pseries_eeh_find_cap(pdn, PCI_CAP_ID_EXP);
 	edev->aer_cap = pseries_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR);
 	edev->mode &= 0xFFFFFF00;
-	if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
+	if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) {
 		edev->mode |= EEH_DEV_BRIDGE;
 		if (edev->pcie_cap) {
 			rtas_read_config(pdn, edev->pcie_cap + PCI_EXP_FLAGS,
@@ -304,8 +510,10 @@
 	if (ret) {
 		eeh_edev_dbg(edev, "EEH failed to enable on device (code %d)\n", ret);
 	} else {
+		struct eeh_pe *parent;
+
 		/* Retrieve PE address */
-		edev->pe_config_addr = eeh_ops->get_pe_addr(&pe);
+		edev->pe_config_addr = pseries_eeh_get_pe_addr(pdn);
 		pe.addr = edev->pe_config_addr;
 
 		/* Some older systems (Power4) allow the ibm,set-eeh-option
@@ -316,16 +524,19 @@
 		if (ret > 0 && ret != EEH_STATE_NOT_SUPPORT)
 			enable = 1;
 
-		if (enable) {
+		/*
+		 * This device doesn't support EEH, but it may have an
+		 * EEH parent. In this case any error on the device will
+		 * freeze the PE of its upstream bridge, so add it to
+		 * the upstream PE.
+		 */
+		parent = pseries_eeh_pe_get_parent(edev);
+		if (parent && !enable)
+			edev->pe_config_addr = parent->addr;
+
+		if (enable || parent) {
 			eeh_add_flag(EEH_ENABLED);
-			eeh_add_to_parent_pe(edev);
-		} else if (pdn->parent && pdn_to_eeh_dev(pdn->parent) &&
-			   (pdn_to_eeh_dev(pdn->parent))->pe) {
-			/* This device doesn't support EEH, but it may have an
-			 * EEH parent, in which case we mark it as supported.
-			 */
-			edev->pe_config_addr = pdn_to_eeh_dev(pdn->parent)->pe_config_addr;
-			eeh_add_to_parent_pe(edev);
+			eeh_pe_tree_insert(edev, parent);
 		}
 		eeh_edev_dbg(edev, "EEH is %s on device (code %d)\n",
 			     (enable ? "enabled" : "unsupported"), ret);
@@ -435,8 +646,10 @@
  * It's notable that zero'ed return value means invalid PE config
  * address.
  */
-static int pseries_eeh_get_pe_addr(struct eeh_pe *pe)
+static int pseries_eeh_get_pe_addr(struct pci_dn *pdn)
 {
+	int config_addr = rtas_config_addr(pdn->busno, pdn->devfn, 0);
+	unsigned long buid = pdn->phb->buid;
 	int ret = 0;
 	int rets[3];
 
@@ -447,18 +660,16 @@
 		 * meaningless.
 		 */
 		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
-				pe->config_addr, BUID_HI(pe->phb->buid),
-				BUID_LO(pe->phb->buid), 1);
+				config_addr, BUID_HI(buid), BUID_LO(buid), 1);
 		if (ret || (rets[0] == 0))
 			return 0;
 
 		/* Retrieve the associated PE config address */
 		ret = rtas_call(ibm_get_config_addr_info2, 4, 2, rets,
-				pe->config_addr, BUID_HI(pe->phb->buid),
-				BUID_LO(pe->phb->buid), 0);
+				config_addr, BUID_HI(buid), BUID_LO(buid), 0);
 		if (ret) {
 			pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
-				__func__, pe->phb->global_number, pe->config_addr);
+				__func__, pdn->phb->global_number, config_addr);
 			return 0;
 		}
 
@@ -467,11 +678,10 @@
 
 	if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
 		ret = rtas_call(ibm_get_config_addr_info, 4, 2, rets,
-				pe->config_addr, BUID_HI(pe->phb->buid),
-				BUID_LO(pe->phb->buid), 0);
+				config_addr, BUID_HI(buid), BUID_LO(buid), 0);
 		if (ret) {
 			pr_warn("%s: Failed to get address for PHB#%x-PE#%x\n",
-				__func__, pe->phb->global_number, pe->config_addr);
+				__func__, pdn->phb->global_number, config_addr);
 			return 0;
 		}
 
@@ -569,35 +779,13 @@
 static int pseries_eeh_reset(struct eeh_pe *pe, int option)
 {
 	int config_addr;
-	int ret;
 
 	/* Figure out PE address */
 	config_addr = pe->config_addr;
 	if (pe->addr)
 		config_addr = pe->addr;
 
-	/* Reset PE through RTAS call */
-	ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
-			config_addr, BUID_HI(pe->phb->buid),
-			BUID_LO(pe->phb->buid), option);
-
-	/* If fundamental-reset not supported, try hot-reset */
-	if (option == EEH_RESET_FUNDAMENTAL &&
-	    ret == -8) {
-		option = EEH_RESET_HOT;
-		ret = rtas_call(ibm_set_slot_reset, 4, 1, NULL,
-				config_addr, BUID_HI(pe->phb->buid),
-				BUID_LO(pe->phb->buid), option);
-	}
-
-	/* We need reset hold or settlement delay */
-	if (option == EEH_RESET_FUNDAMENTAL ||
-	    option == EEH_RESET_HOT)
-		msleep(EEH_PE_RST_HOLD_TIME);
-	else
-		msleep(EEH_PE_RST_SETTLE_TIME);
-
-	return ret;
+	return pseries_eeh_phb_reset(pe->phb, config_addr, option);
 }
 
 /**
@@ -641,110 +829,51 @@
  * pseries_eeh_configure_bridge - Configure PCI bridges in the indicated PE
  * @pe: EEH PE
  *
- * The function will be called to reconfigure the bridges included
- * in the specified PE so that the mulfunctional PE would be recovered
- * again.
  */
 static int pseries_eeh_configure_bridge(struct eeh_pe *pe)
 {
 	int config_addr;
-	int ret;
-	/* Waiting 0.2s maximum before skipping configuration */
-	int max_wait = 200;
 
 	/* Figure out the PE address */
 	config_addr = pe->config_addr;
 	if (pe->addr)
 		config_addr = pe->addr;
 
-	while (max_wait > 0) {
-		ret = rtas_call(ibm_configure_pe, 3, 1, NULL,
-				config_addr, BUID_HI(pe->phb->buid),
-				BUID_LO(pe->phb->buid));
-
-		if (!ret)
-			return ret;
-		if (ret < 0)
-			break;
-
-		/*
-		 * If RTAS returns a delay value that's above 100ms, cut it
-		 * down to 100ms in case firmware made a mistake.  For more
-		 * on how these delay values work see rtas_busy_delay_time
-		 */
-		if (ret > RTAS_EXTENDED_DELAY_MIN+2 &&
-		    ret <= RTAS_EXTENDED_DELAY_MAX)
-			ret = RTAS_EXTENDED_DELAY_MIN+2;
-
-		max_wait -= rtas_busy_delay_time(ret);
-
-		if (max_wait < 0)
-			break;
-
-		rtas_busy_delay(ret);
-	}
-
-	pr_warn("%s: Unable to configure bridge PHB#%x-PE#%x (%d)\n",
-		__func__, pe->phb->global_number, pe->addr, ret);
-	/* PAPR defines -3 as "Parameter Error" for this function: */
-	if (ret == -3)
-		return -EINVAL;
-	else
-		return -EIO;
+	return pseries_eeh_phb_configure_bridge(pe->phb, config_addr);
 }
 
 /**
  * pseries_eeh_read_config - Read PCI config space
- * @pdn: PCI device node
- * @where: PCI address
+ * @edev: EEH device handle
+ * @where: PCI config space offset
  * @size: size to read
  * @val: return value
  *
  * Read config space from the specified device
  */
-static int pseries_eeh_read_config(struct pci_dn *pdn, int where, int size, u32 *val)
+static int pseries_eeh_read_config(struct eeh_dev *edev, int where, int size, u32 *val)
 {
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
 	return rtas_read_config(pdn, where, size, val);
 }
 
 /**
  * pseries_eeh_write_config - Write PCI config space
- * @pdn: PCI device node
- * @where: PCI address
+ * @edev: EEH device handle
+ * @where: PCI config space offset
  * @size: size to write
  * @val: value to be written
  *
  * Write config space to the specified device
  */
-static int pseries_eeh_write_config(struct pci_dn *pdn, int where, int size, u32 val)
+static int pseries_eeh_write_config(struct eeh_dev *edev, int where, int size, u32 val)
 {
+	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
+
 	return rtas_write_config(pdn, where, size, val);
 }
 
-static int pseries_eeh_restore_config(struct pci_dn *pdn)
-{
-	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
-	s64 ret = 0;
-
-	if (!edev)
-		return -EEXIST;
-
-	/*
-	 * FIXME: The MPS, error routing rules, timeout setting are worthy
-	 * to be exported by firmware in extendible way.
-	 */
-	if (edev->physfn)
-		ret = eeh_restore_vf_config(pdn);
-
-	if (ret) {
-		pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n",
-			__func__, edev->pe_config_addr, ret);
-		return -EIO;
-	}
-
-	return ret;
-}
-
 #ifdef CONFIG_PCI_IOV
 int pseries_send_allow_unfreeze(struct pci_dn *pdn,
 				u16 *vf_pe_array, int cur_vfs)
@@ -772,8 +901,8 @@
 
 static int pseries_call_allow_unfreeze(struct eeh_dev *edev)
 {
+	int cur_vfs = 0, rc = 0, vf_index, bus, devfn, vf_pe_num;
 	struct pci_dn *pdn, *tmp, *parent, *physfn_pdn;
-	int cur_vfs = 0, rc = 0, vf_index, bus, devfn;
 	u16 *vf_pe_array;
 
 	vf_pe_array = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
@@ -806,8 +935,10 @@
 			}
 		} else {
 			pdn = pci_get_pdn(edev->pdev);
-			vf_pe_array[0] = cpu_to_be16(pdn->pe_number);
 			physfn_pdn = pci_get_pdn(edev->physfn);
+
+			vf_pe_num = physfn_pdn->pe_num_map[edev->vf_index];
+			vf_pe_array[0] = cpu_to_be16(vf_pe_num);
 			rc = pseries_send_allow_unfreeze(physfn_pdn,
 							 vf_pe_array, 1);
 			pdn->last_allow_rc = rc;
@@ -818,10 +949,8 @@
 	return rc;
 }
 
-static int pseries_notify_resume(struct pci_dn *pdn)
+static int pseries_notify_resume(struct eeh_dev *edev)
 {
-	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
-
 	if (!edev)
 		return -EEXIST;
 
@@ -841,7 +970,6 @@
 	.init			= pseries_eeh_init,
 	.probe			= pseries_eeh_probe,
 	.set_option		= pseries_eeh_set_option,
-	.get_pe_addr		= pseries_eeh_get_pe_addr,
 	.get_state		= pseries_eeh_get_state,
 	.reset			= pseries_eeh_reset,
 	.get_log		= pseries_eeh_get_log,
@@ -850,7 +978,7 @@
 	.read_config		= pseries_eeh_read_config,
 	.write_config		= pseries_eeh_write_config,
 	.next_error		= NULL,
-	.restore_config		= pseries_eeh_restore_config,
+	.restore_config		= NULL, /* NB: configure_bridge() does this */
 #ifdef CONFIG_PCI_IOV
 	.notify_resume		= pseries_notify_resume
 #endif
diff --git a/arch/powerpc/platforms/pseries/firmware.c b/arch/powerpc/platforms/pseries/firmware.c
index 3e49cc2..4c7b7f5 100644
--- a/arch/powerpc/platforms/pseries/firmware.c
+++ b/arch/powerpc/platforms/pseries/firmware.c
@@ -65,6 +65,7 @@
 	{FW_FEATURE_HPT_RESIZE,		"hcall-hpt-resize"},
 	{FW_FEATURE_BLOCK_REMOVE,	"hcall-block-remove"},
 	{FW_FEATURE_PAPR_SCM,		"hcall-scm"},
+	{FW_FEATURE_RPT_INVALIDATE,	"hcall-rpt-invalidate"},
 };
 
 /* Build up the firmware features bitmask using the contents of
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 3e8cbfe..c6e0d8a 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -35,54 +35,10 @@
 #include <asm/topology.h>
 
 #include "pseries.h"
-#include "offline_states.h"
 
 /* This version can't take the spinlock, because it never returns */
 static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;
 
-static DEFINE_PER_CPU(enum cpu_state_vals, preferred_offline_state) =
-							CPU_STATE_OFFLINE;
-static DEFINE_PER_CPU(enum cpu_state_vals, current_state) = CPU_STATE_OFFLINE;
-
-static enum cpu_state_vals default_offline_state = CPU_STATE_OFFLINE;
-
-static bool cede_offline_enabled __read_mostly = true;
-
-/*
- * Enable/disable cede_offline when available.
- */
-static int __init setup_cede_offline(char *str)
-{
-	return (kstrtobool(str, &cede_offline_enabled) == 0);
-}
-
-__setup("cede_offline=", setup_cede_offline);
-
-enum cpu_state_vals get_cpu_current_state(int cpu)
-{
-	return per_cpu(current_state, cpu);
-}
-
-void set_cpu_current_state(int cpu, enum cpu_state_vals state)
-{
-	per_cpu(current_state, cpu) = state;
-}
-
-enum cpu_state_vals get_preferred_offline_state(int cpu)
-{
-	return per_cpu(preferred_offline_state, cpu);
-}
-
-void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
-{
-	per_cpu(preferred_offline_state, cpu) = state;
-}
-
-void set_default_offline_state(int cpu)
-{
-	per_cpu(preferred_offline_state, cpu) = default_offline_state;
-}
-
 static void rtas_stop_self(void)
 {
 	static struct rtas_args args;
@@ -101,9 +57,7 @@
 
 static void pseries_mach_cpu_die(void)
 {
-	unsigned int cpu = smp_processor_id();
 	unsigned int hwcpu = hard_smp_processor_id();
-	u8 cede_latency_hint = 0;
 
 	local_irq_disable();
 	idle_task_exit();
@@ -112,49 +66,6 @@
 	else
 		xics_teardown_cpu();
 
-	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
-		set_cpu_current_state(cpu, CPU_STATE_INACTIVE);
-		if (ppc_md.suspend_disable_cpu)
-			ppc_md.suspend_disable_cpu();
-
-		cede_latency_hint = 2;
-
-		get_lppaca()->idle = 1;
-		if (!lppaca_shared_proc(get_lppaca()))
-			get_lppaca()->donate_dedicated_cpu = 1;
-
-		while (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
-			while (!prep_irq_for_idle()) {
-				local_irq_enable();
-				local_irq_disable();
-			}
-
-			extended_cede_processor(cede_latency_hint);
-		}
-
-		local_irq_disable();
-
-		if (!lppaca_shared_proc(get_lppaca()))
-			get_lppaca()->donate_dedicated_cpu = 0;
-		get_lppaca()->idle = 0;
-
-		if (get_preferred_offline_state(cpu) == CPU_STATE_ONLINE) {
-			unregister_slb_shadow(hwcpu);
-
-			hard_irq_disable();
-			/*
-			 * Call to start_secondary_resume() will not return.
-			 * Kernel stack will be reset and start_secondary()
-			 * will be called to continue the online operation.
-			 */
-			start_secondary_resume();
-		}
-	}
-
-	/* Requested state is CPU_STATE_OFFLINE at this point */
-	WARN_ON(get_preferred_offline_state(cpu) != CPU_STATE_OFFLINE);
-
-	set_cpu_current_state(cpu, CPU_STATE_OFFLINE);
 	unregister_slb_shadow(hwcpu);
 	rtas_stop_self();
 
@@ -200,24 +111,13 @@
 	int cpu_status = 1;
 	unsigned int pcpu = get_hard_smp_processor_id(cpu);
 
-	if (get_preferred_offline_state(cpu) == CPU_STATE_INACTIVE) {
-		cpu_status = 1;
-		for (tries = 0; tries < 5000; tries++) {
-			if (get_cpu_current_state(cpu) == CPU_STATE_INACTIVE) {
-				cpu_status = 0;
-				break;
-			}
-			msleep(1);
-		}
-	} else if (get_preferred_offline_state(cpu) == CPU_STATE_OFFLINE) {
+	for (tries = 0; tries < 25; tries++) {
+		cpu_status = smp_query_cpu_stopped(pcpu);
+		if (cpu_status == QCSS_STOPPED ||
+		    cpu_status == QCSS_HARDWARE_ERROR)
+			break;
+		cpu_relax();
 
-		for (tries = 0; tries < 25; tries++) {
-			cpu_status = smp_query_cpu_stopped(pcpu);
-			if (cpu_status == QCSS_STOPPED ||
-			    cpu_status == QCSS_HARDWARE_ERROR)
-				break;
-			cpu_relax();
-		}
 	}
 
 	if (cpu_status != 0) {
@@ -359,28 +259,14 @@
 			if (get_hard_smp_processor_id(cpu) != thread)
 				continue;
 
-			if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
+			if (!cpu_online(cpu))
 				break;
 
-			if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
-				set_preferred_offline_state(cpu,
-							    CPU_STATE_OFFLINE);
-				cpu_maps_update_done();
-				timed_topology_update(1);
-				rc = device_offline(get_cpu_device(cpu));
-				if (rc)
-					goto out;
-				cpu_maps_update_begin();
-				break;
-			}
-
-			/*
-			 * The cpu is in CPU_STATE_INACTIVE.
-			 * Upgrade it's state to CPU_STATE_OFFLINE.
-			 */
-			set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
-			WARN_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS);
-			__cpu_die(cpu);
+			cpu_maps_update_done();
+			rc = device_offline(get_cpu_device(cpu));
+			if (rc)
+				goto out;
+			cpu_maps_update_begin();
 			break;
 		}
 		if (cpu == num_possible_cpus()) {
@@ -414,10 +300,7 @@
 		for_each_present_cpu(cpu) {
 			if (get_hard_smp_processor_id(cpu) != thread)
 				continue;
-			BUG_ON(get_cpu_current_state(cpu)
-					!= CPU_STATE_OFFLINE);
 			cpu_maps_update_done();
-			timed_topology_update(1);
 			find_and_online_cpu_nid(cpu);
 			rc = device_online(get_cpu_device(cpu));
 			if (rc) {
@@ -854,7 +737,6 @@
 	parent = of_find_node_by_path("/cpus");
 	if (!parent) {
 		pr_warn("Could not find CPU root node in device tree\n");
-		kfree(cpu_drcs);
 		return -1;
 	}
 
@@ -896,25 +778,6 @@
 	return rc;
 }
 
-int dlpar_cpu_readd(int cpu)
-{
-	struct device_node *dn;
-	struct device *dev;
-	u32 drc_index;
-	int rc;
-
-	dev = get_cpu_device(cpu);
-	dn = dev->of_node;
-
-	rc = of_property_read_u32(dn, "ibm,my-drc-index", &drc_index);
-
-	rc = dlpar_cpu_remove_by_index(drc_index);
-	if (!rc)
-		rc = dlpar_cpu_add(drc_index);
-
-	return rc;
-}
-
 int dlpar_cpu(struct pseries_hp_errorlog *hp_elog)
 {
 	u32 count, drc_index;
@@ -1013,27 +876,8 @@
 	.notifier_call = pseries_smp_notifier,
 };
 
-#define MAX_CEDE_LATENCY_LEVELS		4
-#define	CEDE_LATENCY_PARAM_LENGTH	10
-#define CEDE_LATENCY_PARAM_MAX_LENGTH	\
-	(MAX_CEDE_LATENCY_LEVELS * CEDE_LATENCY_PARAM_LENGTH * sizeof(char))
-#define CEDE_LATENCY_TOKEN		45
-
-static char cede_parameters[CEDE_LATENCY_PARAM_MAX_LENGTH];
-
-static int parse_cede_parameters(void)
-{
-	memset(cede_parameters, 0, CEDE_LATENCY_PARAM_MAX_LENGTH);
-	return rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
-			 NULL,
-			 CEDE_LATENCY_TOKEN,
-			 __pa(cede_parameters),
-			 CEDE_LATENCY_PARAM_MAX_LENGTH);
-}
-
 static int __init pseries_cpu_hotplug_init(void)
 {
-	int cpu;
 	int qcss_tok;
 
 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
@@ -1056,16 +900,8 @@
 	smp_ops->cpu_die = pseries_cpu_die;
 
 	/* Processors can be added/removed only on LPAR */
-	if (firmware_has_feature(FW_FEATURE_LPAR)) {
+	if (firmware_has_feature(FW_FEATURE_LPAR))
 		of_reconfig_notifier_register(&pseries_smp_nb);
-		cpu_maps_update_begin();
-		if (cede_offline_enabled && parse_cede_parameters() == 0) {
-			default_offline_state = CPU_STATE_INACTIVE;
-			for_each_online_cpu(cpu)
-				set_default_offline_state(cpu);
-		}
-		cpu_maps_update_done();
-	}
 
 	return 0;
 }
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 5ace2f9..5d545b7 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -22,12 +22,10 @@
 #include <asm/drmem.h>
 #include "pseries.h"
 
-static bool rtas_hp_event;
-
 unsigned long pseries_memory_block_size(void)
 {
 	struct device_node *np;
-	unsigned int memblock_size = MIN_MEMORY_BLOCK_SIZE;
+	u64 memblock_size = MIN_MEMORY_BLOCK_SIZE;
 	struct resource r;
 
 	np = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
@@ -487,40 +485,6 @@
 	return rc;
 }
 
-static int dlpar_memory_readd_by_index(u32 drc_index)
-{
-	struct drmem_lmb *lmb;
-	int lmb_found;
-	int rc;
-
-	pr_info("Attempting to update LMB, drc index %x\n", drc_index);
-
-	lmb_found = 0;
-	for_each_drmem_lmb(lmb) {
-		if (lmb->drc_index == drc_index) {
-			lmb_found = 1;
-			rc = dlpar_remove_lmb(lmb);
-			if (!rc) {
-				rc = dlpar_add_lmb(lmb);
-				if (rc)
-					dlpar_release_drc(lmb->drc_index);
-			}
-			break;
-		}
-	}
-
-	if (!lmb_found)
-		rc = -EINVAL;
-
-	if (rc)
-		pr_info("Failed to update memory at %llx\n",
-			lmb->base_addr);
-	else
-		pr_info("Memory at %llx was updated\n", lmb->base_addr);
-
-	return rc;
-}
-
 static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
 {
 	struct drmem_lmb *lmb, *start_lmb, *end_lmb;
@@ -617,10 +581,6 @@
 {
 	return -EOPNOTSUPP;
 }
-static int dlpar_memory_readd_by_index(u32 drc_index)
-{
-	return -EOPNOTSUPP;
-}
 
 static int dlpar_memory_remove_by_ic(u32 lmbs_to_remove, u32 drc_index)
 {
@@ -903,21 +863,14 @@
 		}
 
 		break;
-	case PSERIES_HP_ELOG_ACTION_READD:
-		drc_index = hp_elog->_drc_u.drc_index;
-		rc = dlpar_memory_readd_by_index(drc_index);
-		break;
 	default:
 		pr_err("Invalid action (%d) specified\n", hp_elog->action);
 		rc = -EINVAL;
 		break;
 	}
 
-	if (!rc) {
-		rtas_hp_event = true;
+	if (!rc)
 		rc = drmem_update_dt();
-		rtas_hp_event = false;
-	}
 
 	unlock_device_hotplug();
 	return rc;
@@ -953,60 +906,6 @@
 	return (ret < 0) ? -EINVAL : 0;
 }
 
-static int pseries_update_drconf_memory(struct of_reconfig_data *pr)
-{
-	struct of_drconf_cell_v1 *new_drmem, *old_drmem;
-	unsigned long memblock_size;
-	u32 entries;
-	__be32 *p;
-	int i, rc = -EINVAL;
-
-	if (rtas_hp_event)
-		return 0;
-
-	memblock_size = pseries_memory_block_size();
-	if (!memblock_size)
-		return -EINVAL;
-
-	if (!pr->old_prop)
-		return 0;
-
-	p = (__be32 *) pr->old_prop->value;
-	if (!p)
-		return -EINVAL;
-
-	/* The first int of the property is the number of lmb's described
-	 * by the property. This is followed by an array of of_drconf_cell
-	 * entries. Get the number of entries and skip to the array of
-	 * of_drconf_cell's.
-	 */
-	entries = be32_to_cpu(*p++);
-	old_drmem = (struct of_drconf_cell_v1 *)p;
-
-	p = (__be32 *)pr->prop->value;
-	p++;
-	new_drmem = (struct of_drconf_cell_v1 *)p;
-
-	for (i = 0; i < entries; i++) {
-		if ((be32_to_cpu(old_drmem[i].flags) & DRCONF_MEM_ASSIGNED) &&
-		    (!(be32_to_cpu(new_drmem[i].flags) & DRCONF_MEM_ASSIGNED))) {
-			rc = pseries_remove_memblock(
-				be64_to_cpu(old_drmem[i].base_addr),
-						     memblock_size);
-			break;
-		} else if ((!(be32_to_cpu(old_drmem[i].flags) &
-			    DRCONF_MEM_ASSIGNED)) &&
-			    (be32_to_cpu(new_drmem[i].flags) &
-			    DRCONF_MEM_ASSIGNED)) {
-			rc = memblock_add(be64_to_cpu(old_drmem[i].base_addr),
-					  memblock_size);
-			rc = (rc < 0) ? -EINVAL : 0;
-			break;
-		}
-	}
-	return rc;
-}
-
 static int pseries_memory_notifier(struct notifier_block *nb,
 				   unsigned long action, void *data)
 {
@@ -1020,10 +919,6 @@
 	case OF_RECONFIG_DETACH_NODE:
 		err = pseries_remove_mem_node(rd->dn);
 		break;
-	case OF_RECONFIG_UPDATE_PROPERTY:
-		if (!strcmp(rd->prop->name, "ibm,dynamic-memory"))
-			err = pseries_update_drconf_memory(rd);
-		break;
 	}
 	return notifier_from_errno(err);
 }
diff --git a/arch/powerpc/platforms/pseries/hvcserver.c b/arch/powerpc/platforms/pseries/hvcserver.c
index 267139b..96e18d3 100644
--- a/arch/powerpc/platforms/pseries/hvcserver.c
+++ b/arch/powerpc/platforms/pseries/hvcserver.c
@@ -45,7 +45,7 @@
 		case H_LONG_BUSY_ORDER_10_SEC:
 		case H_LONG_BUSY_ORDER_100_SEC:
 			return -EBUSY;
-		case H_FUNCTION: /* fall through */
+		case H_FUNCTION:
 		default:
 			return -EPERM;
 	}
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index f71ff2c..baf24ea 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -1681,9 +1681,11 @@
 
 	if (table_size)
 		flags |= PROC_TABLE_NEW;
-	if (radix_enabled())
-		flags |= PROC_TABLE_RADIX | PROC_TABLE_GTSE;
-	else
+	if (radix_enabled()) {
+		flags |= PROC_TABLE_RADIX;
+		if (mmu_has_feature(MMU_FTR_GTSE))
+			flags |= PROC_TABLE_GTSE;
+	} else
 		flags |= PROC_TABLE_HPT_SLB;
 	for (;;) {
 		rc = plpar_hcall_norets(H_REGISTER_PROC_TBL, flags, base,
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 10d9829..d6f4162 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -6,6 +6,9 @@
  * Copyright (C) 2010 IBM Corporation
  */
 
+
+#define pr_fmt(fmt) "mobility: " fmt
+
 #include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/kobject.h>
@@ -64,6 +67,8 @@
 	if (!dn)
 		return -ENOENT;
 
+	pr_debug("removing node %pOFfp\n", dn);
+
 	dlpar_detach_node(dn);
 	of_node_put(dn);
 	return 0;
@@ -122,6 +127,7 @@
 	}
 
 	if (!more) {
+		pr_debug("updating node %pOF property %s\n", dn, name);
 		of_update_property(dn, new_prop);
 		*prop = NULL;
 	}
@@ -240,33 +246,12 @@
 	if (rc)
 		dlpar_free_cc_nodes(dn);
 
+	pr_debug("added node %pOFfp\n", dn);
+
 	of_node_put(parent_dn);
 	return rc;
 }
 
-static void prrn_update_node(__be32 phandle)
-{
-	struct pseries_hp_errorlog hp_elog;
-	struct device_node *dn;
-
-	/*
-	 * If a node is found from a the given phandle, the phandle does not
-	 * represent the drc index of an LMB and we can ignore.
-	 */
-	dn = of_find_node_by_phandle(be32_to_cpu(phandle));
-	if (dn) {
-		of_node_put(dn);
-		return;
-	}
-
-	hp_elog.resource = PSERIES_HP_ELOG_RESOURCE_MEM;
-	hp_elog.action = PSERIES_HP_ELOG_ACTION_READD;
-	hp_elog.id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
-	hp_elog._drc_u.drc_index = phandle;
-
-	handle_dlpar_errorlog(&hp_elog);
-}
-
 int pseries_devicetree_update(s32 scope)
 {
 	char *rtas_buf;
@@ -305,10 +290,6 @@
 					break;
 				case UPDATE_DT_NODE:
 					update_dt_node(phandle, scope);
-
-					if (scope == PRRN_SCOPE)
-						prrn_update_node(phandle);
-
 					break;
 				case ADD_DT_NODE:
 					drc_index = *data++;
@@ -388,8 +369,6 @@
 	if (rc)
 		return rc;
 
-	stop_topology_update();
-
 	do {
 		rc = rtas_ibm_suspend_me(streamid);
 		if (rc == -EAGAIN)
@@ -401,8 +380,6 @@
 
 	post_mobility_fixup();
 
-	start_topology_update();
-
 	return count;
 }
 
@@ -427,11 +404,11 @@
 
 	rc = sysfs_create_file(mobility_kobj, &class_attr_migration.attr);
 	if (rc)
-		pr_err("mobility: unable to create migration sysfs file (%d)\n", rc);
+		pr_err("unable to create migration sysfs file (%d)\n", rc);
 
 	rc = sysfs_create_file(mobility_kobj, &class_attr_api_version.attr.attr);
 	if (rc)
-		pr_err("mobility: unable to create api_version sysfs file (%d)\n", rc);
+		pr_err("unable to create api_version sysfs file (%d)\n", rc);
 
 	return 0;
 }
diff --git a/arch/powerpc/platforms/pseries/offline_states.h b/arch/powerpc/platforms/pseries/offline_states.h
deleted file mode 100644
index 51414ae..0000000
--- a/arch/powerpc/platforms/pseries/offline_states.h
+++ /dev/null
@@ -1,38 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _OFFLINE_STATES_H_
-#define _OFFLINE_STATES_H_
-
-/* Cpu offline states go here */
-enum cpu_state_vals {
-	CPU_STATE_OFFLINE,
-	CPU_STATE_INACTIVE,
-	CPU_STATE_ONLINE,
-	CPU_MAX_OFFLINE_STATES
-};
-
-#ifdef CONFIG_HOTPLUG_CPU
-extern enum cpu_state_vals get_cpu_current_state(int cpu);
-extern void set_cpu_current_state(int cpu, enum cpu_state_vals state);
-extern void set_preferred_offline_state(int cpu, enum cpu_state_vals state);
-extern void set_default_offline_state(int cpu);
-#else
-static inline enum cpu_state_vals get_cpu_current_state(int cpu)
-{
-	return CPU_STATE_ONLINE;
-}
-
-static inline void set_cpu_current_state(int cpu, enum cpu_state_vals state)
-{
-}
-
-static inline void set_preferred_offline_state(int cpu, enum cpu_state_vals state)
-{
-}
-
-static inline void set_default_offline_state(int cpu)
-{
-}
-#endif
-
-extern enum cpu_state_vals get_preferred_offline_state(int cpu);
-#endif
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 9c56907..f439f0d 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -13,9 +13,11 @@
 #include <linux/platform_device.h>
 #include <linux/delay.h>
 #include <linux/seq_buf.h>
+#include <linux/nd.h>
 
 #include <asm/plpar_wrappers.h>
 #include <asm/papr_pdsm.h>
+#include <asm/mce.h>
 
 #define BIND_ANY_ADDR (~0ul)
 
@@ -62,6 +64,26 @@
 				    PAPR_PMEM_HEALTH_FATAL |	\
 				    PAPR_PMEM_HEALTH_UNHEALTHY)
 
+#define PAPR_SCM_PERF_STATS_EYECATCHER __stringify(SCMSTATS)
+#define PAPR_SCM_PERF_STATS_VERSION 0x1
+
+/* Struct holding a single performance metric */
+struct papr_scm_perf_stat {
+	u8 stat_id[8];
+	__be64 stat_val;
+} __packed;
+
+/* Struct exchanged between kernel and PHYP for fetching drc perf stats */
+struct papr_scm_perf_stats {
+	u8 eye_catcher[8];
+	/* Should be PAPR_SCM_PERF_STATS_VERSION */
+	__be32 stats_version;
+	/* Number of stats following */
+	__be32 num_statistics;
+	/* zero or more performance matrics */
+	struct papr_scm_perf_stat scm_statistic[];
+} __packed;
+
 /* private struct associated with each region */
 struct papr_scm_priv {
 	struct platform_device *pdev;
@@ -80,6 +102,7 @@
 	struct resource res;
 	struct nd_region *region;
 	struct nd_interleave_set nd_set;
+	struct list_head region_list;
 
 	/* Protect dimm health data from concurrent read/writes */
 	struct mutex health_mutex;
@@ -89,8 +112,14 @@
 
 	/* Health information for the dimm */
 	u64 health_bitmap;
+
+	/* length of the stat buffer as expected by phyp */
+	size_t stat_buffer_len;
 };
 
+static LIST_HEAD(papr_nd_regions);
+static DEFINE_MUTEX(papr_ndr_lock);
+
 static int drc_pmem_bind(struct papr_scm_priv *p)
 {
 	unsigned long ret[PLPAR_HCALL_BUFSIZE];
@@ -195,6 +224,79 @@
 }
 
 /*
+ * Query the DIMM performance stats from PHYP and copy them (if returned) to
+ * the provided struct papr_scm_perf_stats buffer 'buff_stats', which must be
+ * large enough to hold 'num_stats' records plus the header.
+ * - If buff_stats == NULL, the return value is the size in bytes of the
+ * buffer needed to hold all supported performance statistics.
+ * - If buff_stats != NULL and num_stats == 0, copy all known
+ * performance statistics to 'buff_stats', which is expected to be large
+ * enough to hold them.
+ * - If buff_stats != NULL and num_stats > 0, copy the requested
+ * performance statistics to 'buff_stats'.
+ */
+static ssize_t drc_pmem_query_stats(struct papr_scm_priv *p,
+				    struct papr_scm_perf_stats *buff_stats,
+				    unsigned int num_stats)
+{
+	unsigned long ret[PLPAR_HCALL_BUFSIZE];
+	size_t size;
+	s64 rc;
+
+	/* Setup the out buffer */
+	if (buff_stats) {
+		memcpy(buff_stats->eye_catcher,
+		       PAPR_SCM_PERF_STATS_EYECATCHER, 8);
+		buff_stats->stats_version =
+			cpu_to_be32(PAPR_SCM_PERF_STATS_VERSION);
+		buff_stats->num_statistics =
+			cpu_to_be32(num_stats);
+
+		/*
+		 * Calculate the buffer size based on num-stats provided
+		 * or use the prefetched max buffer length
+		 */
+		if (num_stats)
+			/* Calculate size from the num_stats */
+			size = sizeof(struct papr_scm_perf_stats) +
+				num_stats * sizeof(struct papr_scm_perf_stat);
+		else
+			size = p->stat_buffer_len;
+	} else {
+		/* With no out buffer, the size is ignored */
+		size = 0;
+	}
+
+	/* Do the HCALL asking PHYP for info */
+	rc = plpar_hcall(H_SCM_PERFORMANCE_STATS, ret, p->drc_index,
+			 buff_stats ? virt_to_phys(buff_stats) : 0,
+			 size);
+
+	/* Check if the error was due to an unknown stat-id */
+	if (rc == H_PARTIAL) {
+		dev_err(&p->pdev->dev,
+			"Unknown performance stats, Err:0x%016lX\n", ret[0]);
+		return -ENOENT;
+	} else if (rc != H_SUCCESS) {
+		dev_err(&p->pdev->dev,
+			"Failed to query performance stats, Err:%lld\n", rc);
+		return -EIO;
+
+	} else if (!size) {
+		/* Handle case where stat buffer size was requested */
+		dev_dbg(&p->pdev->dev,
+			"Performance stats size %ld\n", ret[0]);
+		return ret[0];
+	}
+
+	/* Successfully fetched the requested stats from phyp */
+	dev_dbg(&p->pdev->dev,
+		"Performance stats returned %d stats\n",
+		be32_to_cpu(buff_stats->num_statistics));
+	return 0;
+}
+
+/*
  * Issue hcall to retrieve dimm health info and populate papr_scm_priv with the
  * health information.
  */
@@ -416,6 +518,51 @@
 	return 0;
 }
 
+static int papr_pdsm_fuel_gauge(struct papr_scm_priv *p,
+				union nd_pdsm_payload *payload)
+{
+	int rc, size;
+	u64 statval;
+	struct papr_scm_perf_stat *stat;
+	struct papr_scm_perf_stats *stats;
+
+	/* Silently fail if fetching performance metrics isn't supported */
+	if (!p->stat_buffer_len)
+		return 0;
+
+	/* Allocate a request buffer large enough to hold a single performance stat */
+	size = sizeof(struct papr_scm_perf_stats) +
+		sizeof(struct papr_scm_perf_stat);
+
+	stats = kzalloc(size, GFP_KERNEL);
+	if (!stats)
+		return -ENOMEM;
+
+	stat = &stats->scm_statistic[0];
+	memcpy(&stat->stat_id, "MemLife ", sizeof(stat->stat_id));
+	stat->stat_val = 0;
+
+	/* Fetch the fuel gauge and populate it in payload */
+	rc = drc_pmem_query_stats(p, stats, 1);
+	if (rc < 0) {
+		dev_dbg(&p->pdev->dev, "Err(%d) fetching fuel gauge\n", rc);
+		goto free_stats;
+	}
+
+	statval = be64_to_cpu(stat->stat_val);
+	dev_dbg(&p->pdev->dev,
+		"Fetched fuel-gauge %llu", statval);
+	payload->health.extension_flags |=
+		PDSM_DIMM_HEALTH_RUN_GAUGE_VALID;
+	payload->health.dimm_fuel_gauge = statval;
+
+	rc = sizeof(struct nd_papr_pdsm_health);
+
+free_stats:
+	kfree(stats);
+	return rc;
+}
+
 /* Fetch the DIMM health info and populate it in provided package. */
 static int papr_pdsm_health(struct papr_scm_priv *p,
 			    union nd_pdsm_payload *payload)
@@ -456,6 +603,10 @@
 
 	/* struct populated hence can release the mutex now */
 	mutex_unlock(&p->health_mutex);
+
+	/* Populate the fuel gauge meter in the payload */
+	papr_pdsm_fuel_gauge(p, payload);
+
 	rc = sizeof(struct nd_papr_pdsm_health);
 
 out:
@@ -631,6 +782,48 @@
 	return 0;
 }
 
+static ssize_t perf_stats_show(struct device *dev,
+			       struct device_attribute *attr, char *buf)
+{
+	int index, rc;
+	struct seq_buf s;
+	struct papr_scm_perf_stat *stat;
+	struct papr_scm_perf_stats *stats;
+	struct nvdimm *dimm = to_nvdimm(dev);
+	struct papr_scm_priv *p = nvdimm_provider_data(dimm);
+
+	if (!p->stat_buffer_len)
+		return -ENOENT;
+
+	/* Allocate the buffer for phyp where stats are written */
+	stats = kzalloc(p->stat_buffer_len, GFP_KERNEL);
+	if (!stats)
+		return -ENOMEM;
+
+	/* Ask phyp to return all dimm perf stats */
+	rc = drc_pmem_query_stats(p, stats, 0);
+	if (rc)
+		goto free_stats;
+	/*
+	 * Go through the returned output buffer and print stats and
+	 * values. Since stat_id is essentially a char string of
+	 * 8 bytes, simply use the string format specifier to print it.
+	 */
+	seq_buf_init(&s, buf, PAGE_SIZE);
+	for (index = 0, stat = stats->scm_statistic;
+	     index < be32_to_cpu(stats->num_statistics);
+	     ++index, ++stat) {
+		seq_buf_printf(&s, "%.8s = 0x%016llX\n",
+			       stat->stat_id,
+			       be64_to_cpu(stat->stat_val));
+	}
+
+free_stats:
+	kfree(stats);
+	return rc ? rc : seq_buf_used(&s);
+}
+DEVICE_ATTR_RO(perf_stats);
+
 static ssize_t flags_show(struct device *dev,
 			  struct device_attribute *attr, char *buf)
 {
@@ -676,6 +869,7 @@
 /* papr_scm specific dimm attributes */
 static struct attribute *papr_nd_attributes[] = {
 	&dev_attr_flags.attr,
+	&dev_attr_perf_stats.attr,
 	NULL,
 };
 
@@ -696,6 +890,7 @@
 	struct nd_region_desc ndr_desc;
 	unsigned long dimm_flags;
 	int target_nid, online_nid;
+	ssize_t stat_size;
 
 	p->bus_desc.ndctl = papr_scm_ndctl;
 	p->bus_desc.module = THIS_MODULE;
@@ -759,6 +954,20 @@
 		dev_info(dev, "Region registered with target node %d and online node %d",
 			 target_nid, online_nid);
 
+	mutex_lock(&papr_ndr_lock);
+	list_add_tail(&p->region_list, &papr_nd_regions);
+	mutex_unlock(&papr_ndr_lock);
+
+	/* Try retrieving the stat buffer to see if it's supported */
+	stat_size = drc_pmem_query_stats(p, NULL, 0);
+	if (stat_size > 0) {
+		p->stat_buffer_len = stat_size;
+		dev_dbg(&p->pdev->dev, "Max perf-stat size %lu-bytes\n",
+			p->stat_buffer_len);
+	} else {
+		dev_info(&p->pdev->dev, "Dimm performance stats unavailable\n");
+	}
+
 	return 0;
 
 err:	nvdimm_bus_unregister(p->bus);
@@ -766,6 +975,68 @@
 	return -ENXIO;
 }
 
+static void papr_scm_add_badblock(struct nd_region *region,
+				  struct nvdimm_bus *bus, u64 phys_addr)
+{
+	u64 aligned_addr = ALIGN_DOWN(phys_addr, L1_CACHE_BYTES);
+
+	if (nvdimm_bus_add_badrange(bus, aligned_addr, L1_CACHE_BYTES)) {
+		pr_err("Bad block registration for 0x%llx failed\n", phys_addr);
+		return;
+	}
+
+	pr_debug("Add memory range (0x%llx - 0x%llx) as bad range\n",
+		 aligned_addr, aligned_addr + L1_CACHE_BYTES);
+
+	nvdimm_region_notify(region, NVDIMM_REVALIDATE_POISON);
+}
+
+static int handle_mce_ue(struct notifier_block *nb, unsigned long val,
+			 void *data)
+{
+	struct machine_check_event *evt = data;
+	struct papr_scm_priv *p;
+	u64 phys_addr;
+	bool found = false;
+
+	if (evt->error_type != MCE_ERROR_TYPE_UE)
+		return NOTIFY_DONE;
+
+	if (list_empty(&papr_nd_regions))
+		return NOTIFY_DONE;
+
+	/*
+	 * The physical address obtained here is PAGE_SIZE aligned, so get the
+	 * exact address from the effective address
+	 */
+	phys_addr = evt->u.ue_error.physical_address +
+			(evt->u.ue_error.effective_address & ~PAGE_MASK);
+
+	if (!evt->u.ue_error.physical_address_provided ||
+	    !is_zone_device_page(pfn_to_page(phys_addr >> PAGE_SHIFT)))
+		return NOTIFY_DONE;
+
+	/* mce notifier is called from a process context, so mutex is safe */
+	mutex_lock(&papr_ndr_lock);
+	list_for_each_entry(p, &papr_nd_regions, region_list) {
+		if (phys_addr >= p->res.start && phys_addr <= p->res.end) {
+			found = true;
+			break;
+		}
+	}
+
+	if (found)
+		papr_scm_add_badblock(p->region, p->bus, phys_addr);
+
+	mutex_unlock(&papr_ndr_lock);
+
+	return found ? NOTIFY_OK : NOTIFY_DONE;
+}
+
+static struct notifier_block mce_ue_nb = {
+	.notifier_call = handle_mce_ue
+};
+
 static int papr_scm_probe(struct platform_device *pdev)
 {
 	struct device_node *dn = pdev->dev.of_node;
@@ -866,6 +1137,10 @@
 {
 	struct papr_scm_priv *p = platform_get_drvdata(pdev);
 
+	mutex_lock(&papr_ndr_lock);
+	list_del(&p->region_list);
+	mutex_unlock(&papr_ndr_lock);
+
 	nvdimm_bus_unregister(p->bus);
 	drc_pmem_unbind(p);
 	kfree(p->bus_desc.provider_name);
@@ -876,6 +1151,7 @@
 
 static const struct of_device_id papr_scm_match[] = {
 	{ .compatible = "ibm,pmemory" },
+	{ .compatible = "ibm,pmemory-v2" },
 	{ },
 };
 
@@ -888,7 +1164,25 @@
 	},
 };
 
-module_platform_driver(papr_scm_driver);
+static int __init papr_scm_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&papr_scm_driver);
+	if (!ret)
+		mce_register_notifier(&mce_ue_nb);
+
+	return ret;
+}
+module_init(papr_scm_init);
+
+static void __exit papr_scm_exit(void)
+{
+	mce_unregister_notifier(&mce_ue_nb);
+	platform_driver_unregister(&papr_scm_driver);
+}
+module_exit(papr_scm_exit);
+
 MODULE_DEVICE_TABLE(of, papr_scm_match);
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("IBM Corporation");
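
A note on the buffer sizing used by drc_pmem_query_stats() and its callers:
the papr_scm_perf_stats header is 16 bytes (8-byte eye-catcher plus two
__be32 fields) and each papr_scm_perf_stat record is another 16 bytes
(8-byte stat id plus a __be64 value). A standalone sketch of the rule, with
stand-in types (illustrative only, not driver code):

    #include <stddef.h>
    #include <stdint.h>

    /* Stand-ins mirroring the packed layouts added above. */
    struct stat_rec { uint8_t id[8]; uint64_t val; };        /* 16 bytes */
    struct stat_hdr { uint8_t eye[8]; uint32_t ver, num; };  /* 16 bytes */

    /*
     * Buffer size for a request of num_stats records; zero means "use the
     * maximum length PHYP advertised", which the driver caches in
     * p->stat_buffer_len.
     */
    static size_t stats_buf_size(size_t num_stats, size_t max_len)
    {
        if (num_stats == 0)
            return max_len;
        return sizeof(struct stat_hdr) + num_stats * sizeof(struct stat_rec);
    }

So papr_pdsm_fuel_gauge(), which asks for the single "MemLife " statistic,
allocates 16 + 1 * 16 = 32 bytes.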
diff --git a/arch/powerpc/platforms/pseries/pci_dlpar.c b/arch/powerpc/platforms/pseries/pci_dlpar.c
index b3a38f5..f9ae17e 100644
--- a/arch/powerpc/platforms/pseries/pci_dlpar.c
+++ b/arch/powerpc/platforms/pseries/pci_dlpar.c
@@ -34,7 +34,7 @@
 	pci_devs_phb_init_dynamic(phb);
 
 	/* Create EEH devices for the PHB */
-	eeh_dev_phb_init_dynamic(phb);
+	eeh_phb_pe_create(phb);
 
 	if (dn->child)
 		pseries_eeh_init_edev_recursive(PCI_DN(dn));
diff --git a/arch/powerpc/platforms/pseries/pmem.c b/arch/powerpc/platforms/pseries/pmem.c
index f860a89..e1dc5d3 100644
--- a/arch/powerpc/platforms/pseries/pmem.c
+++ b/arch/powerpc/platforms/pseries/pmem.c
@@ -24,7 +24,6 @@
 #include <asm/topology.h>
 
 #include "pseries.h"
-#include "offline_states.h"
 
 static struct device_node *pmem_node;
 
@@ -147,6 +146,12 @@
 
 static int pseries_pmem_init(void)
 {
+	/*
+	 * Only supported on POWER8 and above.
+	 */
+	if (!cpu_has_feature(CPU_FTR_ARCH_207S))
+		return 0;
+
 	pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory");
 	if (!pmem_node)
 		return 0;
diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c
index 27094c8..2f4ee0a 100644
--- a/arch/powerpc/platforms/pseries/setup.c
+++ b/arch/powerpc/platforms/pseries/setup.c
@@ -359,7 +359,7 @@
  * to ever be a problem in practice we can move this into a kernel thread to
  * finish off the process later in boot.
  */
-void pseries_enable_reloc_on_exc(void)
+bool pseries_enable_reloc_on_exc(void)
 {
 	long rc;
 	unsigned int delay, total_delay = 0;
@@ -370,11 +370,13 @@
 			if (rc == H_P2) {
 				pr_info("Relocation on exceptions not"
 					" supported\n");
+				return false;
 			} else if (rc != H_SUCCESS) {
 				pr_warn("Unable to enable relocation"
 					" on exceptions: %ld\n", rc);
+				return false;
 			}
-			break;
+			return true;
 		}
 
 		delay = get_longbusy_msecs(rc);
@@ -383,7 +385,7 @@
 			pr_warn("Warning: Giving up waiting to enable "
 				"relocation on exceptions (%u msec)!\n",
 				total_delay);
-			return;
+			return false;
 		}
 
 		mdelay(delay);
@@ -746,6 +748,11 @@
 	smp_init_pseries();
 
 
+	if (radix_enabled() && !mmu_has_feature(MMU_FTR_GTSE))
+		if (!firmware_has_feature(FW_FEATURE_RPT_INVALIDATE))
+			panic("BUG: Radix support requires either GTSE or RPT_INVALIDATE\n");
+
+
 	/* openpic global configuration register (64-bit format). */
 	/* openpic Interrupt Source Unit pointer (64-bit format). */
 	/* python0 facility area (mmio) (64-bit format) REAL address. */
@@ -772,8 +779,10 @@
 	if (firmware_has_feature(FW_FEATURE_LPAR)) {
 		vpa_init(boot_cpuid);
 
-		if (lppaca_shared_proc(get_lppaca()))
+		if (lppaca_shared_proc(get_lppaca())) {
 			static_branch_enable(&shared_processor);
+			pv_spinlocks_init();
+		}
 
 		ppc_md.power_save = pseries_lpar_idle;
 		ppc_md.enable_pmcs = pseries_lpar_enable_pmcs;
@@ -832,12 +841,15 @@
 	return plpar_hcall_norets(H_SET_XDABR, dabr, dabrx);
 }
 
-static int pseries_set_dawr(unsigned long dawr, unsigned long dawrx)
+static int pseries_set_dawr(int nr, unsigned long dawr, unsigned long dawrx)
 {
 	/* PAPR says we can't set HYP */
 	dawrx &= ~DAWRX_HYP;
 
-	return  plpar_set_watchpoint0(dawr, dawrx);
+	if (nr == 0)
+		return plpar_set_watchpoint0(dawr, dawrx);
+	else
+		return plpar_set_watchpoint1(dawr, dawrx);
 }
 
 #define CMO_CHARACTERISTICS_TOKEN 44
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 6891710..9292249 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -44,8 +44,6 @@
 #include <asm/svm.h>
 
 #include "pseries.h"
-#include "offline_states.h"
-
 
 /*
  * The Primary thread of each non-boot processor was started from the OF client
@@ -108,10 +106,7 @@
 
 	/* Fixup atomic count: it exited inside IRQ handler. */
 	task_thread_info(paca_ptrs[lcpu]->__current)->preempt_count	= 0;
-#ifdef CONFIG_HOTPLUG_CPU
-	if (get_cpu_current_state(lcpu) == CPU_STATE_INACTIVE)
-		goto out;
-#endif
+
 	/* 
 	 * If the RTAS start-cpu token does not exist then presume the
 	 * cpu is already spinning.
@@ -126,9 +121,6 @@
 		return 0;
 	}
 
-#ifdef CONFIG_HOTPLUG_CPU
-out:
-#endif
 	return 1;
 }
 
@@ -143,10 +135,6 @@
 		vpa_init(cpu);
 
 	cpumask_clear_cpu(cpu, of_spin_mask);
-#ifdef CONFIG_HOTPLUG_CPU
-	set_cpu_current_state(cpu, CPU_STATE_ONLINE);
-	set_default_offline_state(cpu);
-#endif
 }
 
 static int smp_pSeries_kick_cpu(int nr)
@@ -163,20 +151,6 @@
 	 * the processor will continue on to secondary_start
 	 */
 	paca_ptrs[nr]->cpu_start = 1;
-#ifdef CONFIG_HOTPLUG_CPU
-	set_preferred_offline_state(nr, CPU_STATE_ONLINE);
-
-	if (get_cpu_current_state(nr) == CPU_STATE_INACTIVE) {
-		long rc;
-		unsigned long hcpuid;
-
-		hcpuid = get_hard_smp_processor_id(nr);
-		rc = plpar_hcall_norets(H_PROD, hcpuid);
-		if (rc != H_SUCCESS)
-			printk(KERN_ERR "Error: Prod to wake up processor %d "
-						"Ret= %ld\n", nr, rc);
-	}
-#endif
 
 	return 0;
 }
@@ -188,13 +162,16 @@
 	return 0;
 }
 
-static void smp_pseries_cause_ipi(int cpu)
+/* Cause IPI as setup by the interrupt controller (xics or xive) */
+static void (*ic_cause_ipi)(int cpu) __ro_after_init;
+
+/* Use msgsndp doorbells if the target is a sibling, else the interrupt controller */
+static void dbell_or_ic_cause_ipi(int cpu)
 {
-	/* POWER9 should not use this handler */
 	if (doorbell_try_core_ipi(cpu))
 		return;
 
-	icp_ops->cause_ipi(cpu);
+	ic_cause_ipi(cpu);
 }
 
 static int pseries_cause_nmi_ipi(int cpu)
@@ -218,26 +195,49 @@
 	return 0;
 }
 
-static __init void pSeries_smp_probe_xics(void)
-{
-	xics_smp_probe();
-
-	if (cpu_has_feature(CPU_FTR_DBELL) && !is_secure_guest())
-		smp_ops->cause_ipi = smp_pseries_cause_ipi;
-	else
-		smp_ops->cause_ipi = icp_ops->cause_ipi;
-}
-
 static __init void pSeries_smp_probe(void)
 {
 	if (xive_enabled())
-		/*
-		 * Don't use P9 doorbells when XIVE is enabled. IPIs
-		 * using MMIOs should be faster
-		 */
 		xive_smp_probe();
 	else
-		pSeries_smp_probe_xics();
+		xics_smp_probe();
+
+	/* No doorbell facility, must use the interrupt controller for IPIs */
+	if (!cpu_has_feature(CPU_FTR_DBELL))
+		return;
+
+	/* Doorbells can only be used for IPIs between SMT siblings */
+	if (!cpu_has_feature(CPU_FTR_SMT))
+		return;
+
+	if (is_kvm_guest()) {
+		/*
+		 * KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp
+		 * faults to the hypervisor which then reads the instruction
+		 * from guest memory, which tends to be slower than using XIVE.
+		 */
+		if (xive_enabled())
+			return;
+
+		/*
+		 * XICS hcalls aren't as fast, so we can use msgsndp (which
+		 * also helps exercise KVM emulation); however, KVM can't
+		 * emulate secure guests because it can't read the instruction
+		 * out of their memory.
+		 */
+		if (is_secure_guest())
+			return;
+	}
+
+	/*
+	 * Under PowerVM, FSCR[MSGP] is enabled as guest vCPU siblings are
+	 * gang scheduled on the same physical core, so doorbells are always
+	 * faster than the interrupt controller, and they can be used by
+	 * secure guests.
+	 */
+
+	ic_cause_ipi = smp_ops->cause_ipi;
+	smp_ops->cause_ipi = dbell_or_ic_cause_ipi;
 }
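
The doorbell-vs-interrupt-controller policy above condenses to a small
predicate. A restatement as standalone C, where the flags stand in for
cpu_has_feature(CPU_FTR_DBELL / CPU_FTR_SMT), is_kvm_guest(), xive_enabled()
and is_secure_guest() (a sketch for clarity, not kernel code):

    #include <stdbool.h>

    struct guest_env {
        bool has_dbell, has_smt, kvm_guest, xive, secure;
    };

    /* True when msgsndp doorbells should front the interrupt controller. */
    static bool use_doorbells_for_sibling_ipis(const struct guest_env *e)
    {
        if (!e->has_dbell || !e->has_smt)
            return false;  /* no facility, or no SMT siblings to target */
        if (e->kvm_guest && e->xive)
            return false;  /* emulated msgsndp is slower than XIVE MMIOs */
        if (e->kvm_guest && e->secure)
            return false;  /* KVM can't read a secure guest's instructions */
        return true;       /* PowerVM: siblings are gang-scheduled */
    }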
 
 static struct smp_ops_t pseries_smp_ops = {
diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
index 0a24a5a..81e0ac5 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -132,15 +132,11 @@
 			       struct device_attribute *attr,
 			       const char *buf, size_t count)
 {
-	cpumask_var_t offline_mask;
 	int rc;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
 
-	if (!alloc_cpumask_var(&offline_mask, GFP_KERNEL))
-		return -ENOMEM;
-
 	stream_id = simple_strtoul(buf, NULL, 16);
 
 	do {
@@ -149,33 +145,14 @@
 			ssleep(1);
 	} while (rc == -EAGAIN);
 
-	if (!rc) {
-		/* All present CPUs must be online */
-		cpumask_andnot(offline_mask, cpu_present_mask,
-				cpu_online_mask);
-		rc = rtas_online_cpus_mask(offline_mask);
-		if (rc) {
-			pr_err("%s: Could not bring present CPUs online.\n",
-					__func__);
-			goto out;
-		}
-
-		stop_topology_update();
+	if (!rc)
 		rc = pm_suspend(PM_SUSPEND_MEM);
-		start_topology_update();
-
-		/* Take down CPUs not online prior to suspend */
-		if (!rtas_offline_cpus_mask(offline_mask))
-			pr_warn("%s: Could not restore CPUs to offline "
-					"state.\n", __func__);
-	}
 
 	stream_id = 0;
 
 	if (!rc)
 		rc = count;
-out:
-	free_cpumask_var(offline_mask);
+
 	return rc;
 }
 
diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile
index 7c6d8b1..348f5958 100644
--- a/arch/powerpc/purgatory/Makefile
+++ b/arch/powerpc/purgatory/Makefile
@@ -2,11 +2,11 @@
 
 KASAN_SANITIZE := n
 
-targets += trampoline.o purgatory.ro kexec-purgatory.c
+targets += trampoline_$(BITS).o purgatory.ro kexec-purgatory.c
 
 LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined
 
-$(obj)/purgatory.ro: $(obj)/trampoline.o FORCE
+$(obj)/purgatory.ro: $(obj)/trampoline_$(BITS).o FORCE
 		$(call if_changed,ld)
 
 quiet_cmd_bin2c = BIN2C   $@
diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline_64.S
similarity index 70%
rename from arch/powerpc/purgatory/trampoline.S
rename to arch/powerpc/purgatory/trampoline_64.S
index a5a83c3..d956b8a 100644
--- a/arch/powerpc/purgatory/trampoline.S
+++ b/arch/powerpc/purgatory/trampoline_64.S
@@ -10,6 +10,7 @@
  */
 
 #include <asm/asm-compat.h>
+#include <asm/crashdump-ppc64.h>
 
 	.machine ppc64
 	.balign 256
@@ -43,14 +44,39 @@
 	mr	%r17,%r3	/* save cpu id to r17 */
 	mr	%r15,%r4	/* save physical address in reg15 */
 
+	/* Work out where we're running */
+	bcl	20, 31, 0f
+0:	mflr	%r18
+
+	/*
+	 * Copy BACKUP_SRC_SIZE bytes from BACKUP_SRC_START to
+	 * backup_start 8 bytes at a time.
+	 *
+	 * Use r3 = dest, r4 = src, r5 = size, r6 = count
+	 */
+	ld	%r3, (backup_start - 0b)(%r18)
+	cmpdi	%cr0, %r3, 0
+	beq	.Lskip_copy	/* skip if there is no backup region */
+	lis	%r5, BACKUP_SRC_SIZE@h
+	ori	%r5, %r5, BACKUP_SRC_SIZE@l
+	cmpdi	%cr0, %r5, 0
+	beq	.Lskip_copy	/* skip if copy size is zero */
+	lis	%r4, BACKUP_SRC_START@h
+	ori	%r4, %r4, BACKUP_SRC_START@l
+	li	%r6, 0
+.Lcopy_loop:
+	ldx	%r0, %r6, %r4
+	stdx	%r0, %r6, %r3
+	addi	%r6, %r6, 8
+	cmpld	%cr0, %r6, %r5
+	blt	.Lcopy_loop
+
+.Lskip_copy:
 	or	%r3,%r3,%r3	/* ok now to high priority, lets boot */
 	lis	%r6,0x1
 	mtctr	%r6		/* delay a bit for slaves to catch up */
 	bdnz	.		/* before we overwrite 0-100 again */
 
-	bl	0f		/* Work out where we're running */
-0:	mflr	%r18
-
 	/* load device-tree address */
 	ld	%r3, (dt_offset - 0b)(%r18)
 	mr	%r16,%r3	/* save dt address in reg16 */
@@ -61,6 +87,10 @@
 	li	%r4,28
 	STWX_BE	%r17,%r3,%r4	/* Store my cpu as __be32 at byte 28 */
 1:
+	/* Load opal base and entry values in r8 & r9 respectively */
+	ld	%r8,(opal_base - 0b)(%r18)
+	ld	%r9,(opal_entry - 0b)(%r18)
+
 	/* load the kernel address */
 	ld	%r4,(kernel - 0b)(%r18)
 
@@ -89,7 +119,6 @@
 
 	rfid			/* update MSR and start kernel */
 
-
 	.balign 8
 	.globl kernel
 kernel:
@@ -102,6 +131,23 @@
 	.8byte  0x0
 	.size dt_offset, . - dt_offset
 
+	.balign 8
+	.globl backup_start
+backup_start:
+	.8byte  0x0
+	.size backup_start, . - backup_start
+
+	.balign 8
+	.globl opal_base
+opal_base:
+	.8byte  0x0
+	.size opal_base, . - opal_base
+
+	.balign 8
+	.globl opal_entry
+opal_entry:
+	.8byte  0x0
+	.size opal_entry, . - opal_entry
 
 	.data
 	.balign 8
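
The backup-region copy added to the trampoline is easier to audit against a
C rendering. A rough equivalent (illustrative: the real BACKUP_SRC_START and
BACKUP_SRC_SIZE come from asm/crashdump-ppc64.h, the size is assumed to be a
multiple of 8, and low memory is assumed mapped, as in the purgatory
environment):

    #include <stdint.h>

    #define BACKUP_SRC_START 0x0ULL      /* assumed values; the real ones */
    #define BACKUP_SRC_SIZE  0x10000ULL  /* live in asm/crashdump-ppc64.h */

    /* What .Lcopy_loop does: copy the backup region to backup_start,
     * eight bytes per iteration, skipping the copy when either the
     * destination or the size is zero. */
    static void copy_backup_region(uint64_t backup_start)
    {
        uint64_t *dst = (uint64_t *)(uintptr_t)backup_start;
        const uint64_t *src = (const uint64_t *)(uintptr_t)BACKUP_SRC_START;
        uint64_t i;

        if (backup_start == 0 || BACKUP_SRC_SIZE == 0)
            return;

        for (i = 0; i < BACKUP_SRC_SIZE / 8; i++)
            dst[i] = src[i];
    }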
diff --git a/arch/powerpc/sysdev/xive/native.c b/arch/powerpc/sysdev/xive/native.c
index 71b881e..cb58ec7c 100644
--- a/arch/powerpc/sysdev/xive/native.c
+++ b/arch/powerpc/sysdev/xive/native.c
@@ -18,6 +18,7 @@
 #include <linux/delay.h>
 #include <linux/cpumask.h>
 #include <linux/mm.h>
+#include <linux/kmemleak.h>
 
 #include <asm/machdep.h>
 #include <asm/prom.h>
@@ -647,6 +648,7 @@
 			pr_err("Failed to allocate provisioning page\n");
 			return false;
 		}
+		kmemleak_ignore(p);
 		opal_xive_donate_page(chip, __pa(p));
 	}
 	return true;
diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c
index f0551a2..1e3674d 100644
--- a/arch/powerpc/sysdev/xive/spapr.c
+++ b/arch/powerpc/sysdev/xive/spapr.c
@@ -768,7 +768,7 @@
 	return vec5 + index;
 }
 
-static bool xive_spapr_disabled(void)
+static bool __init xive_spapr_disabled(void)
 {
 	const u8 *vec5_xive;
 
diff --git a/arch/powerpc/tools/unrel_branch_check.sh b/arch/powerpc/tools/unrel_branch_check.sh
index 7711475..6e6a30a 100755
--- a/arch/powerpc/tools/unrel_branch_check.sh
+++ b/arch/powerpc/tools/unrel_branch_check.sh
@@ -31,7 +31,10 @@
 grep -v '\<__start_initialization_multiplatform>' |
 grep -v -e 'b.\?.\?ctr' |
 grep -v -e 'b.\?.\?lr' |
-sed 's/://' |
+sed -e 's/\bbt.\?[[:space:]]*[[:digit:]][[:digit:]]*,/beq/' \
+	-e 's/\bbf.\?[[:space:]]*[[:digit:]][[:digit:]]*,/bne/' \
+	-e 's/[[:space:]]0x/ /' \
+	-e 's/://' |
 awk '{ print $1 ":" $6 ":0x" $7 ":" $8 " "}'
 )
 
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index 7efe4bc..df7bca0 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -481,6 +481,13 @@
 #endif
 }
 
+static void xmon_touch_watchdogs(void)
+{
+	touch_softlockup_watchdog_sync();
+	rcu_cpu_stall_reset();
+	touch_nmi_watchdog();
+}
+
 static int xmon_core(struct pt_regs *regs, int fromipi)
 {
 	int cmd = 0;
@@ -718,7 +725,7 @@
 	else
 		insert_cpu_bpts();
 
-	touch_nmi_watchdog();
+	xmon_touch_watchdogs();
 	local_irq_restore(flags);
 
 	return cmd != 'X' && cmd != EOF;
@@ -1593,6 +1600,7 @@
 	case 0x1300:	ret = "(Instruction Breakpoint)"; break;
 	case 0x1500:	ret = "(Denormalisation)"; break;
 	case 0x1700:	ret = "(Altivec Assist)"; break;
+	case 0x3000:	ret = "(System Call Vectored)"; break;
 	default: ret = "";
 	}
 	return ret;
@@ -1861,7 +1869,7 @@
 		catch_memory_errors = 1;
 		sync();
 
-		if (cmd != 'i') {
+		if (cmd != 'i' || IS_ENABLED(CONFIG_PPC_BOOK3S_64)) {
 			for (; nflush > 0; --nflush, adrs += L1_CACHE_BYTES)
 				cflush((void *) adrs);
 		} else {
@@ -2022,6 +2030,18 @@
 #endif
 }
 
+static void dump_310_sprs(void)
+{
+#ifdef CONFIG_PPC64
+	if (!cpu_has_feature(CPU_FTR_ARCH_31))
+		return;
+
+	printf("mmcr3  = %.16lx, sier2  = %.16lx, sier3  = %.16lx\n",
+		mfspr(SPRN_MMCR3), mfspr(SPRN_SIER2), mfspr(SPRN_SIER3));
+
+#endif
+}
+
 static void dump_one_spr(int spr, bool show_unimplemented)
 {
 	unsigned long val;
@@ -2076,6 +2096,7 @@
 		dump_206_sprs();
 		dump_207_sprs();
 		dump_300_sprs();
+		dump_310_sprs();
 
 		return;
 	}
@@ -2934,11 +2955,10 @@
 	int nr, dotted;
 	unsigned long first_adr;
 	struct ppc_inst inst, last_inst = ppc_inst(0);
-	unsigned char val[4];
 
 	dotted = 0;
-	for (first_adr = adr; count > 0; --count, adr += 4) {
-		nr = mread(adr, val, 4);
+	for (first_adr = adr; count > 0; --count, adr += ppc_inst_len(inst)) {
+		nr = mread_instr(adr, &inst);
 		if (nr == 0) {
 			if (praddr) {
 				const char *x = fault_chars[fault_type];
@@ -2946,7 +2966,6 @@
 			}
 			break;
 		}
-		inst = ppc_inst(GETWORD(val));
 		if (adr > first_adr && ppc_inst_equal(inst, last_inst)) {
 			if (!dotted) {
 				printf(" ...\n");
@@ -2957,9 +2976,12 @@
 		dotted = 0;
 		last_inst = inst;
 		if (praddr)
-			printf(REG"  %.8x", adr, ppc_inst_val(inst));
+			printf(REG"  %s", adr, ppc_inst_as_str(inst));
 		printf("\t");
-		dump_func(ppc_inst_val(inst), adr);
+		if (!ppc_inst_prefixed(inst))
+			dump_func(ppc_inst_val(inst), adr);
+		else
+			dump_func(ppc_inst_as_u64(inst), adr);
 		printf("\n");
 	}
 	return adr - first_adr;
@@ -4256,7 +4278,7 @@
 		subcmd = inchar();
 		if (isxdigit(subcmd) || subcmd == '\n')
 			termch = subcmd;
-		/* fall through */
+		fallthrough;
 	case 'f':
 		scanhex(&num);
 		if (num >= XMON_NUM_SPUS || !spu_info[num].spu) {
diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c
index 1b299e80..addaa6e 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -244,20 +244,6 @@
 	stop_psscr_table[index].mask = psscr_mask;
 }
 
-/*
- * Returns 0 if prop1_len == prop2_len. Else returns -1
- */
-static inline int validate_dt_prop_sizes(const char *prop1, int prop1_len,
-					 const char *prop2, int prop2_len)
-{
-	if (prop1_len == prop2_len)
-		return 0;
-
-	pr_warn("cpuidle-powernv: array sizes don't match for %s and %s\n",
-		prop1, prop2);
-	return -1;
-}
-
 extern u32 pnv_get_supported_cpuidle_states(void);
 static int powernv_add_idle_states(void)
 {
diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index 6513ef2..ff6d99e 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -21,8 +21,9 @@
 #include <asm/runlatch.h>
 #include <asm/idle.h>
 #include <asm/plpar_wrappers.h>
+#include <asm/rtas.h>
 
-struct cpuidle_driver pseries_idle_driver = {
+static struct cpuidle_driver pseries_idle_driver = {
 	.name             = "pseries_idle",
 	.owner            = THIS_MODULE,
 };
@@ -86,19 +87,150 @@
 	}
 }
 
+/*
+ * XCEDE: Extended CEDE states discovered through the
+ *        "ibm,get-system-parameter" RTAS call with the token
+ *        CEDE_LATENCY_TOKEN
+ */
+
+/*
+ * Section 7.3.16 System Parameters Option of PAPR version 2.8.1 has a
+ * table with all the parameters to ibm,get-system-parameters.
+ * CEDE_LATENCY_TOKEN corresponds to the token value for Cede Latency
+ * Settings Information.
+ */
+#define CEDE_LATENCY_TOKEN	45
+
+/*
+ * If the platform supports the cede latency settings information system
+ * parameter it must provide the following information in the NULL terminated
+ * parameter string:
+ *
+ * a. The first byte is the length “N” of each cede latency setting record minus
+ *    one (zero indicates a length of 1 byte).
+ *
+ * b. For each supported cede latency setting a cede latency setting record
+ *    consisting of the first “N” bytes as per the following table.
+ *
+ *    -----------------------------
+ *    | Field           | Field   |
+ *    | Name            | Length  |
+ *    -----------------------------
+ *    | Cede Latency    | 1 Byte  |
+ *    | Specifier Value |         |
+ *    -----------------------------
+ *    | Maximum wakeup  |         |
+ *    | latency in      | 8 Bytes |
+ *    | tb-ticks        |         |
+ *    -----------------------------
+ *    | Responsive to   |         |
+ *    | external        | 1 Byte  |
+ *    | interrupts      |         |
+ *    -----------------------------
+ *
+ * This version has cede latency record size = 10.
+ *
+ * The structure xcede_latency_payload represents a) and b) with
+ * xcede_latency_record representing the table in b).
+ *
+ * xcede_latency_parameter is what gets returned by
+ * the ibm,get-system-parameter RTAS call when made with
+ * CEDE_LATENCY_TOKEN.
+ *
+ * These structures are only used to represent the data obtained by the RTAS
+ * call. The data is in big-endian.
+ */
+struct xcede_latency_record {
+	u8	hint;
+	__be64	latency_ticks;
+	u8	wake_on_irqs;
+} __packed;
+
+// Make space for 16 records, which "should be enough".
+struct xcede_latency_payload {
+	u8     record_size;
+	struct xcede_latency_record records[16];
+} __packed;
+
+struct xcede_latency_parameter {
+	__be16  payload_size;
+	struct xcede_latency_payload payload;
+	u8 null_char;
+} __packed;
+
+static unsigned int nr_xcede_records;
+static struct xcede_latency_parameter xcede_latency_parameter __initdata;
+
+static int __init parse_cede_parameters(void)
+{
+	struct xcede_latency_payload *payload;
+	u32 total_xcede_records_size;
+	u8 xcede_record_size;
+	u16 payload_size;
+	int ret, i;
+
+	ret = rtas_call(rtas_token("ibm,get-system-parameter"), 3, 1,
+			NULL, CEDE_LATENCY_TOKEN, __pa(&xcede_latency_parameter),
+			sizeof(xcede_latency_parameter));
+	if (ret) {
+		pr_err("xcede: Error parsing CEDE_LATENCY_TOKEN\n");
+		return ret;
+	}
+
+	payload_size = be16_to_cpu(xcede_latency_parameter.payload_size);
+	payload = &xcede_latency_parameter.payload;
+
+	xcede_record_size = payload->record_size + 1;
+
+	if (xcede_record_size != sizeof(struct xcede_latency_record)) {
+		pr_err("xcede: Expected record-size %lu. Observed size %u.\n",
+		       sizeof(struct xcede_latency_record), xcede_record_size);
+		return -EINVAL;
+	}
+
+	pr_info("xcede: xcede_record_size = %d\n", xcede_record_size);
+
+	/*
+	 * Since the payload_size includes the last NULL byte and the
+	 * xcede_record_size, the remaining bytes correspond to array of all
+	 * cede_latency settings.
+	 */
+	total_xcede_records_size = payload_size - 2;
+	nr_xcede_records = total_xcede_records_size / xcede_record_size;
+
+	for (i = 0; i < nr_xcede_records; i++) {
+		struct xcede_latency_record *record = &payload->records[i];
+		u64 latency_ticks = be64_to_cpu(record->latency_ticks);
+		u8 wake_on_irqs = record->wake_on_irqs;
+		u8 hint = record->hint;
+
+		pr_info("xcede: Record %d : hint = %u, latency = 0x%llx tb ticks, Wake-on-irq = %u\n",
+			i, hint, latency_ticks, wake_on_irqs);
+	}
+
+	return 0;
+}
+
+#define NR_DEDICATED_STATES	2 /* snooze, CEDE */
+static u8 cede_latency_hint[NR_DEDICATED_STATES];
+
 static int dedicated_cede_loop(struct cpuidle_device *dev,
 				struct cpuidle_driver *drv,
 				int index)
 {
+	u8 old_latency_hint;
 
 	pseries_idle_prolog();
 	get_lppaca()->donate_dedicated_cpu = 1;
+	old_latency_hint = get_lppaca()->cede_latency_hint;
+	get_lppaca()->cede_latency_hint = cede_latency_hint[index];
 
 	HMT_medium();
 	check_and_cede_processor();
 
 	local_irq_disable();
 	get_lppaca()->donate_dedicated_cpu = 0;
+	get_lppaca()->cede_latency_hint = old_latency_hint;
 
 	pseries_idle_epilog();
 
@@ -130,7 +262,7 @@
 /*
  * States for dedicated partition case.
  */
-static struct cpuidle_state dedicated_states[] = {
+static struct cpuidle_state dedicated_states[NR_DEDICATED_STATES] = {
 	{ /* Snooze */
 		.name = "snooze",
 		.desc = "snooze",
@@ -211,6 +343,54 @@
 	return 0;
 }
 
+static void __init fixup_cede0_latency(void)
+{
+	struct xcede_latency_payload *payload;
+	u64 min_latency_us;
+	int i;
+
+	min_latency_us = dedicated_states[1].exit_latency; // CEDE latency
+
+	if (parse_cede_parameters())
+		return;
+
+	pr_info("cpuidle: Skipping the %d Extended CEDE idle states\n",
+		nr_xcede_records);
+
+	payload = &xcede_latency_parameter.payload;
+	for (i = 0; i < nr_xcede_records; i++) {
+		struct xcede_latency_record *record = &payload->records[i];
+		u64 latency_tb = be64_to_cpu(record->latency_ticks);
+		u64 latency_us = tb_to_ns(latency_tb) / NSEC_PER_USEC;
+
+		if (latency_us < min_latency_us)
+			min_latency_us = latency_us;
+	}
+
+	/*
+	 * By default, we assume that CEDE(0) has exit latency 10us,
+	 * since there is no way for us to query from the platform.
+	 *
+	 * However, if the wakeup latency of an Extended CEDE state is
+	 * smaller than 10us, then we can be sure that CEDE(0)
+	 * requires no more than that.
+	 *
+	 * Perform the fix-up.
+	 */
+	if (min_latency_us < dedicated_states[1].exit_latency) {
+		u64 cede0_latency = 1;
+
+		if (min_latency_us > 1)
+			cede0_latency = min_latency_us - 1;
+
+		dedicated_states[1].exit_latency = cede0_latency;
+		dedicated_states[1].target_residency = 10 * (cede0_latency);
+		pr_info("cpuidle: Fixed up CEDE exit latency to %llu us\n",
+			cede0_latency);
+	}
+
+}
+
 /*
  * pseries_idle_probe()
  * Choose state table for shared versus dedicated partition
@@ -232,8 +412,9 @@
 			cpuidle_state_table = shared_states;
 			max_idle_state = ARRAY_SIZE(shared_states);
 		} else {
+			fixup_cede0_latency();
 			cpuidle_state_table = dedicated_states;
-			max_idle_state = ARRAY_SIZE(dedicated_states);
+			max_idle_state = NR_DEDICATED_STATES;
 		}
 	} else
 		return -ENODEV;
diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl
index db87436..50a0a18 100644
--- a/drivers/crypto/vmx/aesp8-ppc.pl
+++ b/drivers/crypto/vmx/aesp8-ppc.pl
@@ -50,7 +50,7 @@
 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
+# details see https://www.openssl.org/~appro/cryptogams/.
 # ====================================================================
 #
 # This module implements support for AES instructions as per PowerISA
diff --git a/drivers/crypto/vmx/ghashp8-ppc.pl b/drivers/crypto/vmx/ghashp8-ppc.pl
index 38b0650..09bba18 100644
--- a/drivers/crypto/vmx/ghashp8-ppc.pl
+++ b/drivers/crypto/vmx/ghashp8-ppc.pl
@@ -13,7 +13,7 @@
 # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
 # project. The module is, however, dual licensed under OpenSSL and
 # CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
+# details see https://www.openssl.org/~appro/cryptogams/.
 # ====================================================================
 #
 # GHASH for for PowerISA v2.07.
diff --git a/drivers/macintosh/adb-iop.c b/drivers/macintosh/adb-iop.c
index fca3164..f3d1a46 100644
--- a/drivers/macintosh/adb-iop.c
+++ b/drivers/macintosh/adb-iop.c
@@ -7,10 +7,6 @@
  * 1999-07-01 (jmt) - First implementation for new driver architecture.
  *
  * 1999-07-31 (jmt) - First working version.
- *
- * TODO:
- *
- * o Implement SRQ handling.
  */
 
 #include <linux/types.h>
@@ -18,24 +14,17 @@
 #include <linux/mm.h>
 #include <linux/delay.h>
 #include <linux/init.h>
-#include <linux/proc_fs.h>
 
 #include <asm/macintosh.h>
 #include <asm/macints.h>
 #include <asm/mac_iop.h>
-#include <asm/mac_oss.h>
 #include <asm/adb_iop.h>
 
 #include <linux/adb.h>
 
-/*#define DEBUG_ADB_IOP*/
-
 static struct adb_request *current_req;
 static struct adb_request *last_req;
-#if 0
-static unsigned char reply_buff[16];
-static unsigned char *reply_ptr;
-#endif
+static unsigned int autopoll_devs;
 
 static enum adb_iop_state {
 	idle,
@@ -62,13 +51,19 @@
 	.reset_bus    = adb_iop_reset_bus
 };
 
-static void adb_iop_end_req(struct adb_request *req, int state)
+static void adb_iop_done(void)
 {
+	struct adb_request *req = current_req;
+
+	adb_iop_state = idle;
+
 	req->complete = 1;
 	current_req = req->next;
 	if (req->done)
 		(*req->done)(req);
-	adb_iop_state = state;
+
+	if (adb_iop_state == idle)
+		adb_iop_start();
 }
 
 /*
@@ -79,15 +74,14 @@
 
 static void adb_iop_complete(struct iop_msg *msg)
 {
-	struct adb_request *req;
 	unsigned long flags;
 
 	local_irq_save(flags);
 
-	req = current_req;
-	if ((adb_iop_state == sending) && req && req->reply_expected) {
+	if (current_req->reply_expected)
 		adb_iop_state = awaiting_reply;
-	}
+	else
+		adb_iop_done();
 
 	local_irq_restore(flags);
 }
@@ -102,52 +96,36 @@
 static void adb_iop_listen(struct iop_msg *msg)
 {
 	struct adb_iopmsg *amsg = (struct adb_iopmsg *)msg->message;
-	struct adb_request *req;
 	unsigned long flags;
-#ifdef DEBUG_ADB_IOP
-	int i;
-#endif
+	bool req_done = false;
 
 	local_irq_save(flags);
 
-	req = current_req;
+	/* Handle a timeout. Timeout packets seem to occur even after
+	 * we've gotten a valid reply to a TALK, presumably because of
+	 * autopolling.
+	 */
 
-#ifdef DEBUG_ADB_IOP
-	printk("adb_iop_listen %p: rcvd packet, %d bytes: %02X %02X", req,
-	       (uint)amsg->count + 2, (uint)amsg->flags, (uint)amsg->cmd);
-	for (i = 0; i < amsg->count; i++)
-		printk(" %02X", (uint)amsg->data[i]);
-	printk("\n");
-#endif
+	if (amsg->flags & ADB_IOP_EXPLICIT) {
+		if (adb_iop_state == awaiting_reply) {
+			struct adb_request *req = current_req;
 
-	/* Handle a timeout. Timeout packets seem to occur even after */
-	/* we've gotten a valid reply to a TALK, so I'm assuming that */
-	/* a "timeout" is actually more like an "end-of-data" signal. */
-	/* We need to send back a timeout packet to the IOP to shut   */
-	/* it up, plus complete the current request, if any.          */
-
-	if (amsg->flags & ADB_IOP_TIMEOUT) {
-		msg->reply[0] = ADB_IOP_TIMEOUT | ADB_IOP_AUTOPOLL;
-		msg->reply[1] = 0;
-		msg->reply[2] = 0;
-		if (req && (adb_iop_state != idle)) {
-			adb_iop_end_req(req, idle);
-		}
-	} else {
-		/* TODO: is it possible for more than one chunk of data  */
-		/*       to arrive before the timeout? If so we need to */
-		/*       use reply_ptr here like the other drivers do.  */
-		if ((adb_iop_state == awaiting_reply) &&
-		    (amsg->flags & ADB_IOP_EXPLICIT)) {
 			req->reply_len = amsg->count + 1;
 			memcpy(req->reply, &amsg->cmd, req->reply_len);
-		} else {
-			adb_input(&amsg->cmd, amsg->count + 1,
-				  amsg->flags & ADB_IOP_AUTOPOLL);
+
+			req_done = true;
 		}
-		memcpy(msg->reply, msg->message, IOP_MSG_LEN);
+	} else if (!(amsg->flags & ADB_IOP_TIMEOUT)) {
+		adb_input(&amsg->cmd, amsg->count + 1,
+			  amsg->flags & ADB_IOP_AUTOPOLL);
 	}
+
+	msg->reply[0] = autopoll_devs ? ADB_IOP_AUTOPOLL : 0;
 	iop_complete_message(msg);
+
+	if (req_done)
+		adb_iop_done();
+
 	local_irq_restore(flags);
 }
 
@@ -160,63 +138,50 @@
 
 static void adb_iop_start(void)
 {
-	unsigned long flags;
 	struct adb_request *req;
 	struct adb_iopmsg amsg;
-#ifdef DEBUG_ADB_IOP
-	int i;
-#endif
 
 	/* get the packet to send */
 	req = current_req;
 	if (!req)
 		return;
 
-	local_irq_save(flags);
-
-#ifdef DEBUG_ADB_IOP
-	printk("adb_iop_start %p: sending packet, %d bytes:", req, req->nbytes);
-	for (i = 0; i < req->nbytes; i++)
-		printk(" %02X", (uint)req->data[i]);
-	printk("\n");
-#endif
-
-	/* The IOP takes MacII-style packets, so */
-	/* strip the initial ADB_PACKET byte.    */
-
+	/* The IOP takes MacII-style packets, so strip the initial
+	 * ADB_PACKET byte.
+	 */
 	amsg.flags = ADB_IOP_EXPLICIT;
 	amsg.count = req->nbytes - 2;
 
-	/* amsg.data immediately follows amsg.cmd, effectively making */
-	/* amsg.cmd a pointer to the beginning of a full ADB packet.  */
+	/* amsg.data immediately follows amsg.cmd, effectively making
+	 * &amsg.cmd a pointer to the beginning of a full ADB packet.
+	 */
 	memcpy(&amsg.cmd, req->data + 1, req->nbytes - 1);
 
 	req->sent = 1;
 	adb_iop_state = sending;
-	local_irq_restore(flags);
 
-	/* Now send it. The IOP manager will call adb_iop_complete */
-	/* when the packet has been sent.                          */
-
+	/* Now send it. The IOP manager will call adb_iop_complete
+	 * when the message has been sent.
+	 */
 	iop_send_message(ADB_IOP, ADB_CHAN, req, sizeof(amsg), (__u8 *)&amsg,
 			 adb_iop_complete);
 }
 
-int adb_iop_probe(void)
+static int adb_iop_probe(void)
 {
 	if (!iop_ism_present)
 		return -ENODEV;
 	return 0;
 }
 
-int adb_iop_init(void)
+static int adb_iop_init(void)
 {
 	pr_info("adb: IOP ISM driver v0.4 for Unified ADB\n");
 	iop_listen(ADB_IOP, ADB_CHAN, adb_iop_listen, "ADB");
 	return 0;
 }
 
-int adb_iop_send_request(struct adb_request *req, int sync)
+static int adb_iop_send_request(struct adb_request *req, int sync)
 {
 	int err;
 
@@ -240,14 +205,14 @@
 		return -EINVAL;
 	}
 
-	local_irq_save(flags);
-
 	req->next = NULL;
 	req->sent = 0;
 	req->complete = 0;
 	req->reply_len = 0;
 
-	if (current_req != 0) {
+	local_irq_save(flags);
+
+	if (current_req) {
 		last_req->next = req;
 		last_req = req;
 	} else {
@@ -255,39 +220,58 @@
 		last_req = req;
 	}
 
+	if (adb_iop_state == idle)
+		adb_iop_start();
+
 	local_irq_restore(flags);
 
-	if (adb_iop_state == idle)
-		adb_iop_start();
 	return 0;
 }
 
-int adb_iop_autopoll(int devs)
+static void adb_iop_set_ap_complete(struct iop_msg *msg)
 {
-	/* TODO: how do we enable/disable autopoll? */
+	struct adb_iopmsg *amsg = (struct adb_iopmsg *)msg->message;
+
+	autopoll_devs = (amsg->data[1] << 8) | amsg->data[0];
+}
+
+static int adb_iop_autopoll(int devs)
+{
+	struct adb_iopmsg amsg;
+	unsigned long flags;
+	unsigned int mask = (unsigned int)devs & 0xFFFE;
+
+	local_irq_save(flags);
+
+	amsg.flags = ADB_IOP_SET_AUTOPOLL | (mask ? ADB_IOP_AUTOPOLL : 0);
+	amsg.count = 2;
+	amsg.cmd = 0;
+	amsg.data[0] = mask & 0xFF;
+	amsg.data[1] = (mask >> 8) & 0xFF;
+
+	iop_send_message(ADB_IOP, ADB_CHAN, NULL, sizeof(amsg), (__u8 *)&amsg,
+			 adb_iop_set_ap_complete);
+
+	local_irq_restore(flags);
+
 	return 0;
 }
 
-void adb_iop_poll(void)
+static void adb_iop_poll(void)
 {
-	if (adb_iop_state == idle)
-		adb_iop_start();
 	iop_ism_irq_poll(ADB_IOP);
 }
 
-int adb_iop_reset_bus(void)
+static int adb_iop_reset_bus(void)
 {
-	struct adb_request req = {
-		.reply_expected = 0,
-		.nbytes = 2,
-		.data = { ADB_PACKET, 0 },
-	};
+	struct adb_request req;
 
-	adb_iop_write(&req);
-	while (!req.complete) {
-		adb_iop_poll();
-		schedule();
-	}
+	/* Command = 0, Address = ignored */
+	adb_request(&req, NULL, ADBREQ_NOSEND, 1, ADB_BUSRESET);
+	adb_iop_send_request(&req, 1);
+
+	/* Don't want any more requests during the Global Reset low time. */
+	mdelay(3);
 
 	return 0;
 }
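
The SET_AUTOPOLL message built in adb_iop_autopoll() carries the device
bitmap as two little-endian bytes, and adb_iop_set_ap_complete() above
reassembles it. A minimal round-trip sketch of that packing (plain C,
message struct omitted):

    #include <stdio.h>

    int main(void)
    {
        unsigned int devs = 0x0004;        /* e.g. poll ADB device 2 */
        unsigned int mask = devs & 0xFFFE; /* device 0 is never polled */
        unsigned char data[2];
        unsigned int reassembled;

        /* pack: low byte first, as adb_iop_autopoll() does */
        data[0] = mask & 0xFF;
        data[1] = (mask >> 8) & 0xFF;

        /* unpack: as adb_iop_set_ap_complete() does */
        reassembled = (data[1] << 8) | data[0];

        printf("mask 0x%04X round-trips to 0x%04X\n", mask, reassembled);
        return 0;
    }
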
diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c
index e49d1f2..73b3961 100644
--- a/drivers/macintosh/adb.c
+++ b/drivers/macintosh/adb.c
@@ -163,7 +163,7 @@
 			 * See if anybody actually moved. This is suggested
 			 * by HW TechNote 01:
 			 *
-			 * http://developer.apple.com/technotes/hw/hw_01.html
+			 * https://developer.apple.com/technotes/hw/hw_01.html
 			 */
 			adb_request(&req, NULL, ADBREQ_SYNC | ADBREQ_REPLY, 1,
 				    (highFree << 4) | 0xf);
diff --git a/drivers/macintosh/therm_adt746x.c b/drivers/macintosh/therm_adt746x.c
index 8f7725d..7e21843 100644
--- a/drivers/macintosh/therm_adt746x.c
+++ b/drivers/macintosh/therm_adt746x.c
@@ -5,8 +5,8 @@
  * Copyright (C) 2003, 2004 Colin Leroy, Rasmus Rohde, Benjamin Herrenschmidt
  *
  * Documentation from 115254175ADT7467_pra.pdf and 3686221171167ADT7460_b.pdf
- * http://www.onsemi.com/PowerSolutions/product.do?id=ADT7467
- * http://www.onsemi.com/PowerSolutions/product.do?id=ADT7460
+ * https://www.onsemi.com/PowerSolutions/product.do?id=ADT7467
+ * https://www.onsemi.com/PowerSolutions/product.do?id=ADT7460
  *
  */
 
diff --git a/drivers/macintosh/via-macii.c b/drivers/macintosh/via-macii.c
index ac824d7..060e03f 100644
--- a/drivers/macintosh/via-macii.c
+++ b/drivers/macintosh/via-macii.c
@@ -77,6 +77,12 @@
 #define ST_ODD		0x20		/* ADB state: odd data byte */
 #define ST_IDLE		0x30		/* ADB state: idle, nothing to send */
 
+/* ADB command byte structure */
+#define ADDR_MASK	0xF0
+#define CMD_MASK	0x0F
+#define OP_MASK		0x0C
+#define TALK		0x0C
+
 static int macii_init_via(void);
 static void macii_start(void);
 static irqreturn_t macii_interrupt(int irq, void *arg);
@@ -104,21 +110,22 @@
 	idle,
 	sending,
 	reading,
-	read_done,
 } macii_state;
 
 static struct adb_request *current_req; /* first request struct in the queue */
 static struct adb_request *last_req;     /* last request struct in the queue */
 static unsigned char reply_buf[16];        /* storage for autopolled replies */
 static unsigned char *reply_ptr;     /* next byte in reply_buf or req->reply */
-static int reading_reply;        /* store reply in reply_buf else req->reply */
+static bool reading_reply;       /* store reply in reply_buf else req->reply */
 static int data_index;      /* index of the next byte to send from req->data */
 static int reply_len; /* number of bytes received in reply_buf or req->reply */
 static int status;          /* VIA's ADB status bits captured upon interrupt */
-static int last_status;              /* status bits as at previous interrupt */
-static int srq_asserted;     /* have to poll for the device that asserted it */
-static int command_byte;         /* the most recent command byte transmitted */
-static int autopoll_devs;      /* bits set are device addresses to be polled */
+static bool bus_timeout;                   /* no data was sent by the device */
+static bool srq_asserted;    /* have to poll for the device that asserted it */
+static u8 last_cmd;              /* the most recent command byte transmitted */
+static u8 last_talk_cmd;    /* the most recent Talk command byte transmitted */
+static u8 last_poll_cmd; /* the most recent Talk R0 command byte transmitted */
+static unsigned int autopoll_devs;  /* bits set are device addresses to poll */
 
 /* Check for MacII style ADB */
 static int macii_probe(void)
@@ -133,7 +140,7 @@
 }
 
 /* Initialize the driver */
-int macii_init(void)
+static int macii_init(void)
 {
 	unsigned long flags;
 	int err;
@@ -165,7 +172,6 @@
 
 	/* Set up state: idle */
 	via[B] |= ST_IDLE;
-	last_status = via[B] & (ST_MASK | CTLR_IRQ);
 
 	/* Shift register on input */
 	via[ACR] = (via[ACR] & ~SR_CTRL) | SR_EXT;
@@ -179,35 +185,49 @@
 /* Send an ADB poll (Talk Register 0 command prepended to the request queue) */
 static void macii_queue_poll(void)
 {
-	/* No point polling the active device as it will never assert SRQ, so
-	 * poll the next device in the autopoll list. This could leave us
-	 * stuck in a polling loop if an unprobed device is asserting SRQ.
-	 * In theory, that could only happen if a device was plugged in after
-	 * probing started. Unplugging it again will break the cycle.
-	 * (Simply polling the next higher device often ends up polling almost
-	 * every device (after wrapping around), which takes too long.)
-	 */
-	int device_mask;
-	int next_device;
 	static struct adb_request req;
+	unsigned char poll_command;
+	unsigned int poll_addr;
 
+	/* This only polls devices in the autopoll list, which assumes that
+	 * unprobed devices never assert SRQ. That could happen if a device was
+	 * plugged in after the adb bus scan. Unplugging it again will resolve
+	 * the problem. This behaviour is similar to MacOS.
+	 */
 	if (!autopoll_devs)
 		return;
 
-	device_mask = (1 << (((command_byte & 0xF0) >> 4) + 1)) - 1;
-	if (autopoll_devs & ~device_mask)
-		next_device = ffs(autopoll_devs & ~device_mask) - 1;
-	else
-		next_device = ffs(autopoll_devs) - 1;
+	/* The device most recently polled may not be the best device to poll
+	 * right now. Some other device(s) may have signalled SRQ (the active
+	 * device won't do that). Or the autopoll list may have been changed.
+	 * Try polling the next higher address.
+	 */
+	poll_addr = (last_poll_cmd & ADDR_MASK) >> 4;
+	if ((srq_asserted && last_cmd == last_poll_cmd) ||
+	    !(autopoll_devs & (1 << poll_addr))) {
+		unsigned int higher_devs;
 
-	adb_request(&req, NULL, ADBREQ_NOSEND, 1, ADB_READREG(next_device, 0));
+		higher_devs = autopoll_devs & -(1 << (poll_addr + 1));
+		poll_addr = ffs(higher_devs ? higher_devs : autopoll_devs) - 1;
+	}
+
+	/* Send a Talk Register 0 command */
+	poll_command = ADB_READREG(poll_addr, 0);
+
+	/* No need to repeat this Talk command. The transceiver will do that
+	 * as long as it is idle.
+	 */
+	if (poll_command == last_cmd)
+		return;
+
+	adb_request(&req, NULL, ADBREQ_NOSEND, 1, poll_command);
 
 	req.sent = 0;
 	req.complete = 0;
 	req.reply_len = 0;
 	req.next = current_req;
 
-	if (current_req != NULL) {
+	if (WARN_ON(current_req)) {
 		current_req = &req;
 	} else {
 		current_req = &req;
@@ -266,40 +286,22 @@
 /* Start auto-polling */
 static int macii_autopoll(int devs)
 {
-	static struct adb_request req;
 	unsigned long flags;
-	int err = 0;
-
-	/* bit 1 == device 1, and so on. */
-	autopoll_devs = devs & 0xFFFE;
-
-	if (!autopoll_devs)
-		return 0;
 
 	local_irq_save(flags);
 
-	if (current_req == NULL) {
-		/* Send a Talk Reg 0. The controller will repeatedly transmit
-		 * this as long as it is idle.
-		 */
-		adb_request(&req, NULL, ADBREQ_NOSEND, 1,
-		            ADB_READREG(ffs(autopoll_devs) - 1, 0));
-		err = macii_write(&req);
+	/* bit 1 == device 1, and so on. */
+	autopoll_devs = (unsigned int)devs & 0xFFFE;
+
+	if (!current_req) {
+		macii_queue_poll();
+		if (current_req && macii_state == idle)
+			macii_start();
 	}
 
 	local_irq_restore(flags);
-	return err;
-}
 
-static inline int need_autopoll(void)
-{
-	/* Was the last command Talk Reg 0
-	 * and is the target on the autopoll list?
-	 */
-	if ((command_byte & 0x0F) == 0x0C &&
-	    ((1 << ((command_byte & 0xF0) >> 4)) & autopoll_devs))
-		return 0;
-	return 1;
+	return 0;
 }
 
 /* Prod the chip without interrupts */
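
The poll-address selection in macii_queue_poll() leans on a bitmask
trick: -(1 << (poll_addr + 1)) is a mask with every bit from
poll_addr + 1 upward set, so ANDing it with autopoll_devs isolates the
devices at higher addresses, and ffs() falls back to the lowest set bit
when it has to wrap. A small sketch, assuming devices at addresses 2, 3
and 7:

    #include <stdio.h>
    #include <strings.h> /* ffs() */

    /* pick the next device to poll, wrapping, as macii_queue_poll() does */
    static unsigned int next_poll_addr(unsigned int autopoll_devs,
                                       unsigned int poll_addr)
    {
        /* all bits strictly above poll_addr */
        unsigned int higher_devs = autopoll_devs & -(1U << (poll_addr + 1));

        return ffs(higher_devs ? higher_devs : autopoll_devs) - 1;
    }

    int main(void)
    {
        unsigned int devs = (1 << 2) | (1 << 3) | (1 << 7);

        printf("%u\n", next_poll_addr(devs, 3)); /* 7 */
        printf("%u\n", next_poll_addr(devs, 7)); /* 2: wrapped around */
        return 0;
    }
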
@@ -311,7 +313,7 @@
 /* Reset the bus */
 static int macii_reset_bus(void)
 {
-	static struct adb_request req;
+	struct adb_request req;
 
 	/* Command = 0, Address = ignored */
 	adb_request(&req, NULL, ADBREQ_NOSEND, 1, ADB_BUSRESET);
@@ -335,8 +337,6 @@
 	 * And req->nbytes is the number of bytes of real data plus one.
 	 */
 
-	/* store command byte */
-	command_byte = req->data[1];
 	/* Output mode */
 	via[ACR] |= SR_OUT;
 	/* Load data */
@@ -346,6 +346,9 @@
 
 	macii_state = sending;
 	data_index = 2;
+
+	bus_timeout = false;
+	srq_asserted = false;
 }
 
 /*
@@ -354,15 +357,17 @@
  * generating shift register interrupts (SR_INT) for us. This means there has
  * to be activity on the ADB bus. The chip will poll to achieve this.
  *
- * The basic ADB state machine was left unchanged from the original MacII code
- * by Alan Cox, which was based on the CUDA driver for PowerMac.
- * The syntax of the ADB status lines is totally different on MacII,
- * though. MacII uses the states Command -> Even -> Odd -> Even ->...-> Idle
- * for sending and Idle -> Even -> Odd -> Even ->...-> Idle for receiving.
- * Start and end of a receive packet are signalled by asserting /IRQ on the
- * interrupt line (/IRQ means the CTLR_IRQ bit in port B; not to be confused
- * with the VIA shift register interrupt. /IRQ never actually interrupts the
- * processor, it's just an ordinary input.)
+ * The VIA Port B output signalling works as follows. After the ADB transceiver
+ * sees a transition on the PB4 and PB5 lines it will crank over the VIA shift
+ * register which eventually raises the SR_INT interrupt. The PB4/PB5 outputs
+ * are toggled with each byte as the ADB transaction progresses.
+ *
+ * Request with no reply expected (and empty transceiver buffer):
+ *     CMD -> IDLE
+ * Request with expected reply packet (or with buffered autopoll packet):
+ *     CMD -> EVEN -> ODD -> EVEN -> ... -> IDLE
+ * Unsolicited packet:
+ *     IDLE -> EVEN -> ODD -> EVEN -> ... -> IDLE
  */
 static irqreturn_t macii_interrupt(int irq, void *arg)
 {
@@ -382,31 +387,31 @@
 		}
 	}
 
-	last_status = status;
 	status = via[B] & (ST_MASK | CTLR_IRQ);
 
 	switch (macii_state) {
 	case idle:
-		if (reading_reply) {
-			reply_ptr = current_req->reply;
-		} else {
-			WARN_ON(current_req);
-			reply_ptr = reply_buf;
-		}
+		WARN_ON((status & ST_MASK) != ST_IDLE);
+
+		reply_ptr = reply_buf;
+		reading_reply = false;
+
+		bus_timeout = false;
+		srq_asserted = false;
 
 		x = via[SR];
 
-		if ((status & CTLR_IRQ) && (x == 0xFF)) {
-			/* Bus timeout without SRQ sequence:
-			 *     data is "FF" while CTLR_IRQ is "H"
+		if (!(status & CTLR_IRQ)) {
+			/* /CTLR_IRQ asserted in idle state means we must
+			 * read an autopoll reply from the transceiver buffer.
 			 */
-			reply_len = 0;
-			srq_asserted = 0;
-			macii_state = read_done;
-		} else {
 			macii_state = reading;
 			*reply_ptr = x;
 			reply_len = 1;
+		} else {
+			/* bus timeout */
+			reply_len = 0;
+			break;
 		}
 
 		/* set ADB state = even for first data byte */
@@ -415,41 +420,83 @@
 
 	case sending:
 		req = current_req;
-		if (data_index >= req->nbytes) {
+
+		if (status == (ST_CMD | CTLR_IRQ)) {
+			/* /CTLR_IRQ de-asserted after the command byte means
+			 * the host can continue with the transaction.
+			 */
+
+			/* Store command byte */
+			last_cmd = req->data[1];
+			if ((last_cmd & OP_MASK) == TALK) {
+				last_talk_cmd = last_cmd;
+				if ((last_cmd & CMD_MASK) == ADB_READREG(0, 0))
+					last_poll_cmd = last_cmd;
+			}
+		}
+
+		if (status == ST_CMD) {
+			/* /CTLR_IRQ asserted after the command byte means we
+			 * must read an autopoll reply. The first byte was
+			 * lost because the shift register was an output.
+			 */
+			macii_state = reading;
+
+			reading_reply = false;
+			reply_ptr = reply_buf;
+			*reply_ptr = last_talk_cmd;
+			reply_len = 1;
+
+			/* reset to shift in */
+			via[ACR] &= ~SR_OUT;
+			x = via[SR];
+		} else if (data_index >= req->nbytes) {
 			req->sent = 1;
-			macii_state = idle;
 
 			if (req->reply_expected) {
-				reading_reply = 1;
-			} else {
+				macii_state = reading;
+
+				reading_reply = true;
+				reply_ptr = req->reply;
+				*reply_ptr = req->data[1];
+				reply_len = 1;
+
+				via[ACR] &= ~SR_OUT;
+				x = via[SR];
+			} else if ((req->data[1] & OP_MASK) == TALK) {
+				macii_state = reading;
+
+				reading_reply = false;
+				reply_ptr = reply_buf;
+				*reply_ptr = req->data[1];
+				reply_len = 1;
+
+				via[ACR] &= ~SR_OUT;
+				x = via[SR];
+
 				req->complete = 1;
 				current_req = req->next;
 				if (req->done)
 					(*req->done)(req);
+			} else {
+				macii_state = idle;
 
-				if (current_req)
-					macii_start();
-				else if (need_autopoll())
-					macii_autopoll(autopoll_devs);
-			}
-
-			if (macii_state == idle) {
-				/* reset to shift in */
-				via[ACR] &= ~SR_OUT;
-				x = via[SR];
-				/* set ADB state idle - might get SRQ */
-				via[B] = (via[B] & ~ST_MASK) | ST_IDLE;
+				req->complete = 1;
+				current_req = req->next;
+				if (req->done)
+					(*req->done)(req);
+				break;
 			}
 		} else {
 			via[SR] = req->data[data_index++];
+		}
 
-			if ((via[B] & ST_MASK) == ST_CMD) {
-				/* just sent the command byte, set to EVEN */
-				via[B] = (via[B] & ~ST_MASK) | ST_EVEN;
-			} else {
-				/* invert state bits, toggle ODD/EVEN */
-				via[B] ^= ST_MASK;
-			}
+		if ((via[B] & ST_MASK) == ST_CMD) {
+			/* just sent the command byte, set to EVEN */
+			via[B] = (via[B] & ~ST_MASK) | ST_EVEN;
+		} else {
+			/* invert state bits, toggle ODD/EVEN */
+			via[B] ^= ST_MASK;
 		}
 		break;
 
@@ -458,33 +505,35 @@
 		WARN_ON((status & ST_MASK) == ST_CMD ||
 			(status & ST_MASK) == ST_IDLE);
 
-		/* Bus timeout with SRQ sequence:
-		 *     data is "XX FF"      while CTLR_IRQ is "L L"
-		 * End of packet without SRQ sequence:
-		 *     data is "XX...YY 00" while CTLR_IRQ is "L...H L"
-		 * End of packet SRQ sequence:
-		 *     data is "XX...YY 00" while CTLR_IRQ is "L...L L"
-		 * (where XX is the first response byte and
-		 * YY is the last byte of valid response data.)
-		 */
-
-		srq_asserted = 0;
 		if (!(status & CTLR_IRQ)) {
-			if (x == 0xFF) {
-				if (!(last_status & CTLR_IRQ)) {
-					macii_state = read_done;
+			if (status == ST_EVEN && reply_len == 1) {
+				bus_timeout = true;
+			} else if (status == ST_ODD && reply_len == 2) {
+				srq_asserted = true;
+			} else {
+				macii_state = idle;
+
+				if (bus_timeout)
 					reply_len = 0;
-					srq_asserted = 1;
+
+				if (reading_reply) {
+					struct adb_request *req = current_req;
+
+					req->reply_len = reply_len;
+
+					req->complete = 1;
+					current_req = req->next;
+					if (req->done)
+						(*req->done)(req);
+				} else if (reply_len && autopoll_devs &&
+					   reply_buf[0] == last_poll_cmd) {
+					adb_input(reply_buf, reply_len, 1);
 				}
-			} else if (x == 0x00) {
-				macii_state = read_done;
-				if (!(last_status & CTLR_IRQ))
-					srq_asserted = 1;
+				break;
 			}
 		}
 
-		if (macii_state == reading &&
-		    reply_len < ARRAY_SIZE(reply_buf)) {
+		if (reply_len < ARRAY_SIZE(reply_buf)) {
 			reply_ptr++;
 			*reply_ptr = x;
 			reply_len++;
@@ -494,37 +543,22 @@
 		via[B] ^= ST_MASK;
 		break;
 
-	case read_done:
-		x = via[SR];
+	default:
+		break;
+	}
 
-		if (reading_reply) {
-			reading_reply = 0;
-			req = current_req;
-			req->reply_len = reply_len;
-			req->complete = 1;
-			current_req = req->next;
-			if (req->done)
-				(*req->done)(req);
-		} else if (reply_len && autopoll_devs)
-			adb_input(reply_buf, reply_len, 0);
-
-		macii_state = idle;
-
-		/* SRQ seen before, initiate poll now */
-		if (srq_asserted)
+	if (macii_state == idle) {
+		if (!current_req)
 			macii_queue_poll();
 
 		if (current_req)
 			macii_start();
-		else if (need_autopoll())
-			macii_autopoll(autopoll_devs);
 
-		if (macii_state == idle)
+		if (macii_state == idle) {
+			via[ACR] &= ~SR_OUT;
+			x = via[SR];
 			via[B] = (via[B] & ~ST_MASK) | ST_IDLE;
-		break;
-
-	default:
-		break;
+		}
 	}
 
 	local_irq_restore(flags);
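
The transaction sequences in the comment above are driven by the two ADB
state bits in VIA port B: writing ST_CMD, ST_EVEN, ST_ODD or ST_IDLE
steps the transceiver along, and via[B] ^= ST_MASK flips EVEN/ODD in one
operation because both bits invert. A toy walk-through (ST_CMD and
ST_EVEN are assumed to be 0x00 and 0x10, following the 0x20/0x30 values
of ST_ODD and ST_IDLE in the driver):

    #include <stdio.h>

    #define ST_MASK 0x30
    #define ST_CMD  0x00 /* assumed */
    #define ST_EVEN 0x10 /* assumed */
    #define ST_ODD  0x20
    #define ST_IDLE 0x30

    int main(void)
    {
        unsigned char portb = ST_CMD; /* command byte just sent */
        int byte;

        /* first data byte: CMD -> EVEN, as in macii_interrupt() */
        portb = (portb & ~ST_MASK) | ST_EVEN;

        /* each further byte toggles EVEN <-> ODD by inverting both bits */
        for (byte = 2; byte <= 5; byte++) {
            portb ^= ST_MASK;
            printf("byte %d: state %s\n", byte,
                   (portb & ST_MASK) == ST_EVEN ? "EVEN" : "ODD");
        }

        portb = (portb & ~ST_MASK) | ST_IDLE; /* transaction done */
        return portb == ST_IDLE ? 0 : 1;
    }
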
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index cfea054..86dbe0c 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -538,7 +538,7 @@
 static void writecache_commit_flushed(struct dm_writecache *wc, bool wait_for_ios)
 {
 	if (WC_MODE_PMEM(wc))
-		wmb();
+		pmem_wmb();
 	else
 		ssd_commit_flushed(wc, wait_for_ios);
 }
diff --git a/drivers/misc/ocxl/Kconfig b/drivers/misc/ocxl/Kconfig
index 2d2266c..6551007 100644
--- a/drivers/misc/ocxl/Kconfig
+++ b/drivers/misc/ocxl/Kconfig
@@ -23,7 +23,7 @@
 	  The ocxl driver enables userspace programs to access these
 	  accelerators through devices in /dev/ocxl/.
 
-	  For more information, see http://opencapi.org.
+	  For more information, see https://opencapi.org.
 
 	  This is not to be confused with the support for IBM CAPI
 	  accelerators (CONFIG_CXL), which are PCI-based instead of a
diff --git a/drivers/misc/ocxl/config.c b/drivers/misc/ocxl/config.c
index e3b99a3..4d490b9 100644
--- a/drivers/misc/ocxl/config.c
+++ b/drivers/misc/ocxl/config.c
@@ -71,6 +71,20 @@
 	return 0;
 }
 
+/**
+ * get_function_0() - Find a related PCI device (function 0)
+ * @dev: PCI device to match
+ *
+ * Returns a pointer to the related device, or NULL if not found
+ */
+static struct pci_dev *get_function_0(struct pci_dev *dev)
+{
+	unsigned int devfn = PCI_DEVFN(PCI_SLOT(dev->devfn), 0);
+
+	return pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus),
+					   dev->bus->number, devfn);
+}
+
 static void read_pasid(struct pci_dev *dev, struct ocxl_fn_config *fn)
 {
 	u16 val;
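
get_function_0() relies on the standard PCI devfn encoding: the three
low bits hold the function number and the five bits above them the slot
number. An illustration of the PCI_DEVFN()/PCI_SLOT() arithmetic (the
macros are reproduced from linux/pci.h):

    #include <stdio.h>

    #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
    #define PCI_SLOT(devfn)       (((devfn) >> 3) & 0x1f)
    #define PCI_FUNC(devfn)       ((devfn) & 0x07)

    int main(void)
    {
        unsigned int devfn = PCI_DEVFN(4, 3); /* e.g. device 04, function 3 */
        unsigned int fn0 = PCI_DEVFN(PCI_SLOT(devfn), 0);

        /* same slot, function 0: the one carrying the vendor DVSEC */
        printf("04.%u -> 04.%u\n", PCI_FUNC(devfn), PCI_FUNC(fn0));
        return 0;
    }
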
@@ -159,14 +173,15 @@
 static int read_dvsec_vendor(struct pci_dev *dev)
 {
 	int pos;
-	u32 cfg, tlx, dlx;
+	u32 cfg, tlx, dlx, reset_reload;
 
 	/*
-	 * vendor specific DVSEC is optional
+	 * Vendor-specific DVSEC, for IBM images only. Some older
+	 * images may not have it.
 	 *
-	 * It's currently only used on function 0 to specify the
-	 * version of some logic blocks. Some older images may not
-	 * even have it so we ignore any errors
+	 * It's only used on function 0 to specify the version of some
+	 * logic blocks and to give access to special registers to
+	 * enable host-based flashing.
 	 */
 	if (PCI_FUNC(dev->devfn) != 0)
 		return 0;
@@ -178,11 +193,67 @@
 	pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_CFG_VERS, &cfg);
 	pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_TLX_VERS, &tlx);
 	pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_DLX_VERS, &dlx);
+	pci_read_config_dword(dev, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD,
+			      &reset_reload);
 
 	dev_dbg(&dev->dev, "Vendor specific DVSEC:\n");
 	dev_dbg(&dev->dev, "  CFG version = 0x%x\n", cfg);
 	dev_dbg(&dev->dev, "  TLX version = 0x%x\n", tlx);
 	dev_dbg(&dev->dev, "  DLX version = 0x%x\n", dlx);
+	dev_dbg(&dev->dev, "  ResetReload = 0x%x\n", reset_reload);
+	return 0;
+}
+
+static int get_dvsec_vendor0(struct pci_dev *dev, struct pci_dev **dev0,
+			     int *out_pos)
+{
+	int pos;
+
+	if (PCI_FUNC(dev->devfn) != 0) {
+		dev = get_function_0(dev);
+		if (!dev)
+			return -1;
+	}
+	pos = find_dvsec(dev, OCXL_DVSEC_VENDOR_ID);
+	if (!pos)
+		return -1;
+	*dev0 = dev;
+	*out_pos = pos;
+	return 0;
+}
+
+int ocxl_config_get_reset_reload(struct pci_dev *dev, int *val)
+{
+	struct pci_dev *dev0;
+	u32 reset_reload;
+	int pos;
+
+	if (get_dvsec_vendor0(dev, &dev0, &pos))
+		return -1;
+
+	pci_read_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD,
+			      &reset_reload);
+	*val = !!(reset_reload & BIT(0));
+	return 0;
+}
+
+int ocxl_config_set_reset_reload(struct pci_dev *dev, int val)
+{
+	struct pci_dev *dev0;
+	u32 reset_reload;
+	int pos;
+
+	if (get_dvsec_vendor0(dev, &dev0, &pos))
+		return -1;
+
+	pci_read_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD,
+			      &reset_reload);
+	if (val)
+		reset_reload |= BIT(0);
+	else
+		reset_reload &= ~BIT(0);
+	pci_write_config_dword(dev0, pos + OCXL_DVSEC_VENDOR_RESET_RELOAD,
+			       reset_reload);
 	return 0;
 }
 
@@ -273,7 +344,7 @@
 }
 
 /**
- * read_template_version - Read the template version from the AFU
+ * read_template_version() - Read the template version from the AFU
  * @dev: the device for the AFU
  * @fn: the AFU offsets
  * @len: outputs the template length
@@ -282,7 +353,7 @@
  * Returns 0 on success, negative on failure
  */
 static int read_template_version(struct pci_dev *dev, struct ocxl_fn_config *fn,
-		u16 *len, u16 *version)
+				 u16 *len, u16 *version)
 {
 	u32 val32;
 	u8 major, minor;
@@ -476,7 +547,7 @@
 }
 
 /**
- * read_afu_lpc_memory_info - Populate AFU metadata regarding LPC memory
+ * read_afu_lpc_memory_info() - Populate AFU metadata regarding LPC memory
  * @dev: the device for the AFU
  * @fn: the AFU offsets
  * @afu: the AFU struct to populate the LPC metadata into
@@ -484,8 +555,8 @@
  * Returns 0 on success, negative on failure
  */
 static int read_afu_lpc_memory_info(struct pci_dev *dev,
-				struct ocxl_fn_config *fn,
-				struct ocxl_afu_config *afu)
+				    struct ocxl_fn_config *fn,
+				    struct ocxl_afu_config *afu)
 {
 	int rc;
 	u32 val32;
diff --git a/drivers/misc/ocxl/ocxl_internal.h b/drivers/misc/ocxl/ocxl_internal.h
index 345bf843..0bad0a1 100644
--- a/drivers/misc/ocxl/ocxl_internal.h
+++ b/drivers/misc/ocxl/ocxl_internal.h
@@ -113,6 +113,12 @@
 int ocxl_config_get_pasid_info(struct pci_dev *dev, int *count);
 
 /*
+ * Control whether the FPGA is reloaded on a link reset
+ */
+int ocxl_config_get_reset_reload(struct pci_dev *dev, int *val);
+int ocxl_config_set_reset_reload(struct pci_dev *dev, int val);
+
+/*
  * Check if an AFU index is valid for the given function.
  *
  * AFU indexes can be sparse, so a driver should check all indexes up
@@ -122,11 +128,12 @@
 				struct ocxl_fn_config *fn, int afu_idx);
 
 /**
- * Update values within a Process Element
+ * ocxl_link_update_pe() - Update values within a Process Element
+ * @link_handle: the link handle associated with the process element
+ * @pasid: the PASID for the AFU context
+ * @tid: the new thread id for the process element
  *
- * link_handle: the link handle associated with the process element
- * pasid: the PASID for the AFU context
- * tid: the new thread id for the process element
+ * Returns 0 on success
  */
 int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid);
 
diff --git a/drivers/misc/ocxl/sysfs.c b/drivers/misc/ocxl/sysfs.c
index 58f1ba2..25c78df 100644
--- a/drivers/misc/ocxl/sysfs.c
+++ b/drivers/misc/ocxl/sysfs.c
@@ -51,11 +51,46 @@
 			afu->pasid_count, afu->pasid_max);
 }
 
+static ssize_t reload_on_reset_show(struct device *device,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct ocxl_afu *afu = to_afu(device);
+	struct ocxl_fn *fn = afu->fn;
+	struct pci_dev *pci_dev = to_pci_dev(fn->dev.parent);
+	int val;
+
+	if (ocxl_config_get_reset_reload(pci_dev, &val))
+		return scnprintf(buf, PAGE_SIZE, "unavailable\n");
+
+	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+
+static ssize_t reload_on_reset_store(struct device *device,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct ocxl_afu *afu = to_afu(device);
+	struct ocxl_fn *fn = afu->fn;
+	struct pci_dev *pci_dev = to_pci_dev(fn->dev.parent);
+	int rc, val;
+
+	rc = kstrtoint(buf, 0, &val);
+	if (rc || (val != 0 && val != 1))
+		return -EINVAL;
+
+	if (ocxl_config_set_reset_reload(pci_dev, val))
+		return -ENODEV;
+
+	return count;
+}
+
 static struct device_attribute afu_attrs[] = {
 	__ATTR_RO(global_mmio_size),
 	__ATTR_RO(pp_mmio_size),
 	__ATTR_RO(afu_version),
 	__ATTR_RO(contexts),
+	__ATTR_RW(reload_on_reset),
 };
 
 static ssize_t global_mmio_read(struct file *filp, struct kobject *kobj,
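
From userspace the new attribute behaves like any other sysfs file:
reading yields 0, 1 or "unavailable", and writing "0" or "1" flips the
setting (anything else is rejected with -EINVAL by the store hook
above). A sketch of a caller; the path below is illustrative only, as
the real one depends on the AFU name and PCI address:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        /* hypothetical AFU directory; adjust to your device */
        const char *attr = "/sys/class/ocxl/AFU.0/device/reload_on_reset";
        char buf[8];
        ssize_t n;
        int fd;

        fd = open(attr, O_RDWR);
        if (fd < 0) {
            perror("open");
            return 1;
        }

        /* request an FPGA reload on the next link reset */
        if (write(fd, "1", 1) != 1)
            perror("write");

        lseek(fd, 0, SEEK_SET);
        n = read(fd, buf, sizeof(buf) - 1);
        if (n > 0) {
            buf[n] = '\0';
            printf("reload_on_reset = %s", buf);
        }
        close(fd);
        return 0;
    }
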
diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
index 6826a27..10dbdcd 100644
--- a/drivers/nvdimm/of_pmem.c
+++ b/drivers/nvdimm/of_pmem.c
@@ -90,6 +90,7 @@
 
 static const struct of_device_id of_pmem_region_match[] = {
 	{ .compatible = "pmem-region" },
+	{ .compatible = "pmem-region-v2" },
 	{ },
 };
 
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 4502f9c..c3237c2 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1206,13 +1206,13 @@
 	idx = this_cpu_add_return(flush_idx, hash_32(current->pid + idx, 8));
 
 	/*
-	 * The first wmb() is needed to 'sfence' all previous writes
-	 * such that they are architecturally visible for the platform
-	 * buffer flush.  Note that we've already arranged for pmem
+	 * The pmem_wmb() is needed to 'sfence' all previous writes
+	 * such that they are architecturally visible for the platform
+	 * buffer flush. Note that we've already arranged for pmem
 	 * writes to avoid the cache via memcpy_flushcache().  The final
 	 * wmb() ensures ordering for the NVDIMM flush write.
 	 */
-	wmb();
+	pmem_wmb();
 	for (i = 0; i < nd_region->ndr_mappings; i++)
 		if (ndrd_get_flush_wpq(ndrd, i, 0))
 			writeq(1, ndrd_get_flush_wpq(ndrd, i, idx));
diff --git a/include/asm-generic/barrier.h b/include/asm-generic/barrier.h
index fec97dc..798027b 100644
--- a/include/asm-generic/barrier.h
+++ b/include/asm-generic/barrier.h
@@ -240,5 +240,15 @@
 })
 #endif
 
+/*
+ * pmem_wmb() ensures that all stores to persistent storage issued by
+ * preceding instructions have updated persistent storage before any
+ * data access or data transfer caused by subsequent instructions is
+ * initiated.
+ */
+#ifndef pmem_wmb
+#define pmem_wmb()	wmb()
+#endif
+
 #endif /* !__ASSEMBLY__ */
 #endif /* __ASM_GENERIC_BARRIER_H */
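
What pmem_wmb() adds over wmb() is ordering with respect to the
persistence domain rather than other observers: flush the payload out of
the cache, fence, then publish the flag that makes the data discoverable
after a crash. A userspace analog with the barrier stubbed out (the
kernel primitive is the one defined above; __sync_synchronize() here is
only a stand-in):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* stand-in for pmem_wmb(); a full compiler + CPU barrier */
    #define pmem_wmb() __sync_synchronize()

    struct record {
        char payload[56];
        uint64_t valid; /* consumer trusts payload once this is set */
    };

    static void publish(struct record *r, const char *data)
    {
        /* the kernel would use memcpy_flushcache() to bypass the cache */
        memcpy(r->payload, data, strlen(data) + 1);

        /* payload stores must reach the persistence domain ... */
        pmem_wmb();

        /* ... before the flag that advertises them */
        r->valid = 1;
    }

    int main(void)
    {
        static struct record r;

        publish(&r, "hello");
        printf("valid=%llu payload=%s\n",
               (unsigned long long)r.valid, r.payload);
        return 0;
    }
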
diff --git a/include/asm-generic/qspinlock.h b/include/asm-generic/qspinlock.h
index 2b26cd7..4fe7fd0 100644
--- a/include/asm-generic/qspinlock.h
+++ b/include/asm-generic/qspinlock.h
@@ -13,6 +13,7 @@
 #include <asm-generic/qspinlock_types.h>
 #include <linux/atomic.h>
 
+#ifndef queued_spin_is_locked
 /**
  * queued_spin_is_locked - is the spinlock locked?
  * @lock: Pointer to queued spinlock structure
@@ -26,6 +27,7 @@
 	 */
 	return atomic_read(&lock->val);
 }
+#endif
 
 /**
  * queued_spin_value_unlocked - is the spinlock structure unlocked?
@@ -68,6 +70,7 @@
 
 extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
 
+#ifndef queued_spin_lock
 /**
  * queued_spin_lock - acquire a queued spinlock
  * @lock: Pointer to queued spinlock structure
@@ -81,6 +84,7 @@
 
 	queued_spin_lock_slowpath(lock, val);
 }
+#endif
 
 #ifndef queued_spin_unlock
 /**
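
The #ifndef guards added above follow the usual asm-generic convention:
an architecture that wants its own queued_spin_lock() defines both the
function and a same-named macro before including the generic header, so
the generic definition compiles out. A toy demonstration of the
mechanism (all names invented for the demo):

    #include <stdio.h>

    /* what an arch header does before the generic header is included */
    static inline void arch_fancy_op(void)
    {
        printf("arch-specific implementation\n");
    }
    #define fancy_op arch_fancy_op

    /* what the asm-generic header does */
    #ifndef fancy_op
    static inline void fancy_op(void)
    {
        printf("generic implementation\n");
    }
    #endif

    int main(void)
    {
        fancy_op(); /* resolves to the arch version */
        return 0;
    }
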
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 191772d..a2710e6 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -181,6 +181,7 @@
 	CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_THREAD_IMC_ONLINE,
 	CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
+	CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE,
 	CPUHP_AP_WATCHDOG_ONLINE,
 	CPUHP_AP_WORKQUEUE_ONLINE,
 	CPUHP_AP_RCUTREE_ONLINE,
diff --git a/include/linux/kexec.h b/include/linux/kexec.h
index ea67910..9e93bef 100644
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -183,17 +183,24 @@
 				   bool get_value);
 void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name);
 
-int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
-					 unsigned long buf_len);
-void * __weak arch_kexec_kernel_image_load(struct kimage *image);
-int __weak arch_kexec_apply_relocations_add(struct purgatory_info *pi,
-					    Elf_Shdr *section,
-					    const Elf_Shdr *relsec,
-					    const Elf_Shdr *symtab);
-int __weak arch_kexec_apply_relocations(struct purgatory_info *pi,
-					Elf_Shdr *section,
-					const Elf_Shdr *relsec,
-					const Elf_Shdr *symtab);
+/* Architectures may override the below functions */
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+				  unsigned long buf_len);
+void *arch_kexec_kernel_image_load(struct kimage *image);
+int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+				     Elf_Shdr *section,
+				     const Elf_Shdr *relsec,
+				     const Elf_Shdr *symtab);
+int arch_kexec_apply_relocations(struct purgatory_info *pi,
+				 Elf_Shdr *section,
+				 const Elf_Shdr *relsec,
+				 const Elf_Shdr *symtab);
+int arch_kimage_file_post_load_cleanup(struct kimage *image);
+#ifdef CONFIG_KEXEC_SIG
+int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
+				 unsigned long buf_len);
+#endif
+int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf);
 
 extern int kexec_add_buffer(struct kexec_buf *kbuf);
 int kexec_locate_mem_hole(struct kexec_buf *kbuf);
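
Removing __weak from the declarations matters beyond style: a __weak
attribute on a prototype makes any definition compiled in its scope weak
as well, so the attribute belongs only on the default definition, as
with arch_kexec_locate_mem_hole() in kernel/kexec_file.c further down. A
minimal demonstration of the weak-default idiom (single file, so the
weak definition is used; a strong definition linked from another object
would replace it):

    #include <stdio.h>

    /* generic default; an arch can override it with a strong definition */
    __attribute__((weak)) int locate_mem_hole(void)
    {
        puts("generic search");
        return 0;
    }

    /* in another object file, this strong definition would win:
     * int locate_mem_hole(void) { puts("arch search"); return 0; }
     */

    int main(void)
    {
        return locate_mem_hole();
    }
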
diff --git a/include/linux/mm.h b/include/linux/mm.h
index dc7b873..6c8333d 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -317,8 +317,6 @@
 
 #if defined(CONFIG_X86)
 # define VM_PAT		VM_ARCH_1	/* PAT reserves whole VMA at once (x86) */
-#elif defined(CONFIG_PPC)
-# define VM_SAO		VM_ARCH_1	/* Strong Access Ordering (powerpc) */
 #elif defined(CONFIG_PARISC)
 # define VM_GROWSUP	VM_ARCH_1
 #elif defined(CONFIG_IA64)
diff --git a/include/misc/ocxl-config.h b/include/misc/ocxl-config.h
index 3526fa9..ccfd3b4 100644
--- a/include/misc/ocxl-config.h
+++ b/include/misc/ocxl-config.h
@@ -41,5 +41,6 @@
 #define   OCXL_DVSEC_VENDOR_CFG_VERS            0x0C
 #define   OCXL_DVSEC_VENDOR_TLX_VERS            0x10
 #define   OCXL_DVSEC_VENDOR_DLX_VERS            0x20
+#define   OCXL_DVSEC_VENDOR_RESET_RELOAD        0x38
 
 #endif /* _OCXL_CONFIG_H_ */
diff --git a/include/misc/ocxl.h b/include/misc/ocxl.h
index 06dd583..357ef1a 100644
--- a/include/misc/ocxl.h
+++ b/include/misc/ocxl.h
@@ -62,8 +62,7 @@
 // Device detection & initialisation
 
 /**
- * Open an OpenCAPI function on an OpenCAPI device
- *
+ * ocxl_function_open() - Open an OpenCAPI function on an OpenCAPI device
  * @dev: The PCI device that contains the function
  *
  * Returns an opaque pointer to the function, or an error pointer (check with IS_ERR)
@@ -71,8 +70,7 @@
 struct ocxl_fn *ocxl_function_open(struct pci_dev *dev);
 
 /**
- * Get the list of AFUs associated with a PCI function device
- *
+ * ocxl_function_afu_list() - Get the list of AFUs associated with a PCI function device
  * Returns a list of struct ocxl_afu *
  *
  * @fn: The OpenCAPI function containing the AFUs
@@ -80,8 +78,7 @@
 struct list_head *ocxl_function_afu_list(struct ocxl_fn *fn);
 
 /**
- * Fetch an AFU instance from an OpenCAPI function
- *
+ * ocxl_function_fetch_afu() - Fetch an AFU instance from an OpenCAPI function
  * @fn: The OpenCAPI function to get the AFU from
  * @afu_idx: The index of the AFU to get
  *
@@ -92,23 +89,20 @@
 struct ocxl_afu *ocxl_function_fetch_afu(struct ocxl_fn *fn, u8 afu_idx);
 
 /**
- * Take a reference to an AFU
- *
+ * ocxl_afu_get() - Take a reference to an AFU
  * @afu: The AFU to increment the reference count on
  */
 void ocxl_afu_get(struct ocxl_afu *afu);
 
 /**
- * Release a reference to an AFU
- *
+ * ocxl_afu_put() - Release a reference to an AFU
  * @afu: The AFU to decrement the reference count on
  */
 void ocxl_afu_put(struct ocxl_afu *afu);
 
 
 /**
- * Get the configuration information for an OpenCAPI function
- *
+ * ocxl_function_config() - Get the configuration information for an OpenCAPI function
  * @fn: The OpenCAPI function to get the config for
  *
  * Returns the function config, or NULL on error
@@ -116,8 +110,7 @@
 const struct ocxl_fn_config *ocxl_function_config(struct ocxl_fn *fn);
 
 /**
- * Close an OpenCAPI function
- *
+ * ocxl_function_close() - Close an OpenCAPI function
  * This will free any AFUs previously retrieved from the function, and
  * detach any associated contexts. The contexts must be freed by the caller.
  *
@@ -129,8 +122,7 @@
 // Context allocation
 
 /**
- * Allocate an OpenCAPI context
- *
+ * ocxl_context_alloc() - Allocate an OpenCAPI context
  * @context: The OpenCAPI context to allocate, must be freed with ocxl_context_free
  * @afu: The AFU the context belongs to
  * @mapping: The mapping to unmap when the context is closed (may be NULL)
@@ -139,14 +131,13 @@
 			struct address_space *mapping);
 
 /**
- * Free an OpenCAPI context
- *
+ * ocxl_context_free() - Free an OpenCAPI context
  * @ctx: The OpenCAPI context to free
  */
 void ocxl_context_free(struct ocxl_context *ctx);
 
 /**
- * Grant access to an MM to an OpenCAPI context
+ * ocxl_context_attach() - Grant access to an MM to an OpenCAPI context
  * @ctx: The OpenCAPI context to attach
  * @amr: The value of the AMR register to restrict access
  * @mm: The mm to attach to the context
@@ -157,7 +148,7 @@
 				struct mm_struct *mm);
 
 /**
- * Detach an MM from an OpenCAPI context
+ * ocxl_context_detach() - Detach an MM from an OpenCAPI context
  * @ctx: The OpenCAPI context to attach
  *
  * Returns 0 on success, negative on failure
@@ -167,25 +158,25 @@
 // AFU IRQs
 
 /**
- * Allocate an IRQ associated with an AFU context
+ * ocxl_afu_irq_alloc() - Allocate an IRQ associated with an AFU context
  * @ctx: the AFU context
  * @irq_id: out, the IRQ ID
  *
  * Returns 0 on success, negative on failure
  */
-extern int ocxl_afu_irq_alloc(struct ocxl_context *ctx, int *irq_id);
+int ocxl_afu_irq_alloc(struct ocxl_context *ctx, int *irq_id);
 
 /**
- * Frees an IRQ associated with an AFU context
+ * ocxl_afu_irq_free() - Frees an IRQ associated with an AFU context
  * @ctx: the AFU context
  * @irq_id: the IRQ ID
  *
  * Returns 0 on success, negative on failure
  */
-extern int ocxl_afu_irq_free(struct ocxl_context *ctx, int irq_id);
+int ocxl_afu_irq_free(struct ocxl_context *ctx, int irq_id);
 
 /**
- * Gets the address of the trigger page for an IRQ
+ * ocxl_afu_irq_get_addr() - Gets the address of the trigger page for an IRQ
  * This can then be provided to an AFU which will write to that
  * page to trigger the IRQ.
  * @ctx: The AFU context that the IRQ is associated with
@@ -193,10 +184,10 @@
  *
  * returns the trigger page address, or 0 if the IRQ is not valid
  */
-extern u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id);
+u64 ocxl_afu_irq_get_addr(struct ocxl_context *ctx, int irq_id);
 
 /**
- * Provide a callback to be called when an IRQ is triggered
+ * ocxl_irq_set_handler() - Provide a callback to be called when an IRQ is triggered
  * @ctx: The AFU context that the IRQ is associated with
  * @irq_id: The IRQ ID
  * @handler: the callback to be called when the IRQ is triggered
@@ -213,8 +204,7 @@
 // AFU Metadata
 
 /**
- * Get a pointer to the config for an AFU
- *
+ * ocxl_afu_config() - Get a pointer to the config for an AFU
  * @afu: a pointer to the AFU to get the config for
  *
  * Returns a pointer to the AFU config
@@ -222,27 +212,24 @@
 struct ocxl_afu_config *ocxl_afu_config(struct ocxl_afu *afu);
 
 /**
- * Assign opaque hardware specific information to an OpenCAPI AFU.
- *
- * @dev: The PCI device associated with the OpenCAPI device
+ * ocxl_afu_set_private() - Assign opaque hardware specific information to an OpenCAPI AFU.
+ * @afu: The OpenCAPI AFU
  * @private: the opaque hardware specific information to assign to the driver
  */
 void ocxl_afu_set_private(struct ocxl_afu *afu, void *private);
 
 /**
- * Fetch the hardware specific information associated with an external OpenCAPI
- * AFU. This may be consumed by an external OpenCAPI driver.
- *
- * @afu: The AFU
+ * ocxl_afu_get_private() - Fetch the hardware specific information associated with
+ * an external OpenCAPI AFU. This may be consumed by an external OpenCAPI driver.
+ * @afu: The OpenCAPI AFU
  *
  * Returns the opaque pointer associated with the device, or NULL if not set
  */
-void *ocxl_afu_get_private(struct ocxl_afu *dev);
+void *ocxl_afu_get_private(struct ocxl_afu *afu);
 
 // Global MMIO
 /**
- * Read a 32 bit value from global MMIO
- *
+ * ocxl_global_mmio_read32() - Read a 32 bit value from global MMIO
  * @afu: The AFU
  * @offset: The Offset from the start of MMIO
  * @endian: the endianness that the MMIO data is in
@@ -251,11 +238,10 @@
  * Returns 0 for success, negative on error
  */
 int ocxl_global_mmio_read32(struct ocxl_afu *afu, size_t offset,
-				enum ocxl_endian endian, u32 *val);
+			    enum ocxl_endian endian, u32 *val);
 
 /**
- * Read a 64 bit value from global MMIO
- *
+ * ocxl_global_mmio_read64() - Read a 64 bit value from global MMIO
  * @afu: The AFU
  * @offset: The Offset from the start of MMIO
  * @endian: the endianness that the MMIO data is in
@@ -264,11 +250,10 @@
  * Returns 0 for success, negative on error
  */
 int ocxl_global_mmio_read64(struct ocxl_afu *afu, size_t offset,
-				enum ocxl_endian endian, u64 *val);
+			    enum ocxl_endian endian, u64 *val);
 
 /**
- * Write a 32 bit value to global MMIO
- *
+ * ocxl_global_mmio_write32() - Write a 32 bit value to global MMIO
  * @afu: The AFU
  * @offset: The Offset from the start of MMIO
  * @endian: the endianness that the MMIO data is in
@@ -277,11 +262,10 @@
  * Returns 0 for success, negative on error
  */
 int ocxl_global_mmio_write32(struct ocxl_afu *afu, size_t offset,
-				enum ocxl_endian endian, u32 val);
+			     enum ocxl_endian endian, u32 val);
 
 /**
- * Write a 64 bit value to global MMIO
- *
+ * ocxl_global_mmio_write64() - Write a 64 bit value to global MMIO
  * @afu: The AFU
  * @offset: The Offset from the start of MMIO
  * @endian: the endianness that the MMIO data is in
@@ -290,11 +274,10 @@
  * Returns 0 for success, negative on error
  */
 int ocxl_global_mmio_write64(struct ocxl_afu *afu, size_t offset,
-				enum ocxl_endian endian, u64 val);
+			     enum ocxl_endian endian, u64 val);
 
 /**
- * Set bits in a 32 bit global MMIO register
- *
+ * ocxl_global_mmio_set32() - Set bits in a 32 bit global MMIO register
  * @afu: The AFU
  * @offset: The Offset from the start of MMIO
  * @endian: the endianness that the MMIO data is in
@@ -303,11 +286,10 @@
  * Returns 0 for success, negative on error
  */
 int ocxl_global_mmio_set32(struct ocxl_afu *afu, size_t offset,
-				enum ocxl_endian endian, u32 mask);
+			   enum ocxl_endian endian, u32 mask);
 
 /**
- * Set bits in a 64 bit global MMIO register
- *
+ * ocxl_global_mmio_set64() - Set bits in a 64 bit global MMIO register
  * @afu: The AFU
  * @offset: The Offset from the start of MMIO
  * @endian: the endianness that the MMIO data is in
@@ -316,11 +298,10 @@
  * Returns 0 for success, negative on error
  */
 int ocxl_global_mmio_set64(struct ocxl_afu *afu, size_t offset,
-				enum ocxl_endian endian, u64 mask);
+			   enum ocxl_endian endian, u64 mask);
 
 /**
- * Set bits in a 32 bit global MMIO register
- *
+ * ocxl_global_mmio_clear32() - Clear bits in a 32 bit global MMIO register
  * @afu: The AFU
  * @offset: The Offset from the start of MMIO
  * @endian: the endianness that the MMIO data is in
@@ -329,11 +310,10 @@
  * Returns 0 for success, negative on error
  */
 int ocxl_global_mmio_clear32(struct ocxl_afu *afu, size_t offset,
-				enum ocxl_endian endian, u32 mask);
+			     enum ocxl_endian endian, u32 mask);
 
 /**
- * Set bits in a 64 bit global MMIO register
- *
+ * ocxl_global_mmio_clear64() - Clear bits in a 64 bit global MMIO register
  * @afu: The AFU
  * @offset: The Offset from the start of MMIO
  * @endian: the endianness that the MMIO data is in
@@ -342,7 +322,7 @@
  * Returns 0 for success, negative on error
  */
 int ocxl_global_mmio_clear64(struct ocxl_afu *afu, size_t offset,
-				enum ocxl_endian endian, u64 mask);
+			     enum ocxl_endian endian, u64 mask);
 
 // Functions left here are for compatibility with the cxlflash driver
 
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 5fb7520..939092d 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -114,8 +114,6 @@
 
 #if defined(CONFIG_X86)
 #define __VM_ARCH_SPECIFIC_1 {VM_PAT,     "pat"           }
-#elif defined(CONFIG_PPC)
-#define __VM_ARCH_SPECIFIC_1 {VM_SAO,     "sao"           }
 #elif defined(CONFIG_PARISC) || defined(CONFIG_IA64)
 #define __VM_ARCH_SPECIFIC_1 {VM_GROWSUP,	"growsup"	}
 #elif !defined(CONFIG_MMU)
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 94661d2..78c0837 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -636,6 +636,19 @@
 }
 
 /**
+ * arch_kexec_locate_mem_hole() - Find free memory to place the segments.
+ * @kbuf:	Parameters for the memory search.
+ *
+ * On success, kbuf->mem will have the start address of the memory region found.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int __weak arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
+{
+	return kexec_locate_mem_hole(kbuf);
+}
+
+/**
  * kexec_add_buffer - place a buffer in a kexec segment
  * @kbuf:	Buffer contents and memory parameters.
  *
@@ -647,7 +660,6 @@
  */
 int kexec_add_buffer(struct kexec_buf *kbuf)
 {
-
 	struct kexec_segment *ksegment;
 	int ret;
 
@@ -675,7 +687,7 @@
 	kbuf->buf_align = max(kbuf->buf_align, PAGE_SIZE);
 
 	/* Walk the RAM ranges and allocate a suitable range for the buffer */
-	ret = kexec_locate_mem_hole(kbuf);
+	ret = arch_kexec_locate_mem_hole(kbuf);
 	if (ret)
 		return ret;
 
diff --git a/mm/ksm.c b/mm/ksm.c
index 5fb176d..217842a 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -2452,10 +2452,6 @@
 		if (vma_is_dax(vma))
 			return 0;
 
-#ifdef VM_SAO
-		if (*vm_flags & VM_SAO)
-			return 0;
-#endif
 #ifdef VM_SPARC_ADI
 		if (*vm_flags & VM_SPARC_ADI)
 			return 0;
diff --git a/tools/arch/powerpc/include/uapi/asm/kvm.h b/tools/arch/powerpc/include/uapi/asm/kvm.h
index 264e266..c3af3f3 100644
--- a/tools/arch/powerpc/include/uapi/asm/kvm.h
+++ b/tools/arch/powerpc/include/uapi/asm/kvm.h
@@ -640,6 +640,11 @@
 #define KVM_REG_PPC_ONLINE	(KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbf)
 #define KVM_REG_PPC_PTCR	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc0)
 
+/* POWER10 registers */
+#define KVM_REG_PPC_MMCR3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc1)
+#define KVM_REG_PPC_SIER2	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc2)
+#define KVM_REG_PPC_SIER3	(KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xc3)
+
 /* Transactional Memory checkpointed state:
  * This is all GPRs, all VSX regs and a subset of SPRs
  */
diff --git a/tools/perf/arch/powerpc/util/book3s_hcalls.h b/tools/perf/arch/powerpc/util/book3s_hcalls.h
index 54cfa05..488f433 100644
--- a/tools/perf/arch/powerpc/util/book3s_hcalls.h
+++ b/tools/perf/arch/powerpc/util/book3s_hcalls.h
@@ -84,7 +84,7 @@
 	{0x1a4, "H_CREATE_RPT"},				\
 	{0x1a8, "H_REMOVE_RPT"},				\
 	{0x1ac, "H_REGISTER_RPAGES"},				\
-	{0x1b0, "H_DISABLE_AND_GETC"},				\
+	{0x1b0, "H_DISABLE_AND_GET"},				\
 	{0x1b4, "H_ERROR_DATA"},				\
 	{0x1b8, "H_GET_HCA_INFO"},				\
 	{0x1bc, "H_GET_PERF_COUNT"},				\
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index 0453c50..55ef151 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -9,7 +9,17 @@
  * This selftest exercises the powerpc alignment fault handler.
  *
  * We create two sets of source and destination buffers, one in regular memory,
- * the other cache-inhibited (we use /dev/fb0 for this).
+ * the other cache-inhibited (by default we use /dev/fb0 for this, but an
+ * alternative path for cache-inhibited memory may be provided).
+ *
+ * One way to get cache-inhibited memory is to use the "mem" kernel parameter
+ * to limit the kernel to less memory than actually exists.  Addresses above
+ * the limit may still be accessed but will be treated as cache-inhibited. For
+ * example, if there is actually 4GB of memory and the parameter "mem=3GB" is
+ * used, memory from address 0xC0000000 onwards is treated as cache-inhibited.
+ * To access this region /dev/mem is used. The kernel should be configured
+ * without CONFIG_STRICT_DEVMEM. In this case use:
+ *         ./alignment_handler /dev/mem 0xc0000000
  *
  * We initialise the source buffers, then use whichever set of load/store
  * instructions is under test to copy bytes from the source buffers to the
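
The /dev/mem route described above is an ordinary mmap() at an offset
past the "mem=" limit. A sketch of just the mapping step (assumes
"mem=3GB" on a 4GB machine and a kernel built without
CONFIG_STRICT_DEVMEM, as the comment says):

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        off_t ci_offset = 0xC0000000UL; /* first byte above the mem= limit */
        size_t len = getpagesize();
        void *ci;
        int fd;

        fd = open("/dev/mem", O_RDWR);
        if (fd < 0) {
            perror("open /dev/mem");
            return 1;
        }

        /* pages above the limit are mapped cache-inhibited */
        ci = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
                  fd, ci_offset);
        if (ci == MAP_FAILED) {
            perror("mmap");
            return 1;
        }

        *(volatile char *)ci = 0x5a; /* uncached store */
        munmap(ci, len);
        close(fd);
        return 0;
    }
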
@@ -48,11 +58,14 @@
 #include <asm/cputable.h>
 
 #include "utils.h"
+#include "instructions.h"
 
 int bufsize;
 int debug;
 int testing;
 volatile int gotsig;
+char *cipath = "/dev/fb0";
+long cioffset;
 
 void sighandler(int sig, siginfo_t *info, void *ctx)
 {
@@ -84,6 +97,17 @@
 	}							\
 	rc |= do_test(#name, test_##name)
 
+#define TESTP(name, ld_op, st_op, ld_reg, st_reg)		\
+	void test_##name(char *s, char *d)			\
+	{							\
+		asm volatile(					\
+			ld_op(ld_reg, %0, 0, 0)			\
+			st_op(st_reg, %1, 0, 0)			\
+			:: "r"(s), "r"(d), "r"(0)		\
+			: "memory", "vs0", "vs32", "r31");	\
+	}							\
+	rc |= do_test(#name, test_##name)
+
 #define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32)
 #define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32)
 #define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32)
@@ -103,6 +127,17 @@
 #define LOAD_FLOAT_XFORM_TEST(op)  TEST(op, op, stfdx, XFORM, 0, 0)
 #define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0)
 
+#define LOAD_MLS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31)
+#define STORE_MLS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31)
+
+#define LOAD_8LS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31)
+#define STORE_8LS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31)
+
+#define LOAD_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, op, PSTFD, 0, 0)
+#define STORE_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, PLFD, op, 0, 0)
+
+#define LOAD_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, op, PSTXV ## tail, 0, 32)
+#define STORE_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, PLXV ## tail, op, 32, 0)
 
 /* FIXME: Unimplemented tests: */
 // STORE_DFORM_TEST(stq)   /* FIXME: need two registers for quad */
@@ -195,17 +230,18 @@
 
 	printf("\tDoing %s:\t", test_name);
 
-	fd = open("/dev/fb0", O_RDWR);
+	fd = open(cipath, O_RDWR);
 	if (fd < 0) {
 		printf("\n");
-		perror("Can't open /dev/fb0 now?");
+		perror("Can't open ci file now?");
 		return 1;
 	}
 
-	ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
-		   fd, 0x0);
-	ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
-		   fd, bufsize);
+	ci0 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+		   fd, cioffset);
+	ci1 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+		   fd, cioffset + bufsize);
+
 	if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) {
 		printf("\n");
 		perror("mmap failed");
@@ -270,11 +306,11 @@
 	return rc;
 }
 
-static bool can_open_fb0(void)
+static bool can_open_cifile(void)
 {
 	int fd;
 
-	fd = open("/dev/fb0", O_RDWR);
+	fd = open(cipath, O_RDWR);
 	if (fd < 0)
 		return false;
 
@@ -286,7 +322,7 @@
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
 
 	printf("VSX: 2.06B\n");
@@ -304,7 +340,7 @@
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
 
 	printf("VSX: 2.07B\n");
@@ -320,7 +356,7 @@
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 
 	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
 	printf("VSX: 3.00B\n");
@@ -348,11 +384,30 @@
 	return rc;
 }
 
+int test_alignment_handler_vsx_prefix(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_cifile());
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+	printf("VSX: PREFIX\n");
+	LOAD_VSX_8LS_PREFIX_TEST(PLXSD, 0);
+	LOAD_VSX_8LS_PREFIX_TEST(PLXSSP, 0);
+	LOAD_VSX_8LS_PREFIX_TEST(PLXV0, 0);
+	LOAD_VSX_8LS_PREFIX_TEST(PLXV1, 1);
+	STORE_VSX_8LS_PREFIX_TEST(PSTXSD, 0);
+	STORE_VSX_8LS_PREFIX_TEST(PSTXSSP, 0);
+	STORE_VSX_8LS_PREFIX_TEST(PSTXV0, 0);
+	STORE_VSX_8LS_PREFIX_TEST(PSTXV1, 1);
+	return rc;
+}
+
 int test_alignment_handler_integer(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 
 	printf("Integer\n");
 	LOAD_DFORM_TEST(lbz);
@@ -408,7 +463,7 @@
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
 
 	printf("Integer: 2.06\n");
@@ -419,11 +474,32 @@
 	return rc;
 }
 
+int test_alignment_handler_integer_prefix(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_cifile());
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+	printf("Integer: PREFIX\n");
+	LOAD_MLS_PREFIX_TEST(PLBZ);
+	LOAD_MLS_PREFIX_TEST(PLHZ);
+	LOAD_MLS_PREFIX_TEST(PLHA);
+	LOAD_MLS_PREFIX_TEST(PLWZ);
+	LOAD_8LS_PREFIX_TEST(PLWA);
+	LOAD_8LS_PREFIX_TEST(PLD);
+	STORE_MLS_PREFIX_TEST(PSTB);
+	STORE_MLS_PREFIX_TEST(PSTH);
+	STORE_MLS_PREFIX_TEST(PSTW);
+	STORE_8LS_PREFIX_TEST(PSTD);
+	return rc;
+}
+
 int test_alignment_handler_vmx(void)
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC));
 
 	printf("VMX\n");
@@ -451,7 +527,7 @@
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 
 	printf("Floating point\n");
 	LOAD_FLOAT_DFORM_TEST(lfd);
@@ -479,7 +555,7 @@
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_05));
 
 	printf("Floating point: 2.05\n");
@@ -497,7 +573,7 @@
 {
 	int rc = 0;
 
-	SKIP_IF(!can_open_fb0());
+	SKIP_IF(!can_open_cifile());
 	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
 
 	printf("Floating point: 2.06\n");
@@ -507,13 +583,32 @@
 	return rc;
 }
 
+int test_alignment_handler_fp_prefix(void)
+{
+	int rc = 0;
+
+	SKIP_IF(!can_open_cifile());
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+	printf("Floating point: PREFIX\n");
+	LOAD_FLOAT_DFORM_TEST(lfs);
+	LOAD_FLOAT_MLS_PREFIX_TEST(PLFS);
+	LOAD_FLOAT_MLS_PREFIX_TEST(PLFD);
+	STORE_FLOAT_MLS_PREFIX_TEST(PSTFS);
+	STORE_FLOAT_MLS_PREFIX_TEST(PSTFD);
+	return rc;
+}
+
 void usage(char *prog)
 {
-	printf("Usage: %s [options]\n", prog);
+	printf("Usage: %s [options] [path [offset]]\n", prog);
 	printf("  -d	Enable debug error output\n");
 	printf("\n");
-	printf("This test requires a POWER8 or POWER9 CPU and a usable ");
-	printf("framebuffer at /dev/fb0.\n");
+	printf("This test requires a POWER8, POWER9 or POWER10 CPU ");
+	printf("and either a usable framebuffer at /dev/fb0 or ");
+	printf("the path to usable cache inhibited memory and optional ");
+	printf("offset to be provided\n");
 }
 
 int main(int argc, char *argv[])
@@ -533,6 +628,13 @@
 			exit(1);
 		}
 	}
+	argc -= optind;
+	argv += optind;
+
+	if (argc > 0)
+		cipath = argv[0];
+	if (argc > 1)
+		cioffset = strtol(argv[1], 0, 0x10);
 
 	bufsize = getpagesize();
 
@@ -552,10 +654,14 @@
 			   "test_alignment_handler_vsx_207");
 	rc |= test_harness(test_alignment_handler_vsx_300,
 			   "test_alignment_handler_vsx_300");
+	rc |= test_harness(test_alignment_handler_vsx_prefix,
+			   "test_alignment_handler_vsx_prefix");
 	rc |= test_harness(test_alignment_handler_integer,
 			   "test_alignment_handler_integer");
 	rc |= test_harness(test_alignment_handler_integer_206,
 			   "test_alignment_handler_integer_206");
+	rc |= test_harness(test_alignment_handler_integer_prefix,
+			   "test_alignment_handler_integer_prefix");
 	rc |= test_harness(test_alignment_handler_vmx,
 			   "test_alignment_handler_vmx");
 	rc |= test_harness(test_alignment_handler_fp,
@@ -564,5 +670,7 @@
 			   "test_alignment_handler_fp_205");
 	rc |= test_harness(test_alignment_handler_fp_206,
 			   "test_alignment_handler_fp_206");
+	rc |= test_harness(test_alignment_handler_fp_prefix,
+			   "test_alignment_handler_fp_prefix");
 	return rc;
 }
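
With the new arguments the test can target any cache-inhibited mapping rather
than only /dev/fb0. A purely illustrative invocation (the device path below is
hypothetical; note that the offset argument is parsed as hexadecimal, so 10
here means 0x10):

	# hypothetical PCI BAR exposing cache-inhibited memory
	./alignment_handler /sys/bus/pci/devices/0000:01:00.0/resource0 10
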
diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
index a2e8c9d..d50cc05 100644
--- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c
+++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
@@ -19,6 +19,7 @@
 #include <limits.h>
 #include <sys/time.h>
 #include <sys/syscall.h>
+#include <sys/sysinfo.h>
 #include <sys/types.h>
 #include <sys/shm.h>
 #include <linux/futex.h>
@@ -104,8 +105,9 @@
 
 static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu)
 {
-	int pid;
-	cpu_set_t cpuset;
+	int pid, ncpus;
+	cpu_set_t *cpuset;
+	size_t size;
 
 	pid = fork();
 	if (pid == -1) {
@@ -116,14 +118,23 @@
 	if (pid)
 		return;
 
-	CPU_ZERO(&cpuset);
-	CPU_SET(cpu, &cpuset);
+	ncpus = get_nprocs();
+	size = CPU_ALLOC_SIZE(ncpus);
+	cpuset = CPU_ALLOC(ncpus);
+	if (!cpuset) {
+		perror("malloc");
+		exit(1);
+	}
+	CPU_ZERO_S(size, cpuset);
+	CPU_SET_S(cpu, size, cpuset);
 
-	if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) {
+	if (sched_setaffinity(0, size, cpuset)) {
 		perror("sched_setaffinity");
+		CPU_FREE(cpuset);
 		exit(1);
 	}
 
+	CPU_FREE(cpuset);
 	fn(arg);
 
 	exit(0);
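
The fixed-size cpu_set_t only describes CPUs 0-1023, so on larger systems
CPU_SET() on a high CPU number is silently ignored and the subsequent
sched_setaffinity() fails; the hunk above switches to the dynamically sized
API. A minimal standalone sketch of the same pattern (pin_to_cpu is just an
illustrative name, not part of the patch):

	#define _GNU_SOURCE
	#include <sched.h>
	#include <sys/sysinfo.h>

	/* Pin the calling thread to `cpu` with a mask sized for all CPUs. */
	static int pin_to_cpu(int cpu)
	{
		int ncpus = get_nprocs();
		size_t size = CPU_ALLOC_SIZE(ncpus);
		cpu_set_t *set = CPU_ALLOC(ncpus);

		if (!set)
			return -1;

		CPU_ZERO_S(size, set);
		CPU_SET_S(cpu, size, set);

		/* Pass the allocated size, not sizeof(cpu_set_t) */
		if (sched_setaffinity(0, size, set)) {
			CPU_FREE(set);
			return -1;
		}

		CPU_FREE(set);
		return 0;
	}
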
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
index f52ed92..00dc32c 100755
--- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
@@ -5,12 +5,17 @@
 	local dev="$1"
 	local path="/sys/bus/pci/devices/$dev/eeh_pe_state"
 
-	if ! [ -e "$path" ] ; then
+	# If a driver doesn't support the error handling callbacks then the
+	# device is recovered by removing and re-probing it. This causes the
+	# sysfs directory to disappear, so read the PE state once and squash
+	# any potential error messages.
+	local eeh_state="$(cat $path 2>/dev/null)"
+	if [ -z "$eeh_state" ]; then
 		return 1;
 	fi
 
-	local fw_state="$(cut -d' ' -f1 < $path)"
-	local sw_state="$(cut -d' ' -f2 < $path)"
+	local fw_state="$(echo $eeh_state | cut -d' ' -f1)"
+	local sw_state="$(echo $eeh_state | cut -d' ' -f2)"
 
 	# If EEH_PE_ISOLATED or EEH_PE_RECOVERING are set then the PE is in an
 	# error state or being recovered. Either way, not ok.
diff --git a/tools/testing/selftests/powerpc/include/instructions.h b/tools/testing/selftests/powerpc/include/instructions.h
index f36061e..4efa631 100644
--- a/tools/testing/selftests/powerpc/include/instructions.h
+++ b/tools/testing/selftests/powerpc/include/instructions.h
@@ -66,4 +66,81 @@
 #define PPC_INST_PASTE                 __PASTE(0, 0, 0, 0)
 #define PPC_INST_PASTE_LAST            __PASTE(0, 0, 1, 1)
 
+/* This defines the prefixed load/store instructions */
+#ifdef __ASSEMBLY__
+#  define stringify_in_c(...)	__VA_ARGS__
+#else
+#  define __stringify_in_c(...)	#__VA_ARGS__
+#  define stringify_in_c(...)	__stringify_in_c(__VA_ARGS__) " "
+#endif
+
+#define __PPC_RA(a)	(((a) & 0x1f) << 16)
+#define __PPC_RS(s)	(((s) & 0x1f) << 21)
+#define __PPC_RT(t)	__PPC_RS(t)
+#define __PPC_PREFIX_R(r)	(((r) & 0x1) << 20)
+
+#define PPC_PREFIX_MLS			0x06000000
+#define PPC_PREFIX_8LS			0x04000000
+
+#define PPC_INST_LBZ			0x88000000
+#define PPC_INST_LHZ			0xa0000000
+#define PPC_INST_LHA			0xa8000000
+#define PPC_INST_LWZ			0x80000000
+#define PPC_INST_STB			0x98000000
+#define PPC_INST_STH			0xb0000000
+#define PPC_INST_STW			0x90000000
+#define PPC_INST_STD			0xf8000000
+#define PPC_INST_LFS			0xc0000000
+#define PPC_INST_LFD			0xc8000000
+#define PPC_INST_STFS			0xd0000000
+#define PPC_INST_STFD			0xd8000000
+
+#define PREFIX_MLS(instr, t, a, r, d)	stringify_in_c(.balign 64, , 4;)		\
+					stringify_in_c(.long PPC_PREFIX_MLS |		\
+						       __PPC_PREFIX_R(r) |		\
+						       (((d) >> 16) & 0x3ffff);)	\
+					stringify_in_c(.long (instr)  |			\
+						       __PPC_RT(t) |			\
+						       __PPC_RA(a) |			\
+						       ((d) & 0xffff);\n)
+
+#define PREFIX_8LS(instr, t, a, r, d)	stringify_in_c(.balign 64, , 4;)		\
+					stringify_in_c(.long PPC_PREFIX_8LS |		\
+						       __PPC_PREFIX_R(r) |		\
+						       (((d) >> 16) & 0x3ffff);)	\
+					stringify_in_c(.long (instr)  |			\
+						       __PPC_RT(t) |			\
+						       __PPC_RA(a) |			\
+						       ((d) & 0xffff);\n)
+
+/* Prefixed Integer Load/Store instructions */
+#define PLBZ(t, a, r, d)		PREFIX_MLS(PPC_INST_LBZ, t, a, r, d)
+#define PLHZ(t, a, r, d)		PREFIX_MLS(PPC_INST_LHZ, t, a, r, d)
+#define PLHA(t, a, r, d)		PREFIX_MLS(PPC_INST_LHA, t, a, r, d)
+#define PLWZ(t, a, r, d)		PREFIX_MLS(PPC_INST_LWZ, t, a, r, d)
+#define PLWA(t, a, r, d)		PREFIX_8LS(0xa4000000, t, a, r, d)
+#define PLD(t, a, r, d)			PREFIX_8LS(0xe4000000, t, a, r, d)
+#define PLQ(t, a, r, d)			PREFIX_8LS(0xe0000000, t, a, r, d)
+#define PSTB(s, a, r, d)		PREFIX_MLS(PPC_INST_STB, s, a, r, d)
+#define PSTH(s, a, r, d)		PREFIX_MLS(PPC_INST_STH, s, a, r, d)
+#define PSTW(s, a, r, d)		PREFIX_MLS(PPC_INST_STW, s, a, r, d)
+#define PSTD(s, a, r, d)		PREFIX_8LS(0xf4000000, s, a, r, d)
+#define PSTQ(s, a, r, d)		PREFIX_8LS(0xf0000000, s, a, r, d)
+
+/* Prefixed Floating-Point Load/Store Instructions */
+#define PLFS(frt, a, r, d)		PREFIX_MLS(PPC_INST_LFS, frt, a, r, d)
+#define PLFD(frt, a, r, d)		PREFIX_MLS(PPC_INST_LFD, frt, a, r, d)
+#define PSTFS(frs, a, r, d)		PREFIX_MLS(PPC_INST_STFS, frs, a, r, d)
+#define PSTFD(frs, a, r, d)		PREFIX_MLS(PPC_INST_STFD, frs, a, r, d)
+
+/* Prefixed VSX Load/Store Instructions */
+#define PLXSD(vrt, a, r, d)		PREFIX_8LS(0xa8000000, vrt, a, r, d)
+#define PLXSSP(vrt, a, r, d)		PREFIX_8LS(0xac000000, vrt, a, r, d)
+#define PLXV0(s, a, r, d)		PREFIX_8LS(0xc8000000, s, a, r, d)
+#define PLXV1(s, a, r, d)		PREFIX_8LS(0xcc000000, s, a, r, d)
+#define PSTXSD(vrs, a, r, d)		PREFIX_8LS(0xb8000000, vrs, a, r, d)
+#define PSTXSSP(vrs, a, r, d)		PREFIX_8LS(0xbc000000, vrs, a, r, d)
+#define PSTXV0(s, a, r, d)		PREFIX_8LS(0xd8000000, s, a, r, d)
+#define PSTXV1(s, a, r, d)		PREFIX_8LS(0xdc000000, s, a, r, d)
+
 #endif /* _SELFTESTS_POWERPC_INSTRUCTIONS_H */
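
A prefixed load/store is a pair of 32-bit words: the prefix carries the R bit
and the high 18 bits of the 34-bit displacement, while the suffix mirrors the
classic D-form word with the low 16 bits. The ".balign 64, , 4" keeps the pair
from straddling a 64-byte boundary, which is not permitted for prefixed
instructions. As a worked example (not part of the patch), pld r3, 0x12345(r4)
with R=0 encodes as:

	prefix = 0x04000000 | ((0x12345 >> 16) & 0x3ffff)    = 0x04000001
	suffix = 0xe4000000 | (3 << 21) | (4 << 16) | 0x2345 = 0xe4642345
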
diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h
new file mode 100644
index 0000000..3312cb1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/pkeys.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PKEYS_H
+#define _SELFTESTS_POWERPC_PKEYS_H
+
+#include <sys/mman.h>
+
+#include "reg.h"
+#include "utils.h"
+
+/*
+ * Older versions of libc use the Intel-specific access rights.
+ * Hence, override the definitions as they might be incorrect.
+ */
+#undef PKEY_DISABLE_ACCESS
+#define PKEY_DISABLE_ACCESS	0x3
+
+#undef PKEY_DISABLE_WRITE
+#define PKEY_DISABLE_WRITE	0x2
+
+#undef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE	0x4
+
+/* Older versions of libc do not define this */
+#ifndef SEGV_PKUERR
+#define SEGV_PKUERR	4
+#endif
+
+#define SI_PKEY_OFFSET	0x20
+
+#define __NR_pkey_mprotect	386
+#define __NR_pkey_alloc		384
+#define __NR_pkey_free		385
+
+#define PKEY_BITS_PER_PKEY	2
+#define NR_PKEYS		32
+#define PKEY_BITS_MASK		((1UL << PKEY_BITS_PER_PKEY) - 1)
+
+inline unsigned long pkeyreg_get(void)
+{
+	return mfspr(SPRN_AMR);
+}
+
+inline void pkeyreg_set(unsigned long amr)
+{
+	set_amr(amr);
+}
+
+void pkey_set_rights(int pkey, unsigned long rights)
+{
+	unsigned long amr, shift;
+
+	shift = (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY;
+	amr = pkeyreg_get();
+	amr &= ~(PKEY_BITS_MASK << shift);
+	amr |= (rights & PKEY_BITS_MASK) << shift;
+	pkeyreg_set(amr);
+}
+
+int sys_pkey_mprotect(void *addr, size_t len, int prot, int pkey)
+{
+	return syscall(__NR_pkey_mprotect, addr, len, prot, pkey);
+}
+
+int sys_pkey_alloc(unsigned long flags, unsigned long rights)
+{
+	return syscall(__NR_pkey_alloc, flags, rights);
+}
+
+int sys_pkey_free(int pkey)
+{
+	return syscall(__NR_pkey_free, pkey);
+}
+
+int pkeys_unsupported(void)
+{
+	bool hash_mmu = false;
+	int pkey;
+
+	/* Protection keys are currently supported on Hash MMU only */
+	FAIL_IF(using_hash_mmu(&hash_mmu));
+	SKIP_IF(!hash_mmu);
+
+	/* Check if the system call is supported */
+	pkey = sys_pkey_alloc(0, 0);
+	SKIP_IF(pkey < 0);
+	sys_pkey_free(pkey);
+
+	return 0;
+}
+
+int siginfo_pkey(siginfo_t *si)
+{
+	/*
+	 * In older versions of libc, siginfo_t does not have si_pkey as
+	 * a member.
+	 */
+#ifdef si_pkey
+	return si->si_pkey;
+#else
+	return *((int *)(((char *) si) + SI_PKEY_OFFSET));
+#endif
+}
+
+#define pkey_rights(r) ({						\
+	static char buf[4] = "rwx";					\
+	unsigned int amr_bits;						\
+	if ((r) & PKEY_DISABLE_EXECUTE)					\
+		buf[2] = '-';						\
+	amr_bits = (r) & PKEY_BITS_MASK;				\
+	if (amr_bits & PKEY_DISABLE_WRITE)				\
+		buf[1] = '-';						\
+	if (amr_bits & PKEY_DISABLE_ACCESS & ~PKEY_DISABLE_WRITE)	\
+		buf[0] = '-';						\
+	buf;								\
+})
+
+unsigned long next_pkey_rights(unsigned long rights)
+{
+	if (rights == PKEY_DISABLE_ACCESS)
+		return PKEY_DISABLE_EXECUTE;
+	else if (rights == (PKEY_DISABLE_ACCESS | PKEY_DISABLE_EXECUTE))
+		return 0;
+
+	if ((rights & PKEY_BITS_MASK) == 0)
+		rights |= PKEY_DISABLE_WRITE;
+	else if ((rights & PKEY_BITS_MASK) == PKEY_DISABLE_WRITE)
+		rights |= PKEY_DISABLE_ACCESS;
+
+	return rights;
+}
+
+#endif /* _SELFTESTS_POWERPC_PKEYS_H */
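
The AMR packs two permission bits per key with pkey 0 in the most significant
pair, hence the shift computed in pkey_set_rights(). As a worked example,
fully disabling pkey 2 gives shift = (32 - 2 - 1) * 2 = 58, so the update
amounts to:

	amr |= 0x3UL << 58;	/* sets AMR bits 0x0c00000000000000 */
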
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index 022c507..c0f2742 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -57,6 +57,12 @@
 #define SPRN_PPR       896	/* Program Priority Register */
 #define SPRN_AMR       13	/* Authority Mask Register - problem state */
 
+#define set_amr(v)	asm volatile("isync;" \
+				     "mtspr " __stringify(SPRN_AMR) ",%0;" \
+				     "isync" : \
+				    : "r" ((unsigned long)(v)) \
+				    : "memory")
+
 /* TEXASR register bits */
 #define TEXASR_FC	0xFE00000000000000
 #define TEXASR_FP	0x0100000000000000
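
The isync on either side of the mtspr makes the AMR update
context-synchronised, so neighbouring loads and stores cannot be executed
under the old permission bits. Typical use in the tests below:

	set_amr(0);	/* clear all AMR access restrictions */
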
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index e089a0c..71d2924f 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -42,6 +42,16 @@
 int perf_event_disable(int fd);
 int perf_event_reset(int fd);
 
+#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30)
+#include <unistd.h>
+#include <sys/syscall.h>
+
+static inline pid_t gettid(void)
+{
+	return syscall(SYS_gettid);
+}
+#endif
+
 static inline bool have_hwcap(unsigned long ftr)
 {
 	return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr;
@@ -60,6 +70,7 @@
 #endif
 
 bool is_ppc64le(void);
+int using_hash_mmu(bool *using_hash);
 
 /* Yes, this is evil */
 #define FAIL_IF(x)						\
@@ -71,6 +82,15 @@
 	}							\
 } while (0)
 
+#define FAIL_IF_EXIT(x)						\
+do {								\
+	if ((x)) {						\
+		fprintf(stderr,					\
+		"[FAIL] Test FAILED on line %d\n", __LINE__);	\
+		_exit(1);					\
+	}							\
+} while (0)
+
 /* The test harness uses this, yes it's gross */
 #define MAGIC_SKIP_RETURN_VALUE	99
 
@@ -96,11 +116,20 @@
 #define _str(s) #s
 #define str(s) _str(s)
 
+#define sigsafe_err(msg)	({ \
+		ssize_t nbytes __attribute__((unused)); \
+		nbytes = write(STDERR_FILENO, msg, strlen(msg)); })
+
 /* POWER9 feature */
 #ifndef PPC_FEATURE2_ARCH_3_00
 #define PPC_FEATURE2_ARCH_3_00 0x00800000
 #endif
 
+/* POWER10 feature */
+#ifndef PPC_FEATURE2_ARCH_3_1
+#define PPC_FEATURE2_ARCH_3_1 0x00040000
+#endif
+
 #if defined(__powerpc64__)
 #define UCONTEXT_NIA(UC)	(UC)->uc_mcontext.gp_regs[PT_NIP]
 #define UCONTEXT_MSR(UC)	(UC)->uc_mcontext.gp_regs[PT_MSR]
diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore
index e31ca6f..d0c23b2 100644
--- a/tools/testing/selftests/powerpc/math/.gitignore
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -6,3 +6,4 @@
 fpu_signal
 vmx_signal
 vsx_preempt
+fpu_denormal
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
index 11a10d7..fcc91c2 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt
+TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt
 
 top_srcdir = ../../../../..
 include ../../lib.mk
@@ -11,9 +11,9 @@
 $(OUTPUT)/fpu_preempt: fpu_asm.S
 $(OUTPUT)/fpu_signal:  fpu_asm.S
 
-$(OUTPUT)/vmx_syscall: vmx_asm.S
-$(OUTPUT)/vmx_preempt: vmx_asm.S
-$(OUTPUT)/vmx_signal: vmx_asm.S
+$(OUTPUT)/vmx_syscall: vmx_asm.S ../utils.c
+$(OUTPUT)/vmx_preempt: vmx_asm.S ../utils.c
+$(OUTPUT)/vmx_signal: vmx_asm.S ../utils.c
 
 $(OUTPUT)/vsx_preempt: CFLAGS += -mvsx
-$(OUTPUT)/vsx_preempt: vsx_asm.S
+$(OUTPUT)/vsx_preempt: vsx_asm.S ../utils.c
diff --git a/tools/testing/selftests/powerpc/math/fpu_denormal.c b/tools/testing/selftests/powerpc/math/fpu_denormal.c
new file mode 100644
index 0000000..5f96682
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_denormal.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * This test attempts to cause an FP denormal exception on POWER8 CPUs. Unfortunately,
+ * if the denormal handler is not configured or working properly, this can cause a bad
+ * crash in kernel mode when the kernel tries to save FP registers as the process
+ * exits.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "utils.h"
+
+static int test_denormal_fpu(void)
+{
+	unsigned int m32;
+	unsigned long m64;
+	volatile float f;
+	volatile double d;
+
+	/* try to induce lfs <denormal> ; stfd */
+
+	m32 = 0x00715fcf; /* random denormal */
+	memcpy((float *)&f, &m32, sizeof(f));
+	d = f;
+	memcpy(&m64, (double *)&d, sizeof(d));
+
+	FAIL_IF((long)(m64 != 0x380c57f3c0000000)); /* renormalised value */
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(test_denormal_fpu, "fpu_denormal");
+}
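
The expected constant can be verified by hand: 0x00715fcf is a subnormal
single (biased exponent 0, fraction 0x715fcf) whose leading set fraction bit
is bit 22, so it renormalises to 1.x * 2^-127. As a double that is biased
exponent 1023 - 127 = 0x380, with the remaining fraction bits 0x315fcf shifted
up 30 places into the 52-bit mantissa:

	(0x380UL << 52) | (0x315fcfUL << 30) == 0x380c57f3c0000000
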
diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c
index 2e059f1..6761d6c 100644
--- a/tools/testing/selftests/powerpc/math/vmx_preempt.c
+++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c
@@ -57,6 +57,9 @@
 	int i, rc, threads;
 	pthread_t *tids;
 
+	// vcmpequd used in vmx_asm.S is a v2.07 instruction
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
 	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
 	tids = malloc(threads * sizeof(pthread_t));
 	FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c
index 785a48e..b340a5c 100644
--- a/tools/testing/selftests/powerpc/math/vmx_signal.c
+++ b/tools/testing/selftests/powerpc/math/vmx_signal.c
@@ -96,6 +96,9 @@
 	void *rc_p;
 	pthread_t *tids;
 
+	// vcmpequd used in vmx_asm.S is a v2.07 instruction
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
 	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
 	tids = malloc(threads * sizeof(pthread_t));
 	FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c
index 9ee293c..03c78df 100644
--- a/tools/testing/selftests/powerpc/math/vmx_syscall.c
+++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c
@@ -49,9 +49,14 @@
 	 * Setup an environment with much context switching
 	 */
 	pid_t pid2;
-	pid_t pid = fork();
+	pid_t pid;
 	int ret;
 	int child_ret;
+
+	// vcmpequd used in vmx_asm.S is a v2.07 instruction
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
+	pid = fork();
 	FAIL_IF(pid == -1);
 
 	pid2 = fork();
diff --git a/tools/testing/selftests/powerpc/math/vsx_preempt.c b/tools/testing/selftests/powerpc/math/vsx_preempt.c
index 63de9c6..d1601bb 100644
--- a/tools/testing/selftests/powerpc/math/vsx_preempt.c
+++ b/tools/testing/selftests/powerpc/math/vsx_preempt.c
@@ -92,6 +92,8 @@
 	int i, rc, threads;
 	pthread_t *tids;
 
+	SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX));
+
 	threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
 	tids = malloc(threads * sizeof(pthread_t));
 	FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 2ca5232..91c775c 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -2,9 +2,12 @@
 hugetlb_vs_thp_test
 subpage_prot
 tempfile
-prot_sao
 segv_errors
 wild_bctr
 large_vm_fork_separation
 bad_accesses
 tlbie_test
+pkey_exec_prot
+pkey_siginfo
+stack_expansion_ldst
+stack_expansion_signal
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index b9103c4..250ce17 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -2,23 +2,31 @@
 noarg:
 	$(MAKE) -C ../
 
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
-		  large_vm_fork_separation bad_accesses
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot segv_errors wild_bctr \
+		  large_vm_fork_separation bad_accesses pkey_exec_prot \
+		  pkey_siginfo stack_expansion_signal stack_expansion_ldst
+
 TEST_GEN_PROGS_EXTENDED := tlbie_test
 TEST_GEN_FILES := tempfile
 
 top_srcdir = ../../../../..
 include ../../lib.mk
 
-$(TEST_GEN_PROGS): ../harness.c
-
-$(OUTPUT)/prot_sao: ../utils.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
 
 $(OUTPUT)/wild_bctr: CFLAGS += -m64
 $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
 $(OUTPUT)/bad_accesses: CFLAGS += -m64
+$(OUTPUT)/pkey_exec_prot: CFLAGS += -m64
+$(OUTPUT)/pkey_siginfo: CFLAGS += -m64
+
+$(OUTPUT)/stack_expansion_signal: ../utils.c ../pmu/lib.c
+
+$(OUTPUT)/stack_expansion_ldst: CFLAGS += -fno-stack-protector
+$(OUTPUT)/stack_expansion_ldst: ../utils.c
 
 $(OUTPUT)/tempfile:
 	dd if=/dev/zero of=$@ bs=64k count=1
 
 $(OUTPUT)/tlbie_test: LDLIBS += -lpthread
+$(OUTPUT)/pkey_siginfo: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c
index adc465f..a864ed7 100644
--- a/tools/testing/selftests/powerpc/mm/bad_accesses.c
+++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c
@@ -64,34 +64,6 @@
 	return 0;
 }
 
-static int using_hash_mmu(bool *using_hash)
-{
-	char line[128];
-	FILE *f;
-	int rc;
-
-	f = fopen("/proc/cpuinfo", "r");
-	FAIL_IF(!f);
-
-	rc = 0;
-	while (fgets(line, sizeof(line), f) != NULL) {
-		if (strcmp(line, "MMU		: Hash\n") == 0) {
-			*using_hash = true;
-			goto out;
-		}
-
-		if (strcmp(line, "MMU		: Radix\n") == 0) {
-			*using_hash = false;
-			goto out;
-		}
-	}
-
-	rc = -1;
-out:
-	fclose(f);
-	return rc;
-}
-
 static int test(void)
 {
 	unsigned long i, j, addr, region_shift, page_shift, page_size;
diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
new file mode 100644
index 0000000..9e5c7f3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ *
+ * Test if applying execute protection on pages using memory
+ * protection keys works as expected.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+#include <unistd.h>
+
+#include "pkeys.h"
+
+#define PPC_INST_NOP	0x60000000
+#define PPC_INST_TRAP	0x7fe00008
+#define PPC_INST_BLR	0x4e800020
+
+static volatile sig_atomic_t fault_pkey, fault_code, fault_type;
+static volatile sig_atomic_t remaining_faults;
+static volatile unsigned int *fault_addr;
+static unsigned long pgsize, numinsns;
+static unsigned int *insns;
+
+static void trap_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+	/* Check if this fault originated from the expected address */
+	if (sinfo->si_addr != (void *) fault_addr)
+		sigsafe_err("got a fault for an unexpected address\n");
+
+	_exit(1);
+}
+
+static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+	int signal_pkey;
+
+	signal_pkey = siginfo_pkey(sinfo);
+	fault_code = sinfo->si_code;
+
+	/* Check if this fault originated from the expected address */
+	if (sinfo->si_addr != (void *) fault_addr) {
+		sigsafe_err("got a fault for an unexpected address\n");
+		_exit(1);
+	}
+
+	/* Check if too many faults have occurred for a single test case */
+	if (!remaining_faults) {
+		sigsafe_err("got too many faults for the same address\n");
+		_exit(1);
+	}
+
+
+	/* Restore permissions in order to continue */
+	switch (fault_code) {
+	case SEGV_ACCERR:
+		if (mprotect(insns, pgsize, PROT_READ | PROT_WRITE)) {
+			sigsafe_err("failed to set access permissions\n");
+			_exit(1);
+		}
+		break;
+	case SEGV_PKUERR:
+		if (signal_pkey != fault_pkey) {
+			sigsafe_err("got a fault for an unexpected pkey\n");
+			_exit(1);
+		}
+
+		switch (fault_type) {
+		case PKEY_DISABLE_ACCESS:
+			pkey_set_rights(fault_pkey, 0);
+			break;
+		case PKEY_DISABLE_EXECUTE:
+			/*
+			 * Reassociate the exec-only pkey with the region
+			 * to be able to continue. Unlike AMR, we cannot
+			 * set IAMR directly from userspace to restore the
+			 * permissions.
+			 */
+			if (mprotect(insns, pgsize, PROT_EXEC)) {
+				sigsafe_err("failed to set execute permissions\n");
+				_exit(1);
+			}
+			break;
+		default:
+			sigsafe_err("got a fault with an unexpected type\n");
+			_exit(1);
+		}
+		break;
+	default:
+		sigsafe_err("got a fault with an unexpected code\n");
+		_exit(1);
+	}
+
+	remaining_faults--;
+}
+
+static int test(void)
+{
+	struct sigaction segv_act, trap_act;
+	unsigned long rights;
+	int pkey, ret, i;
+
+	ret = pkeys_unsupported();
+	if (ret)
+		return ret;
+
+	/* Setup SIGSEGV handler */
+	segv_act.sa_handler = 0;
+	segv_act.sa_sigaction = segv_handler;
+	FAIL_IF(sigprocmask(SIG_SETMASK, 0, &segv_act.sa_mask) != 0);
+	segv_act.sa_flags = SA_SIGINFO;
+	segv_act.sa_restorer = 0;
+	FAIL_IF(sigaction(SIGSEGV, &segv_act, NULL) != 0);
+
+	/* Setup SIGTRAP handler */
+	trap_act.sa_handler = 0;
+	trap_act.sa_sigaction = trap_handler;
+	FAIL_IF(sigprocmask(SIG_SETMASK, 0, &trap_act.sa_mask) != 0);
+	trap_act.sa_flags = SA_SIGINFO;
+	trap_act.sa_restorer = 0;
+	FAIL_IF(sigaction(SIGTRAP, &trap_act, NULL) != 0);
+
+	/* Setup executable region */
+	pgsize = getpagesize();
+	numinsns = pgsize / sizeof(unsigned int);
+	insns = (unsigned int *) mmap(NULL, pgsize, PROT_READ | PROT_WRITE,
+				      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	FAIL_IF(insns == MAP_FAILED);
+
+	/* Write the instruction words */
+	for (i = 1; i < numinsns - 1; i++)
+		insns[i] = PPC_INST_NOP;
+
+	/*
+	 * Set the first instruction as an unconditional trap. If
+	 * the last write to this address succeeds, this should
+	 * get overwritten by a no-op.
+	 */
+	insns[0] = PPC_INST_TRAP;
+
+	/*
+	 * Later, to jump to the executable region, we use a branch
+	 * and link instruction (bctrl), which automatically sets the
+	 * return address in LR. Use that to return.
+	 */
+	insns[numinsns - 1] = PPC_INST_BLR;
+
+	/* Allocate a pkey that restricts execution */
+	rights = PKEY_DISABLE_EXECUTE;
+	pkey = sys_pkey_alloc(0, rights);
+	FAIL_IF(pkey < 0);
+
+	/*
+	 * Pick the first instruction's address from the executable
+	 * region.
+	 */
+	fault_addr = insns;
+
+	/* The following two cases will avoid SEGV_PKUERR */
+	fault_type = -1;
+	fault_pkey = -1;
+
+	/*
+	 * Read an instruction word from the address when AMR bits
+	 * are not set i.e. the pkey permits both read and write
+	 * access.
+	 *
+	 * This should not generate a fault as having PROT_EXEC
+	 * implies PROT_READ on GNU systems. The pkey currently
+	 * restricts execution only based on the IAMR bits. The
+	 * AMR bits are cleared.
+	 */
+	remaining_faults = 0;
+	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+	printf("read from %p, pkey permissions are %s\n", fault_addr,
+	       pkey_rights(rights));
+	i = *fault_addr;
+	FAIL_IF(remaining_faults != 0);
+
+	/*
+	 * Write an instruction word to the address when AMR bits
+	 * are not set i.e. the pkey permits both read and write
+	 * access.
+	 *
+	 * This should generate an access fault as having just
+	 * PROT_EXEC also restricts writes. The pkey currently
+	 * restricts execution only based on the IAMR bits. The
+	 * AMR bits are cleared.
+	 */
+	remaining_faults = 1;
+	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+	printf("write to %p, pkey permissions are %s\n", fault_addr,
+	       pkey_rights(rights));
+	*fault_addr = PPC_INST_TRAP;
+	FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR);
+
+	/* The following three cases will generate SEGV_PKUERR */
+	rights |= PKEY_DISABLE_ACCESS;
+	fault_type = PKEY_DISABLE_ACCESS;
+	fault_pkey = pkey;
+
+	/*
+	 * Read an instruction word from the address when AMR bits
+	 * are set i.e. the pkey permits neither read nor write
+	 * access.
+	 *
+	 * This should generate a pkey fault based on AMR bits only
+	 * as having PROT_EXEC implicitly allows reads.
+	 */
+	remaining_faults = 1;
+	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+	pkey_set_rights(pkey, rights);
+	printf("read from %p, pkey permissions are %s\n", fault_addr,
+	       pkey_rights(rights));
+	i = *fault_addr;
+	FAIL_IF(remaining_faults != 0 || fault_code != SEGV_PKUERR);
+
+	/*
+	 * Write an instruction word to the address when AMR bits
+	 * are set i.e. the pkey permits neither read nor write
+	 * access.
+	 *
+	 * This should generate two faults. First, a pkey fault
+	 * based on AMR bits and then an access fault since
+	 * PROT_EXEC does not allow writes.
+	 */
+	remaining_faults = 2;
+	FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+	pkey_set_rights(pkey, rights);
+	printf("write to %p, pkey permissions are %s\n", fault_addr,
+	       pkey_rights(rights));
+	*fault_addr = PPC_INST_NOP;
+	FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR);
+
+	/* Free the current pkey */
+	sys_pkey_free(pkey);
+
+	rights = 0;
+	do {
+		/*
+		 * Allocate pkeys with all valid combinations of read,
+		 * write and execute restrictions.
+		 */
+		pkey = sys_pkey_alloc(0, rights);
+		FAIL_IF(pkey < 0);
+
+		/*
+		 * Jump to the executable region. AMR bits may or may not
+		 * be set but they should not affect execution.
+		 *
+		 * This should generate pkey faults based on IAMR bits which
+		 * may be set to restrict execution.
+		 *
+		 * The first iteration also checks if the overwrite of the
+		 * first instruction word from a trap to a no-op succeeded.
+		 */
+		fault_pkey = pkey;
+		fault_type = -1;
+		remaining_faults = 0;
+		if (rights & PKEY_DISABLE_EXECUTE) {
+			fault_type = PKEY_DISABLE_EXECUTE;
+			remaining_faults = 1;
+		}
+
+		FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+		printf("execute at %p, pkey permissions are %s\n", fault_addr,
+		       pkey_rights(rights));
+		asm volatile("mtctr	%0; bctrl" : : "r"(insns));
+		FAIL_IF(remaining_faults != 0);
+		if (rights & PKEY_DISABLE_EXECUTE)
+			FAIL_IF(fault_code != SEGV_PKUERR);
+
+		/* Free the current pkey */
+		sys_pkey_free(pkey);
+
+		/* Find next valid combination of pkey rights */
+		rights = next_pkey_rights(rights);
+	} while (rights);
+
+	/* Cleanup */
+	munmap((void *) insns, pgsize);
+
+	return 0;
+}
+
+int main(void)
+{
+	test_harness(test, "pkey_exec_prot");
+}
diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
new file mode 100644
index 0000000..4f815d7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ *
+ * Test if the signal information reports the correct memory protection
+ * key upon getting a key access violation fault for a page that was
+ * attempted to be protected by two different keys from two competing
+ * threads at the same time.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+
+#include "pkeys.h"
+
+#define PPC_INST_NOP	0x60000000
+#define PPC_INST_BLR	0x4e800020
+#define PROT_RWX	(PROT_READ | PROT_WRITE | PROT_EXEC)
+
+#define NUM_ITERATIONS	1000000
+
+static volatile sig_atomic_t perm_pkey, rest_pkey;
+static volatile sig_atomic_t rights, fault_count;
+static volatile unsigned int *volatile fault_addr;
+static pthread_barrier_t iteration_barrier;
+
+static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+	void *pgstart;
+	size_t pgsize;
+	int pkey;
+
+	pkey = siginfo_pkey(sinfo);
+
+	/* Check if this fault originated from a pkey access violation */
+	if (sinfo->si_code != SEGV_PKUERR) {
+		sigsafe_err("got a fault for an unexpected reason\n");
+		_exit(1);
+	}
+
+	/* Check if this fault originated from the expected address */
+	if (sinfo->si_addr != (void *) fault_addr) {
+		sigsafe_err("got a fault for an unexpected address\n");
+		_exit(1);
+	}
+
+	/* Check if this fault originated from the restrictive pkey */
+	if (pkey != rest_pkey) {
+		sigsafe_err("got a fault for an unexpected pkey\n");
+		_exit(1);
+	}
+
+	/* Check if too many faults have occurred for the same iteration */
+	if (fault_count > 0) {
+		sigsafe_err("got too many faults for the same address\n");
+		_exit(1);
+	}
+
+	pgsize = getpagesize();
+	pgstart = (void *) ((unsigned long) fault_addr & ~(pgsize - 1));
+
+	/*
+	 * If the current fault occurred due to lack of execute rights,
+	 * reassociate the page with the exec-only pkey since execute
+	 * rights cannot be changed directly for the faulting pkey as
+	 * IAMR is inaccessible from userspace.
+	 *
+	 * Otherwise, if the current fault occurred due to lack of
+	 * read-write rights, change the AMR permission bits for the
+	 * pkey.
+	 *
+	 * This will let the test continue.
+	 */
+	if (rights == PKEY_DISABLE_EXECUTE &&
+	    mprotect(pgstart, pgsize, PROT_EXEC))
+		_exit(1);
+	else
+		pkey_set_rights(pkey, 0);
+
+	fault_count++;
+}
+
+struct region {
+	unsigned long rights;
+	unsigned int *base;
+	size_t size;
+};
+
+static void *protect(void *p)
+{
+	unsigned long rights;
+	unsigned int *base;
+	size_t size;
+	int tid, i;
+
+	tid = gettid();
+	base = ((struct region *) p)->base;
+	size = ((struct region *) p)->size;
+	FAIL_IF_EXIT(!base);
+
+	/* No read, write and execute restrictions */
+	rights = 0;
+
+	printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights));
+
+	/* Allocate the permissive pkey */
+	perm_pkey = sys_pkey_alloc(0, rights);
+	FAIL_IF_EXIT(perm_pkey < 0);
+
+	/*
+	 * Repeatedly try to protect the common region with a permissive
+	 * pkey
+	 */
+	for (i = 0; i < NUM_ITERATIONS; i++) {
+		/*
+		 * Wait until the other thread has finished allocating the
+		 * restrictive pkey or until the next iteration has begun
+		 */
+		pthread_barrier_wait(&iteration_barrier);
+
+		/* Try to associate the permissive pkey with the region */
+		FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX,
+					       perm_pkey));
+	}
+
+	/* Free the permissive pkey */
+	sys_pkey_free(perm_pkey);
+
+	return NULL;
+}
+
+static void *protect_access(void *p)
+{
+	size_t size, numinsns;
+	unsigned int *base;
+	int tid, i;
+
+	tid = gettid();
+	base = ((struct region *) p)->base;
+	size = ((struct region *) p)->size;
+	rights = ((struct region *) p)->rights;
+	numinsns = size / sizeof(base[0]);
+	FAIL_IF_EXIT(!base);
+
+	/* Allocate the restrictive pkey */
+	rest_pkey = sys_pkey_alloc(0, rights);
+	FAIL_IF_EXIT(rest_pkey < 0);
+
+	printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights));
+	printf("tid %d, %s randomly in range [%p, %p]\n", tid,
+	       (rights == PKEY_DISABLE_EXECUTE) ? "execute" :
+	       (rights == PKEY_DISABLE_WRITE)  ? "write" : "read",
+	       base, base + numinsns);
+
+	/*
+	 * Repeatedly try to protect the common region with a restrictive
+	 * pkey and read, write or execute from it
+	 */
+	for (i = 0; i < NUM_ITERATIONS; i++) {
+		/*
+		 * Wait until the other thread has finished allocating the
+		 * permissive pkey or until the next iteration has begun
+		 */
+		pthread_barrier_wait(&iteration_barrier);
+
+		/* Try to associate the restrictive pkey with the region */
+		FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX,
+					       rest_pkey));
+
+		/* Choose a random instruction word address from the region */
+		fault_addr = base + (rand() % numinsns);
+		fault_count = 0;
+
+		switch (rights) {
+		/* Read protection test */
+		case PKEY_DISABLE_ACCESS:
+			/*
+			 * Read an instruction word from the region and
+			 * verify if it has not been overwritten to
+			 * something unexpected
+			 */
+			FAIL_IF_EXIT(*fault_addr != PPC_INST_NOP &&
+				     *fault_addr != PPC_INST_BLR);
+			break;
+
+		/* Write protection test */
+		case PKEY_DISABLE_WRITE:
+			/*
+			 * Write an instruction word to the region and
+			 * verify if the overwrite has succeeded
+			 */
+			*fault_addr = PPC_INST_BLR;
+			FAIL_IF_EXIT(*fault_addr != PPC_INST_BLR);
+			break;
+
+		/* Execute protection test */
+		case PKEY_DISABLE_EXECUTE:
+			/* Jump to the region and execute instructions */
+			asm volatile(
+				"mtctr	%0; bctrl"
+				: : "r"(fault_addr) : "ctr", "lr");
+			break;
+		}
+
+		/*
+		 * Restore the restrictions originally imposed by the
+		 * restrictive pkey as the signal handler would have
+		 * cleared out the corresponding AMR bits
+		 */
+		pkey_set_rights(rest_pkey, rights);
+	}
+
+	/* Free restrictive pkey */
+	sys_pkey_free(rest_pkey);
+
+	return NULL;
+}
+
+static void reset_pkeys(unsigned long rights)
+{
+	int pkeys[NR_PKEYS], i;
+
+	/* Exhaustively allocate all available pkeys */
+	for (i = 0; i < NR_PKEYS; i++)
+		pkeys[i] = sys_pkey_alloc(0, rights);
+
+	/* Free all allocated pkeys */
+	for (i = 0; i < NR_PKEYS; i++)
+		sys_pkey_free(pkeys[i]);
+}
+
+static int test(void)
+{
+	pthread_t prot_thread, pacc_thread;
+	struct sigaction act;
+	pthread_attr_t attr;
+	size_t numinsns;
+	struct region r;
+	int ret, i;
+
+	srand(time(NULL));
+	ret = pkeys_unsupported();
+	if (ret)
+		return ret;
+
+	/* Allocate the region */
+	r.size = getpagesize();
+	r.base = mmap(NULL, r.size, PROT_RWX,
+		      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	FAIL_IF(r.base == MAP_FAILED);
+
+	/*
+	 * Fill the region with no-ops with a branch at the end
+	 * for returning to the caller
+	 */
+	numinsns = r.size / sizeof(r.base[0]);
+	for (i = 0; i < numinsns - 1; i++)
+		r.base[i] = PPC_INST_NOP;
+	r.base[i] = PPC_INST_BLR;
+
+	/* Setup SIGSEGV handler */
+	act.sa_handler = 0;
+	act.sa_sigaction = segv_handler;
+	FAIL_IF(sigprocmask(SIG_SETMASK, 0, &act.sa_mask) != 0);
+	act.sa_flags = SA_SIGINFO;
+	act.sa_restorer = 0;
+	FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0);
+
+	/*
+	 * For these tests, the parent process should clear all bits of
+	 * AMR and IAMR, i.e. impose no restrictions, for all available
+	 * pkeys. This will be the base for the initial AMR and IAMR
+	 * values for all the test thread pairs.
+	 *
+	 * If the AMR and IAMR bits of all available pkeys are cleared
+	 * before running the tests and a fault is generated when
+	 * attempting to read, write or execute instructions from a
+	 * pkey protected region, the pkey responsible for this must be
+	 * the one from the protect-and-access thread since the other
+	 * one is fully permissive. Despite that, if the pkey reported
+	 * by siginfo is not the restrictive pkey, then there must be a
+	 * kernel bug.
+	 */
+	reset_pkeys(0);
+
+	/* Setup barrier for protect and protect-and-access threads */
+	FAIL_IF(pthread_attr_init(&attr) != 0);
+	FAIL_IF(pthread_barrier_init(&iteration_barrier, NULL, 2) != 0);
+
+	/* Setup and start protect and protect-and-read threads */
+	puts("starting thread pair (protect, protect-and-read)");
+	r.rights = PKEY_DISABLE_ACCESS;
+	FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+	FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+	FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+	FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+	/* Setup and start protect and protect-and-write threads */
+	puts("starting thread pair (protect, protect-and-write)");
+	r.rights = PKEY_DISABLE_WRITE;
+	FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+	FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+	FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+	FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+	/* Setup and start protect and protect-and-execute threads */
+	puts("starting thread pair (protect, protect-and-execute)");
+	r.rights = PKEY_DISABLE_EXECUTE;
+	FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+	FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+	FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+	FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+	/* Cleanup */
+	FAIL_IF(pthread_attr_destroy(&attr) != 0);
+	FAIL_IF(pthread_barrier_destroy(&iteration_barrier) != 0);
+	munmap(r.base, r.size);
+
+	return 0;
+}
+
+int main(void)
+{
+	test_harness(test, "pkey_siginfo");
+}
diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c
deleted file mode 100644
index e2eed65..0000000
--- a/tools/testing/selftests/powerpc/mm/prot_sao.c
+++ /dev/null
@@ -1,42 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright 2016, Michael Ellerman, IBM Corp.
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/mman.h>
-
-#include <asm/cputable.h>
-
-#include "utils.h"
-
-#define SIZE (64 * 1024)
-
-int test_prot_sao(void)
-{
-	char *p;
-
-	/* 2.06 or later should support SAO */
-	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
-
-	/*
-	 * Ensure we can ask for PROT_SAO.
-	 * We can't really verify that it does the right thing, but at least we
-	 * confirm the kernel will accept it.
-	 */
-	p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE | PROT_SAO,
-		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-	FAIL_IF(p == MAP_FAILED);
-
-	/* Write to the mapping, to at least cause a fault */
-	memset(p, 0xaa, SIZE);
-
-	return 0;
-}
-
-int main(void)
-{
-	return test_harness(test_prot_sao, "prot-sao");
-}
diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
new file mode 100644
index 0000000..ed91439
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that loads/stores expand the stack segment, or trigger a SEGV, in
+ * various conditions.
+ *
+ * Based on test code by Tom Lane.
+ */
+
+#undef NDEBUG
+#include <assert.h>
+
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define _KB (1024)
+#define _MB (1024 * 1024)
+
+volatile char *stack_top_ptr;
+volatile unsigned long stack_top_sp;
+volatile char c;
+
+enum access_type {
+	LOAD,
+	STORE,
+};
+
+/*
+ * Consume stack until the stack pointer is below @target_sp, then do an access
+ * (load or store) at offset @delta from either the base of the stack or the
+ * current stack pointer.
+ */
+__attribute__ ((noinline))
+int consume_stack(unsigned long target_sp, unsigned long stack_high, int delta, enum access_type type)
+{
+	unsigned long target;
+	char stack_cur;
+
+	if ((unsigned long)&stack_cur > target_sp)
+		return consume_stack(target_sp, stack_high, delta, type);
+	else {
+		// We don't really need this, but without it GCC might not
+		// generate a recursive call above.
+		stack_top_ptr = &stack_cur;
+
+#ifdef __powerpc__
+		asm volatile ("mr %[sp], %%r1" : [sp] "=r" (stack_top_sp));
+#else
+		asm volatile ("mov %%rsp, %[sp]" : [sp] "=r" (stack_top_sp));
+#endif
+		target = stack_high - delta + 1;
+		volatile char *p = (char *)target;
+
+		if (type == STORE)
+			*p = c;
+		else
+			c = *p;
+
+		// Do something to prevent the stack frame being popped prior to
+		// our access above.
+		getpid();
+	}
+
+	return 0;
+}
+
+static int search_proc_maps(char *needle, unsigned long *low, unsigned long *high)
+{
+	unsigned long start, end;
+	static char buf[4096];
+	char name[128];
+	FILE *f;
+	int rc;
+
+	f = fopen("/proc/self/maps", "r");
+	if (!f) {
+		perror("fopen");
+		return -1;
+	}
+
+	while (fgets(buf, sizeof(buf), f)) {
+		rc = sscanf(buf, "%lx-%lx %*c%*c%*c%*c %*x %*d:%*d %*d %127s\n",
+			    &start, &end, name);
+		if (rc == 2)
+			continue;
+
+		if (rc != 3) {
+			printf("sscanf errored\n");
+			rc = -1;
+			break;
+		}
+
+		if (strstr(name, needle)) {
+			*low = start;
+			*high = end - 1;
+			rc = 0;
+			break;
+		}
+	}
+
+	fclose(f);
+
+	return rc;
+}
+
+int child(unsigned int stack_used, int delta, enum access_type type)
+{
+	unsigned long low, stack_high;
+
+	assert(search_proc_maps("[stack]", &low, &stack_high) == 0);
+
+	assert(consume_stack(stack_high - stack_used, stack_high, delta, type) == 0);
+
+	printf("Access OK: %s delta %-7d used size 0x%06x stack high 0x%lx top_ptr %p top sp 0x%lx actual used 0x%lx\n",
+	       type == LOAD ? "load" : "store", delta, stack_used, stack_high,
+	       stack_top_ptr, stack_top_sp, stack_high - stack_top_sp + 1);
+
+	return 0;
+}
+
+static int test_one(unsigned int stack_used, int delta, enum access_type type)
+{
+	pid_t pid;
+	int rc;
+
+	pid = fork();
+	if (pid == 0)
+		exit(child(stack_used, delta, type));
+
+	assert(waitpid(pid, &rc, 0) != -1);
+
+	if (WIFEXITED(rc) && WEXITSTATUS(rc) == 0)
+		return 0;
+
+	// We don't expect a non-zero exit that's not a signal
+	assert(!WIFEXITED(rc));
+
+	printf("Faulted:   %s delta %-7d used size 0x%06x signal %d\n",
+	       type == LOAD ? "load" : "store", delta, stack_used,
+	       WTERMSIG(rc));
+
+	return 1;
+}
+
+// This is fairly arbitrary but is well below any of the targets below,
+// so that the delta between the stack pointer and the target is large.
+#define DEFAULT_SIZE	(32 * _KB)
+
+static void test_one_type(enum access_type type, unsigned long page_size, unsigned long rlim_cur)
+{
+	unsigned long delta;
+
+	// We should be able to access anywhere within the rlimit
+	for (delta = page_size; delta <= rlim_cur; delta += page_size)
+		assert(test_one(DEFAULT_SIZE, delta, type) == 0);
+
+	assert(test_one(DEFAULT_SIZE, rlim_cur, type) == 0);
+
+	// But if we go past the rlimit it should fail
+	assert(test_one(DEFAULT_SIZE, rlim_cur + 1, type) != 0);
+}
+
+static int test(void)
+{
+	unsigned long page_size;
+	struct rlimit rlimit;
+
+	page_size = getpagesize();
+	getrlimit(RLIMIT_STACK, &rlimit);
+	printf("Stack rlimit is 0x%lx\n", rlimit.rlim_cur);
+
+	printf("Testing loads ...\n");
+	test_one_type(LOAD, page_size, rlimit.rlim_cur);
+	printf("Testing stores ...\n");
+	test_one_type(STORE, page_size, rlimit.rlim_cur);
+
+	printf("All OK\n");
+
+	return 0;
+}
+
+#ifdef __powerpc__
+#include "utils.h"
+
+int main(void)
+{
+	return test_harness(test, "stack_expansion_ldst");
+}
+#else
+int main(void)
+{
+	return test();
+}
+#endif
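
Since the access target is stack_high - delta + 1, the boundary being probed
is the rlimit itself: with the common 8 MiB default (0x800000), delta =
0x800000 touches the lowest byte of a fully expanded stack and must succeed,
while delta = 0x800001 lands one byte below it and the child must be killed by
a signal, which test_one() distinguishes via WIFEXITED()/WTERMSIG().
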
diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c
new file mode 100644
index 0000000..c8b32a2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that signal delivery is able to expand the stack segment without
+ * triggering a SEGV.
+ *
+ * Based on test code by Tom Lane.
+ */
+
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../pmu/lib.h"
+#include "utils.h"
+
+#define _KB (1024)
+#define _MB (1024 * 1024)
+
+static char *stack_base_ptr;
+static char *stack_top_ptr;
+
+static volatile sig_atomic_t sig_occurred = 0;
+
+static void sigusr1_handler(int signal_arg)
+{
+	sig_occurred = 1;
+}
+
+static int consume_stack(unsigned int stack_size, union pipe write_pipe)
+{
+	char stack_cur;
+
+	if ((stack_base_ptr - &stack_cur) < stack_size)
+		return consume_stack(stack_size, write_pipe);
+	else {
+		stack_top_ptr = &stack_cur;
+
+		FAIL_IF(notify_parent(write_pipe));
+
+		while (!sig_occurred)
+			barrier();
+	}
+
+	return 0;
+}
+
+static int child(unsigned int stack_size, union pipe write_pipe)
+{
+	struct sigaction act;
+	char stack_base;
+
+	act.sa_handler = sigusr1_handler;
+	sigemptyset(&act.sa_mask);
+	act.sa_flags = 0;
+	if (sigaction(SIGUSR1, &act, NULL) < 0)
+		err(1, "sigaction");
+
+	stack_base_ptr = (char *) (((size_t) &stack_base + 65535) & ~65535UL);
+
+	FAIL_IF(consume_stack(stack_size, write_pipe));
+
+	printf("size 0x%06x: OK, stack base %p top %p (%zx used)\n",
+		stack_size, stack_base_ptr, stack_top_ptr,
+		stack_base_ptr - stack_top_ptr);
+
+	return 0;
+}
+
+static int test_one_size(unsigned int stack_size)
+{
+	union pipe read_pipe, write_pipe;
+	pid_t pid;
+
+	FAIL_IF(pipe(read_pipe.fds) == -1);
+	FAIL_IF(pipe(write_pipe.fds) == -1);
+
+	pid = fork();
+	if (pid == 0) {
+		close(read_pipe.read_fd);
+		close(write_pipe.write_fd);
+		exit(child(stack_size, read_pipe));
+	}
+
+	close(read_pipe.write_fd);
+	close(write_pipe.read_fd);
+	FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+	kill(pid, SIGUSR1);
+
+	FAIL_IF(wait_for_child(pid));
+
+	close(read_pipe.read_fd);
+	close(write_pipe.write_fd);
+
+	return 0;
+}
+
+int test(void)
+{
+	unsigned int i, size;
+
+	// Test with used stack from 1MB - 64K to 1MB + 64K
+	// Increment by 64 to get more coverage of odd sizes
+	for (i = 0; i < (128 * _KB); i += 64) {
+		size = i + (1 * _MB) - (64 * _KB);
+		FAIL_IF(test_one_size(size));
+	}
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test, "stack_expansion_signal");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
index 7b4ac45..2980abc 100644
--- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
+++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
@@ -9,6 +9,7 @@
 #include <stdbool.h>
 #include <string.h>
 #include <sys/prctl.h>
+#include <asm/cputable.h>
 
 #include "event.h"
 #include "utils.h"
@@ -104,6 +105,9 @@
 	struct event events[3];
 	u64 overhead;
 
+	// The STCX_FAIL event we use works on Power8 or later
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
 	setup_event(&events[0], PERF_COUNT_HW_INSTRUCTIONS, PERF_TYPE_HARDWARE, "instructions");
 	setup_event(&events[1], PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "cycles");
 	setup_event(&events[2], PM_STCX_FAIL, PERF_TYPE_RAW, "stcx_fail");
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
index a2d7b0e..a26ac12 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
@@ -91,8 +91,6 @@
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-
 	dump_ebb_state();
 
 	event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
index bc89381..bb9f587 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
@@ -42,8 +42,6 @@
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-
 	dump_ebb_state();
 
 	event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
index dcd351d..9ae795c 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
@@ -99,8 +99,6 @@
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-
 	dump_ebb_state();
 
 	printf("EBBs while frozen %d\n", ebbs_while_frozen);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
index 94c99c12..4b45a2e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
@@ -71,8 +71,6 @@
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-
 	dump_ebb_state();
 
 	event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
index dfbc5c3..21537d6 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
@@ -396,8 +396,6 @@
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-
 	dump_ebb_state();
 
 	event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
index ca2f7d72..b208bf6 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
@@ -38,8 +38,6 @@
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-
 	dump_ebb_state();
 
 	FAIL_IF(ebb_state.stats.ebb_count == 0);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
index ac3e6e1..ba2681a 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
@@ -75,7 +75,6 @@
 	ebb_freeze_pmcs();
 	ebb_global_disable();
 
-	count_pmc(4, sample_period);
 	mtspr(SPRN_PMC4, 0xdead);
 
 	dump_summary_ebb_state();
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
index b8242e9..791d37b 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
@@ -70,13 +70,6 @@
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-	count_pmc(2, sample_period);
-	count_pmc(3, sample_period);
-	count_pmc(4, sample_period);
-	count_pmc(5, sample_period);
-	count_pmc(6, sample_period);
-
 	dump_ebb_state();
 
 	for (i = 0; i < 6; i++)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
index a05c0e1..9b0f70d 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
@@ -61,8 +61,6 @@
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-
 	dump_summary_ebb_state();
 
 	event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
index 153ebc9..2904c74 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
@@ -82,8 +82,6 @@
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(1, sample_period);
-
 	dump_ebb_state();
 
 	if (mmcr0_mismatch)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
index eadad75..b29f8ba 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
@@ -76,8 +76,6 @@
 	ebb_global_disable();
 	ebb_freeze_pmcs();
 
-	count_pmc(2, sample_period);
-
 	dump_ebb_state();
 
 	printf("PMC5/6 overflow %d\n", pmc56_overflowed);
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h
index fa12e7d..bf1bec0 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.h
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -6,6 +6,7 @@
 #ifndef __SELFTESTS_POWERPC_PMU_LIB_H
 #define __SELFTESTS_POWERPC_PMU_LIB_H
 
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <string.h>
diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
index 2756fe2..2d37942 100644
--- a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
+++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
@@ -12,6 +12,8 @@
 #include <string.h>
 #include <sys/prctl.h>
 
+#include <asm/cputable.h>
+
 #include "event.h"
 #include "lib.h"
 #include "utils.h"
@@ -23,12 +25,9 @@
 static int per_event_excludes(void)
 {
 	struct event *e, events[4];
-	char *platform;
 	int i;
 
-	platform = (char *)get_auxv_entry(AT_BASE_PLATFORM);
-	FAIL_IF(!platform);
-	SKIP_IF(strcmp(platform, "power8") != 0);
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
 
 	/*
 	 * We need to create the events disabled, otherwise the running/enabled
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
index d5c64fe..bbc05ff 100644
--- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -150,7 +150,7 @@
 	printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
 	       user_write, info->amr, pkey1, pkey2, pkey3);
 
-	mtspr(SPRN_AMR, info->amr);
+	set_amr(info->amr);
 
 	/*
 	 * We won't use pkey3. This tests whether the kernel restores the UAMOR
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
index bdbbbe8..bc454f8 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -44,7 +44,7 @@
 	unsigned long amr2;
 
 	/* AMR value that ptrace should refuse to write to the child. */
-	unsigned long amr3;
+	unsigned long invalid_amr;
 
 	/* IAMR value the parent expects to read from the child. */
 	unsigned long expected_iamr;
@@ -57,8 +57,8 @@
 	 * (even though they're valid ones) because userspace doesn't have
 	 * access to those registers.
 	 */
-	unsigned long new_iamr;
-	unsigned long new_uamor;
+	unsigned long invalid_iamr;
+	unsigned long invalid_uamor;
 };
 
 static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
@@ -66,11 +66,6 @@
 	return syscall(__NR_pkey_alloc, flags, init_access_rights);
 }
 
-static int sys_pkey_free(int pkey)
-{
-	return syscall(__NR_pkey_free, pkey);
-}
-
 static int child(struct shared_info *info)
 {
 	unsigned long reg;
@@ -100,33 +95,37 @@
 
 	info->amr1 |= 3ul << pkeyshift(pkey1);
 	info->amr2 |= 3ul << pkeyshift(pkey2);
-	info->amr3 |= info->amr2 | 3ul << pkeyshift(pkey3);
+	/*
+	 * Invalid AMR value used to try to force-write
+	 * bits that are denied by the UAMOR setting.
+	 */
+	info->invalid_amr = info->amr2 | (~0x0UL & ~info->expected_uamor);
 
+	/*
+	 * If PKEY_DISABLE_EXECUTE succeeded, we should update expected_iamr.
+	 */
 	if (disable_execute)
 		info->expected_iamr |= 1ul << pkeyshift(pkey1);
 	else
 		info->expected_iamr &= ~(1ul << pkeyshift(pkey1));
 
-	info->expected_iamr &= ~(1ul << pkeyshift(pkey2) | 1ul << pkeyshift(pkey3));
-
-	info->expected_uamor |= 3ul << pkeyshift(pkey1) |
-				3ul << pkeyshift(pkey2);
-	info->new_iamr |= 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2);
-	info->new_uamor |= 3ul << pkeyshift(pkey1);
+	/*
+	 * We allocated pkey2 and pkey3 above. Clear their IAMR bits.
+	 */
+	info->expected_iamr &= ~(1ul << pkeyshift(pkey2));
+	info->expected_iamr &= ~(1ul << pkeyshift(pkey3));
 
 	/*
-	 * We won't use pkey3. We just want a plausible but invalid key to test
-	 * whether ptrace will let us write to AMR bits we are not supposed to.
-	 *
-	 * This also tests whether the kernel restores the UAMOR permissions
-	 * after a key is freed.
+	 * Create an IAMR value different from the expected value.
+	 * The kernel will reject IAMR and UAMOR changes.
 	 */
-	sys_pkey_free(pkey3);
+	info->invalid_iamr = info->expected_iamr | (1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2));
+	info->invalid_uamor = info->expected_uamor & ~(0x3ul << pkeyshift(pkey1));
 
 	printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
 	       user_write, info->amr1, pkey1, pkey2, pkey3);
 
-	mtspr(SPRN_AMR, info->amr1);
+	set_amr(info->amr1);
 
 	/* Wait for parent to read our AMR value and write a new one. */
 	ret = prod_parent(&info->child_sync);
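
The AMR arithmetic above relies on each pkey owning a two-bit access-disable/write-disable field, with fields numbered from the most-significant end of the register. A sketch of the shift helper these tests use (mirroring the selftests' pkeys header; treat the names as illustrative):

    #define NR_PKEYS            32
    #define PKEY_BITS_PER_PKEY  2

    /* pkey 0 owns the most-significant bit pair of the AMR. */
    static inline int pkeyshift(int pkey)
    {
            return (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY;
    }

So "3ul << pkeyshift(pkey)" denies all access for that key, and invalid_amr additionally sets every bit the UAMOR says userspace must not touch, which is what makes the later ptrace write invalid.
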
@@ -196,9 +195,9 @@
 	PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
 	PARENT_FAIL_IF(ret, &info->child_sync);
 
-	info->amr1 = info->amr2 = info->amr3 = regs[0];
-	info->expected_iamr = info->new_iamr = regs[1];
-	info->expected_uamor = info->new_uamor = regs[2];
+	info->amr1 = info->amr2 = regs[0];
+	info->expected_iamr = regs[1];
+	info->expected_uamor = regs[2];
 
 	/* Wake up child so that it can set itself up. */
 	ret = prod_child(&info->child_sync);
@@ -234,10 +233,10 @@
 		return ret;
 
 	/* Write invalid AMR value in child. */
-	ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr3, 1);
+	ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->invalid_amr, 1);
 	PARENT_FAIL_IF(ret, &info->child_sync);
 
-	printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr3);
+	printf("%-30s AMR: %016lx\n", ptrace_write_running, info->invalid_amr);
 
 	/* Wake up child so that it can verify it didn't change. */
 	ret = prod_child(&info->child_sync);
@@ -249,7 +248,7 @@
 
 	/* Try to write to IAMR. */
 	regs[0] = info->amr1;
-	regs[1] = info->new_iamr;
+	regs[1] = info->invalid_iamr;
 	ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2);
 	PARENT_FAIL_IF(!ret, &info->child_sync);
 
@@ -257,7 +256,7 @@
 	       ptrace_write_running, regs[0], regs[1]);
 
 	/* Try to write to IAMR and UAMOR. */
-	regs[2] = info->new_uamor;
+	regs[2] = info->invalid_uamor;
 	ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3);
 	PARENT_FAIL_IF(!ret, &info->child_sync);
 
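
For context on the regs[] indices used throughout this test: the NT_PPC_PKEY regset carries AMR, IAMR and UAMOR, in that order. A hedged sketch of what the ptrace_write_regs() wrapper amounts to for this regset (assuming a libc elf.h that defines NT_PPC_PKEY; otherwise <linux/elf.h> provides it):

    #include <elf.h>         /* NT_PPC_PKEY */
    #include <sys/ptrace.h>
    #include <sys/uio.h>     /* struct iovec */

    static long write_pkey_regs(pid_t pid, unsigned long *regs, int n)
    {
            struct iovec iov = {
                    .iov_base = regs,   /* AMR[, IAMR[, UAMOR]] */
                    .iov_len  = n * sizeof(unsigned long),
            };

            /* Writes that cover IAMR or UAMOR must be rejected by the
             * kernel, which is what PARENT_FAIL_IF(!ret, ...) asserts. */
            return ptrace(PTRACE_SETREGSET, pid, (void *)NT_PPC_PKEY, &iov);
    }
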
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
index 58cb1a8..4436ca9 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
@@ -78,6 +78,9 @@
 	pid_t pid;
 	int ret, status;
 
+	// TAR was added in ISA v2.07
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
 	pid = fork();
 	if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
index c4fe0e8..cb9875f 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
@@ -61,6 +61,8 @@
 	pid_t pid;
 	int ret, status, i;
 
+	SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX));
+
 	shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
 
 	for (i = 0; i < VEC_MAX; i++)
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index 8c6b982..c8d82b7 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -183,6 +183,16 @@
 		if (miss_percent > 15) {
 			printf("Branch misses > 15%% unexpected in this configuration!\n");
 			printf("Possible mis-match between reported & actual mitigation\n");
+			/*
+			 * Such a mismatch may be caused by a guest system
+			 * reporting as vulnerable when the host is mitigated.
+			 * Return the skip code to avoid flagging this as an
+			 * error: the system is actually mitigated and only
+			 * reports itself vulnerable, so the mismatch is safe.
+			 */
+			if (state == VULNERABLE)
+				return 4;
+
 			return 1;
 		}
 		break;
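
The bare 4 returned above is the kselftest skip code: runners treat that exit status as "skipped" rather than "failed". For reference, the exit codes defined in tools/testing/selftests/kselftest.h at the time of this series:

    #define KSFT_PASS   0
    #define KSFT_FAIL   1
    #define KSFT_XFAIL  2
    #define KSFT_XPASS  3
    #define KSFT_SKIP   4   /* what "return 4" maps to here */
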
diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile
index 7fc0623..9c39f55 100644
--- a/tools/testing/selftests/powerpc/stringloops/Makefile
+++ b/tools/testing/selftests/powerpc/stringloops/Makefile
@@ -8,7 +8,7 @@
 
 TEST_GEN_PROGS := memcmp_64 strlen
 
-$(OUTPUT)/memcmp_64: memcmp.c
+$(OUTPUT)/memcmp_64: memcmp.c ../utils.c
 $(OUTPUT)/memcmp_64: CFLAGS += -m64 -maltivec
 
 ifeq ($(build_32bit),1)
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c
index b1fa754..979df3d 100644
--- a/tools/testing/selftests/powerpc/stringloops/memcmp.c
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c
@@ -2,7 +2,9 @@
 #include <malloc.h>
 #include <stdlib.h>
 #include <string.h>
+#include <sys/mman.h>
 #include <time.h>
+#include <asm/cputable.h>
 #include "utils.h"
 
 #define SIZE 256
@@ -13,6 +15,9 @@
 #define LARGE_MAX_OFFSET 32
 #define LARGE_SIZE_START 4096
 
+/* Big enough to hold LARGE_SIZE and works on both 4K and 64K page-size kernels */
+#define MAP_SIZE (64 * 1024)
+
 #define MAX_OFFSET_DIFF_S1_S2 48
 
 int vmx_count;
@@ -68,25 +73,25 @@
 
 static int testcase(bool islarge)
 {
-	char *s1;
-	char *s2;
-	unsigned long i;
+	unsigned long i, comp_size, alloc_size;
+	char *p, *s1, *s2;
+	int iterations;
 
-	unsigned long comp_size = (islarge ? LARGE_SIZE : SIZE);
-	unsigned long alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
-	int iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
+	comp_size = (islarge ? LARGE_SIZE : SIZE);
+	alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
+	iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
 
-	s1 = memalign(128, alloc_size);
-	if (!s1) {
-		perror("memalign");
-		exit(1);
-	}
+	p = mmap(NULL, 4 * MAP_SIZE, PROT_READ | PROT_WRITE,
+		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	FAIL_IF(p == MAP_FAILED);
 
-	s2 = memalign(128, alloc_size);
-	if (!s2) {
-		perror("memalign");
-		exit(1);
-	}
+	/* Put s1/s2 at the end of a page */
+	s1 = p + MAP_SIZE - alloc_size;
+	s2 = p + 3 * MAP_SIZE - alloc_size;
+
+	/* And unmap the subsequent page to force a fault if we overread */
+	munmap(p + MAP_SIZE, MAP_SIZE);
+	munmap(p + 3 * MAP_SIZE, MAP_SIZE);
 
 	srandom(time(0));
 
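
The mmap/munmap dance above replaces plain memalign() with a fault-on-overread layout: each buffer ends flush against an unmapped page, so a vectorised memcmp that reads past alloc_size takes a SIGSEGV instead of silently succeeding. The shape of the trick as a standalone sketch (alloc_guarded() is illustrative, not a helper in the tree):

    #include <stddef.h>
    #include <sys/mman.h>

    /* Map two map_size regions, unmap the second, and hand back a
     * buffer that ends exactly at the unmapped boundary. A map_size
     * of 64K keeps munmap() page-aligned on 4K and 64K kernels. */
    static char *alloc_guarded(size_t map_size, size_t alloc_size)
    {
            char *p = mmap(NULL, 2 * map_size, PROT_READ | PROT_WRITE,
                           MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);

            if (p == MAP_FAILED)
                    return NULL;

            munmap(p + map_size, map_size);     /* the guard area */

            return p + map_size - alloc_size;
    }
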
@@ -147,6 +152,11 @@
 
 static int testcases(void)
 {
+#ifdef __powerpc64__
+	// vcmpequd, used in memcmp_64.S, is an ISA v2.07 instruction
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+#endif
+
 	testcase(0);
 	testcase(1);
 	return 0;
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index 5ee0e98..18b6a77 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -16,6 +16,7 @@
 #include <string.h>
 #include <sys/ioctl.h>
 #include <sys/stat.h>
+#include <sys/sysinfo.h>
 #include <sys/types.h>
 #include <sys/utsname.h>
 #include <unistd.h>
@@ -88,28 +89,40 @@
 
 int pick_online_cpu(void)
 {
-	cpu_set_t mask;
-	int cpu;
+	int ncpus, cpu = -1;
+	cpu_set_t *mask;
+	size_t size;
 
-	CPU_ZERO(&mask);
-
-	if (sched_getaffinity(0, sizeof(mask), &mask)) {
-		perror("sched_getaffinity");
+	ncpus = get_nprocs_conf();
+	size = CPU_ALLOC_SIZE(ncpus);
+	mask = CPU_ALLOC(ncpus);
+	if (!mask) {
+		perror("malloc");
 		return -1;
 	}
 
+	CPU_ZERO_S(size, mask);
+
+	if (sched_getaffinity(0, size, mask)) {
+		perror("sched_getaffinity");
+		goto done;
+	}
+
 	/* We prefer a primary thread, but skip 0 */
-	for (cpu = 8; cpu < CPU_SETSIZE; cpu += 8)
-		if (CPU_ISSET(cpu, &mask))
-			return cpu;
+	for (cpu = 8; cpu < ncpus; cpu += 8)
+		if (CPU_ISSET_S(cpu, size, mask))
+			goto done;
 
 	/* Search for anything, but in reverse */
-	for (cpu = CPU_SETSIZE - 1; cpu >= 0; cpu--)
-		if (CPU_ISSET(cpu, &mask))
-			return cpu;
+	for (cpu = ncpus - 1; cpu >= 0; cpu--)
+		if (CPU_ISSET_S(cpu, size, mask))
+			goto done;
 
 	printf("No cpus in affinity mask?!\n");
-	return -1;
+
+done:
+	CPU_FREE(mask);
+	return cpu;
 }
 
 bool is_ppc64le(void)
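
The pick_online_cpu() rewrite matters on large systems: glibc's fixed-size cpu_set_t covers only CPU_SETSIZE (1024) CPUs, and sched_getaffinity() fails with EINVAL when the kernel's mask is wider than the buffer passed in. A minimal standalone sketch of the dynamic API now used:

    #define _GNU_SOURCE
    #include <sched.h>
    #include <stdio.h>
    #include <sys/sysinfo.h>    /* get_nprocs_conf() */

    int main(void)
    {
            int ncpus = get_nprocs_conf();
            size_t size = CPU_ALLOC_SIZE(ncpus);
            cpu_set_t *mask = CPU_ALLOC(ncpus);

            if (!mask || sched_getaffinity(0, size, mask)) {
                    perror("sched_getaffinity");
                    return 1;
            }

            printf("%d of %d CPUs in affinity mask\n",
                   CPU_COUNT_S(size, mask), ncpus);
            CPU_FREE(mask);
            return 0;
    }
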
@@ -293,3 +306,31 @@
 
 	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
 }
+
+int using_hash_mmu(bool *using_hash)
+{
+	char line[128];
+	FILE *f;
+	int rc;
+
+	f = fopen("/proc/cpuinfo", "r");
+	FAIL_IF(!f);
+
+	rc = 0;
+	while (fgets(line, sizeof(line), f) != NULL) {
+		if (strcmp(line, "MMU		: Hash\n") == 0) {
+			*using_hash = true;
+			goto out;
+		}
+
+		if (strcmp(line, "MMU		: Radix\n") == 0) {
+			*using_hash = false;
+			goto out;
+		}
+	}
+
+	rc = -1;
+out:
+	fclose(f);
+	return rc;
+}
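
Note the new helper matches the /proc/cpuinfo lines byte-for-byte, tabs included, and returns -1 when no MMU line is found (for example on a kernel that does not report one). A hypothetical caller, using the harness's SKIP_IF/FAIL_IF macros:

    /* Illustrative only: skip when the MMU flavour doesn't apply. */
    static int radix_only_test(void)
    {
            bool hash;

            FAIL_IF(using_hash_mmu(&hash));  /* -1: couldn't determine */
            SKIP_IF(hash);                   /* hash MMU: not applicable */

            /* ... radix-specific assertions go here ... */
            return 0;
    }
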