Merge tag 'dmaengine-5.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine

Pull dmaengine updates from Vinod Koul:
 "Core:
   - Support for out-of-order DMA completion
   - Support for repeated transactions

  New controllers:
   - Support for Actions S700 DMA engine
   - Renesas R8A774E1, r8a7742 controller binding
   - New driver for Xilinx DPDMA controller

  Other:
   - Support for out-of-order DMA completion in the idxd driver
   - W=1 warning cleanup of subsystem
   - Updates to ti-k3-dma, dw, idxd drivers"

* tag 'dmaengine-5.9-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine: (68 commits)
  dmaengine: dw: Don't include unneeded header to platform data header
  dmaengine: Actions: Add support for S700 DMA engine
  dmaengine: Actions: get rid of bit fields from dma descriptor
  dt-bindings: dmaengine: convert Actions Semi Owl SoCs bindings to yaml
  dmaengine: idxd: add missing invalid flags field to completion
  dmaengine: dw: Initialize max_sg_burst capability
  dmaengine: dw: Introduce max burst length hw config
  dmaengine: dw: Initialize min and max burst DMA device capability
  dmaengine: dw: Set DMA device max segment size parameter
  dmaengine: dw: Take HC_LLP flag into account for noLLP auto-config
  dmaengine: Introduce DMA-device device_caps callback
  dmaengine: Introduce max SG burst capability
  dmaengine: Introduce min burst length capability
  dt-bindings: dma: dw: Add max burst transaction length property
  dt-bindings: dma: dw: Convert DW DMAC to DT binding
  dmaengine: ti: k3-udma: Query throughput level information from hardware
  dmaengine: ti: k3-udma: Use defines for capabilities register parsing
  dmaengine: xilinx: dpdma: Fix kerneldoc warning
  dmaengine: xilinx: dpdma: add missing kernel doc
  dmaengine: xilinx: dpdma: remove comparison of unsigned expression
  ...
diff --git a/Documentation/ABI/stable/sysfs-driver-dma-idxd b/Documentation/ABI/stable/sysfs-driver-dma-idxd
index b5bebf6..1af9c41 100644
--- a/Documentation/ABI/stable/sysfs-driver-dma-idxd
+++ b/Documentation/ABI/stable/sysfs-driver-dma-idxd
@@ -1,47 +1,47 @@
-What:		sys/bus/dsa/devices/dsa<m>/version
+What:		/sys/bus/dsa/devices/dsa<m>/version
 Date:		Apr 15, 2020
 KernelVersion:	5.8.0
 Contact:	dmaengine@vger.kernel.org
 Description:	The hardware version number.
 
-What:           sys/bus/dsa/devices/dsa<m>/cdev_major
+What:           /sys/bus/dsa/devices/dsa<m>/cdev_major
 Date:           Oct 25, 2019
-KernelVersion: 	5.6.0
+KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:	The major number that the character device driver assigned to
 		this device.
 
-What:           sys/bus/dsa/devices/dsa<m>/errors
+What:           /sys/bus/dsa/devices/dsa<m>/errors
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The error information for this device.
 
-What:           sys/bus/dsa/devices/dsa<m>/max_batch_size
+What:           /sys/bus/dsa/devices/dsa<m>/max_batch_size
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The largest number of work descriptors in a batch.
 
-What:           sys/bus/dsa/devices/dsa<m>/max_work_queues_size
+What:           /sys/bus/dsa/devices/dsa<m>/max_work_queues_size
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The maximum work queue size supported by this device.
 
-What:           sys/bus/dsa/devices/dsa<m>/max_engines
+What:           /sys/bus/dsa/devices/dsa<m>/max_engines
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The maximum number of engines supported by this device.
 
-What:           sys/bus/dsa/devices/dsa<m>/max_groups
+What:           /sys/bus/dsa/devices/dsa<m>/max_groups
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The maximum number of groups that can be created under this device.
 
-What:           sys/bus/dsa/devices/dsa<m>/max_tokens
+What:           /sys/bus/dsa/devices/dsa<m>/max_tokens
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
@@ -50,7 +50,7 @@
 		implementation, and these resources are allocated by engines to
 		support operations.
 
-What:           sys/bus/dsa/devices/dsa<m>/max_transfer_size
+What:           /sys/bus/dsa/devices/dsa<m>/max_transfer_size
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
@@ -58,57 +58,57 @@
 		perform the operation. The maximum transfer size is dependent on
 		the workqueue the descriptor was submitted to.
 
-What:           sys/bus/dsa/devices/dsa<m>/max_work_queues
+What:           /sys/bus/dsa/devices/dsa<m>/max_work_queues
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The maximum work queue number that this device supports.
 
-What:           sys/bus/dsa/devices/dsa<m>/numa_node
+What:           /sys/bus/dsa/devices/dsa<m>/numa_node
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The numa node number for this device.
 
-What:           sys/bus/dsa/devices/dsa<m>/op_cap
+What:           /sys/bus/dsa/devices/dsa<m>/op_cap
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The operation capability bit mask specifies the operation types
 		supported by this device.
 
-What:           sys/bus/dsa/devices/dsa<m>/state
+What:           /sys/bus/dsa/devices/dsa<m>/state
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The state information of this device. It can be either enabled
 		or disabled.
 
-What:           sys/bus/dsa/devices/dsa<m>/group<m>.<n>
+What:           /sys/bus/dsa/devices/dsa<m>/group<m>.<n>
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The assigned group under this device.
 
-What:           sys/bus/dsa/devices/dsa<m>/engine<m>.<n>
+What:           /sys/bus/dsa/devices/dsa<m>/engine<m>.<n>
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The assigned engine under this device.
 
-What:           sys/bus/dsa/devices/dsa<m>/wq<m>.<n>
+What:           /sys/bus/dsa/devices/dsa<m>/wq<m>.<n>
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The assigned work queue under this device.
 
-What:           sys/bus/dsa/devices/dsa<m>/configurable
+What:           /sys/bus/dsa/devices/dsa<m>/configurable
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    Indicates whether this device is configurable.
 
-What:           sys/bus/dsa/devices/dsa<m>/token_limit
+What:           /sys/bus/dsa/devices/dsa<m>/token_limit
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
@@ -116,19 +116,19 @@
 		one time by operations that access low bandwidth memory in the
 		device.
 
-What:           sys/bus/dsa/devices/wq<m>.<n>/group_id
+What:           /sys/bus/dsa/devices/wq<m>.<n>/group_id
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The group id that this work queue belongs to.
 
-What:           sys/bus/dsa/devices/wq<m>.<n>/size
+What:           /sys/bus/dsa/devices/wq<m>.<n>/size
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The work queue size for this work queue.
 
-What:           sys/bus/dsa/devices/wq<m>.<n>/type
+What:           /sys/bus/dsa/devices/wq<m>.<n>/type
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
@@ -136,20 +136,20 @@
 		queue usages in the kernel space or "user" type for work queue
 		usages by applications in user space.
 
-What:           sys/bus/dsa/devices/wq<m>.<n>/cdev_minor
+What:           /sys/bus/dsa/devices/wq<m>.<n>/cdev_minor
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The minor number assigned to this work queue by the character
 		device driver.
 
-What:           sys/bus/dsa/devices/wq<m>.<n>/mode
+What:           /sys/bus/dsa/devices/wq<m>.<n>/mode
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The work queue mode type for this work queue.
 
-What:           sys/bus/dsa/devices/wq<m>.<n>/priority
+What:           /sys/bus/dsa/devices/wq<m>.<n>/priority
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
@@ -157,20 +157,20 @@
 		other work queue in the same group to control quality of service
 		for dispatching work from multiple workqueues in the same group.
 
-What:           sys/bus/dsa/devices/wq<m>.<n>/state
+What:           /sys/bus/dsa/devices/wq<m>.<n>/state
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The current state of the work queue.
 
-What:           sys/bus/dsa/devices/wq<m>.<n>/threshold
+What:           /sys/bus/dsa/devices/wq<m>.<n>/threshold
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
 Description:    The number of entries in this work queue that may be filled
 		via a limited portal.
 
-What:           sys/bus/dsa/devices/engine<m>.<n>/group_id
+What:           /sys/bus/dsa/devices/engine<m>.<n>/group_id
 Date:           Oct 25, 2019
 KernelVersion:  5.6.0
 Contact:        dmaengine@vger.kernel.org
diff --git a/Documentation/devicetree/bindings/dma/arm-pl330.txt b/Documentation/devicetree/bindings/dma/arm-pl330.txt
index 2c7fd19..315e901 100644
--- a/Documentation/devicetree/bindings/dma/arm-pl330.txt
+++ b/Documentation/devicetree/bindings/dma/arm-pl330.txt
@@ -16,6 +16,7 @@
   - dma-channels: contains the total number of DMA channels supported by the DMAC
   - dma-requests: contains the total number of DMA requests supported by the DMAC
   - arm,pl330-broken-no-flushp: quirk for avoiding to execute DMAFLUSHP
+  - arm,pl330-periph-burst: quirk for performing burst transfers only
   - resets: contains an entry for each entry in reset-names.
 	    See ../reset/reset.txt for details.
   - reset-names: must contain at least "dma", and optional is "dma-ocp".
diff --git a/Documentation/devicetree/bindings/dma/owl-dma.txt b/Documentation/devicetree/bindings/dma/owl-dma.txt
deleted file mode 100644
index 03e9bb12..0000000
--- a/Documentation/devicetree/bindings/dma/owl-dma.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-* Actions Semi Owl SoCs DMA controller
-
-This binding follows the generic DMA bindings defined in dma.txt.
-
-Required properties:
-- compatible: Should be "actions,s900-dma".
-- reg: Should contain DMA registers location and length.
-- interrupts: Should contain 4 interrupts shared by all channel.
-- #dma-cells: Must be <1>. Used to represent the number of integer
-              cells in the dmas property of client device.
-- dma-channels: Physical channels supported.
-- dma-requests: Number of DMA request signals supported by the controller.
-                Refer to Documentation/devicetree/bindings/dma/dma.txt
-- clocks: Phandle and Specifier of the clock feeding the DMA controller.
-
-Example:
-
-Controller:
-                dma: dma-controller@e0260000 {
-                        compatible = "actions,s900-dma";
-                        reg = <0x0 0xe0260000 0x0 0x1000>;
-                        interrupts = <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
-                                     <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>,
-                                     <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>,
-                                     <GIC_SPI 60 IRQ_TYPE_LEVEL_HIGH>;
-                        #dma-cells = <1>;
-                        dma-channels = <12>;
-                        dma-requests = <46>;
-                        clocks = <&clock CLK_DMAC>;
-                };
-
-Client:
-
-DMA clients connected to the Actions Semi Owl SoCs DMA controller must
-use the format described in the dma.txt file, using a two-cell specifier
-for each channel.
-
-The two cells in order are:
-1. A phandle pointing to the DMA controller.
-2. The channel id.
-
-uart5: serial@e012a000 {
-        ...
-        dma-names = "tx", "rx";
-        dmas = <&dma 26>, <&dma 27>;
-        ...
-};
diff --git a/Documentation/devicetree/bindings/dma/owl-dma.yaml b/Documentation/devicetree/bindings/dma/owl-dma.yaml
new file mode 100644
index 0000000..256d62a
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/owl-dma.yaml
@@ -0,0 +1,79 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/dma/owl-dma.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Actions Semi Owl SoCs DMA controller
+
+description: |
+  The OWL DMA is a general-purpose direct memory access controller capable of
+  supporting 10 and 12 independent DMA channels for S700 and S900 SoCs
+  respectively.
+
+maintainers:
+  - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+
+allOf:
+  - $ref: "dma-controller.yaml#"
+
+properties:
+  compatible:
+    enum:
+      - actions,s900-dma
+      - actions,s700-dma
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    description:
+      The controller supports 4 interrupts, which are freely assignable to
+      the DMA channels.
+    maxItems: 4
+
+  "#dma-cells":
+    const: 1
+
+  dma-channels:
+    maximum: 12
+
+  dma-requests:
+    maximum: 46
+
+  clocks:
+    maxItems: 1
+    description:
+      Phandle and specifier of the clock feeding the DMA controller.
+
+  power-domains:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - "#dma-cells"
+  - dma-channels
+  - dma-requests
+  - clocks
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    dma: dma-controller@e0260000 {
+        compatible = "actions,s900-dma";
+        reg = <0xe0260000 0x1000>;
+        interrupts = <GIC_SPI 57 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 58 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 59 IRQ_TYPE_LEVEL_HIGH>,
+                     <GIC_SPI 60 IRQ_TYPE_LEVEL_HIGH>;
+        #dma-cells = <1>;
+        dma-channels = <12>;
+        dma-requests = <46>;
+        clocks = <&clock 22>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml
index b842dfd9..13f1a46 100644
--- a/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml
+++ b/Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml
@@ -23,6 +23,7 @@
           - renesas,dmac-r8a774a1 # RZ/G2M
           - renesas,dmac-r8a774b1 # RZ/G2N
           - renesas,dmac-r8a774c0 # RZ/G2E
+          - renesas,dmac-r8a774e1 # RZ/G2H
           - renesas,dmac-r8a7790  # R-Car H2
           - renesas,dmac-r8a7791  # R-Car M2-W
           - renesas,dmac-r8a7792  # R-Car V2H
diff --git a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.yaml b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.yaml
index 9ca6d8d..ab287c6 100644
--- a/Documentation/devicetree/bindings/dma/renesas,usb-dmac.yaml
+++ b/Documentation/devicetree/bindings/dma/renesas,usb-dmac.yaml
@@ -16,6 +16,7 @@
   compatible:
     items:
       - enum:
+          - renesas,r8a7742-usb-dmac  # RZ/G1H
           - renesas,r8a7743-usb-dmac  # RZ/G1M
           - renesas,r8a7744-usb-dmac  # RZ/G1N
           - renesas,r8a7745-usb-dmac  # RZ/G1E
@@ -23,6 +24,7 @@
           - renesas,r8a774a1-usb-dmac # RZ/G2M
           - renesas,r8a774b1-usb-dmac # RZ/G2N
           - renesas,r8a774c0-usb-dmac # RZ/G2E
+          - renesas,r8a774e1-usb-dmac # RZ/G2H
           - renesas,r8a7790-usb-dmac  # R-Car H2
           - renesas,r8a7791-usb-dmac  # R-Car M2-W
           - renesas,r8a7793-usb-dmac  # R-Car M2-N
diff --git a/Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml b/Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml
new file mode 100644
index 0000000..20870f5
--- /dev/null
+++ b/Documentation/devicetree/bindings/dma/snps,dma-spear1340.yaml
@@ -0,0 +1,176 @@
+# SPDX-License-Identifier: GPL-2.0-only
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/dma/snps,dma-spear1340.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys Designware DMA Controller
+
+maintainers:
+  - Viresh Kumar <vireshk@kernel.org>
+  - Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+
+allOf:
+  - $ref: "dma-controller.yaml#"
+
+properties:
+  compatible:
+    const: snps,dma-spear1340
+
+  "#dma-cells":
+    const: 3
+    description: |
+      First cell is a phandle pointing to the DMA controller. Second one is
+      the DMA request line number. Third cell is the memory master identifier
+      for transfers on a dynamically allocated channel. Fourth cell is the
+      peripheral master identifier for transfers on an allocated channel.
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  clock-names:
+    description: AHB interface reference clock.
+    const: hclk
+
+  dma-channels:
+    description: |
+      Number of DMA channels supported by the controller. If not
+      specified, the driver will try to auto-detect this and the rest
+      of the optional parameters.
+    minimum: 1
+    maximum: 8
+
+  dma-requests:
+    minimum: 1
+    maximum: 16
+
+  dma-masters:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      Number of DMA masters supported by the controller. If not
+      specified, the driver will try to auto-detect this and the rest
+      of the optional parameters.
+    minimum: 1
+    maximum: 4
+
+  chan_allocation_order:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      DMA channels allocation order specifier. Zero means ascending order
+      (first free allocated), while one means descending (last free allocated).
+    default: 0
+    enum: [0, 1]
+
+  chan_priority:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      DMA channels priority order. Zero means ascending channel priority
+      (the very first channel has the highest priority), while 1 means
+      descending priority (the last channel has the highest priority).
+    default: 0
+    enum: [0, 1]
+
+  block_size:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Maximum block size supported by the DMA controller.
+    enum: [3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095]
+
+  data-width:
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    description: Data bus width per each DMA master in bytes.
+    items:
+      maxItems: 4
+      items:
+        enum: [4, 8, 16, 32]
+
+  data_width:
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    deprecated: true
+    description: |
+      Data bus width per each DMA master in (2^n * 8) bits. This property
+      is deprecated: its usage is discouraged in favor of the data-width
+      property. Moreover, it incorrectly permits defining data-bus widths
+      of 8 and 16 bits, which is impossible according to the DW DMAC
+      IP-core data book.
+    items:
+      maxItems: 4
+      items:
+        enum:
+          - 0 # 8 bits
+          - 1 # 16 bits
+          - 2 # 32 bits
+          - 3 # 64 bits
+          - 4 # 128 bits
+          - 5 # 256 bits
+        default: 0
+
+  multi-block:
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    description: |
+      LLP-based multi-block transfer supported by hardware per
+      each DMA channel.
+    items:
+      maxItems: 8
+      items:
+        enum: [0, 1]
+        default: 1
+
+  snps,max-burst-len:
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    description: |
+      Maximum length of the burst transactions supported by the controller.
+      This property defines the upper limit of the run-time burst setting
+      (CTLx.SRC_MSIZE/CTLx.DST_MSIZE fields) so the allowed burst length
+      will be from 1 to max-burst-len words. It's an array property with one
+      cell per channel in the units determined by the value set in the
+      CTLx.SRC_TR_WIDTH/CTLx.DST_TR_WIDTH fields (data width).
+    items:
+      maxItems: 8
+      items:
+        enum: [4, 8, 16, 32, 64, 128, 256]
+        default: 256
+
+  snps,dma-protection-control:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: |
+      Bits one-to-one passed to the AHB HPROT[3:1] bus. Each bit setting
+      indicates the following features: bit 0 - privileged mode,
+      bit 1 - DMA is bufferable, bit 2 - DMA is cacheable.
+    default: 0
+    minimum: 0
+    maximum: 7
+
+unevaluatedProperties: false
+
+required:
+  - compatible
+  - "#dma-cells"
+  - reg
+  - interrupts
+
+examples:
+  - |
+    dma-controller@fc000000 {
+      compatible = "snps,dma-spear1340";
+      reg = <0xfc000000 0x1000>;
+      interrupt-parent = <&vic1>;
+      interrupts = <12>;
+
+      dma-channels = <8>;
+      dma-requests = <16>;
+      dma-masters = <4>;
+      #dma-cells = <3>;
+
+      chan_allocation_order = <1>;
+      chan_priority = <1>;
+      block_size = <0xfff>;
+      data-width = <8 8>;
+      multi-block = <0 0 0 0 0 0 0 0>;
+      snps,max-burst-len = <16 16 4 4 4 4 4 4>;
+    };
+...
diff --git a/Documentation/devicetree/bindings/dma/snps-dma.txt b/Documentation/devicetree/bindings/dma/snps-dma.txt
deleted file mode 100644
index 0bedcee..0000000
--- a/Documentation/devicetree/bindings/dma/snps-dma.txt
+++ /dev/null
@@ -1,69 +0,0 @@
-* Synopsys Designware DMA Controller
-
-Required properties:
-- compatible: "snps,dma-spear1340"
-- reg: Address range of the DMAC registers
-- interrupt: Should contain the DMAC interrupt number
-- dma-channels: Number of channels supported by hardware
-- dma-requests: Number of DMA request lines supported, up to 16
-- dma-masters: Number of AHB masters supported by the controller
-- #dma-cells: must be <3>
-- chan_allocation_order: order of allocation of channel, 0 (default): ascending,
-  1: descending
-- chan_priority: priority of channels. 0 (default): increase from chan 0->n, 1:
-  increase from chan n->0
-- block_size: Maximum block size supported by the controller
-- data-width: Maximum data width supported by hardware per AHB master
-  (in bytes, power of 2)
-
-
-Deprecated properties:
-- data_width: Maximum data width supported by hardware per AHB master
-  (0 - 8bits, 1 - 16bits, ..., 5 - 256bits)
-
-
-Optional properties:
-- multi-block: Multi block transfers supported by hardware. Array property with
-  one cell per channel. 0: not supported, 1 (default): supported.
-- snps,dma-protection-control: AHB HPROT[3:1] protection setting.
-  The default value is 0 (for non-cacheable, non-buffered,
-  unprivileged data access).
-  Refer to include/dt-bindings/dma/dw-dmac.h for possible values.
-
-Example:
-
-	dmahost: dma@fc000000 {
-		compatible = "snps,dma-spear1340";
-		reg = <0xfc000000 0x1000>;
-		interrupt-parent = <&vic1>;
-		interrupts = <12>;
-
-		dma-channels = <8>;
-		dma-requests = <16>;
-		dma-masters = <2>;
-		#dma-cells = <3>;
-		chan_allocation_order = <1>;
-		chan_priority = <1>;
-		block_size = <0xfff>;
-		data-width = <8 8>;
-	};
-
-DMA clients connected to the Designware DMA controller must use the format
-described in the dma.txt file, using a four-cell specifier for each channel.
-The four cells in order are:
-
-1. A phandle pointing to the DMA controller
-2. The DMA request line number
-3. Memory master for transfers on allocated channel
-4. Peripheral master for transfers on allocated channel
-
-Example:
-	
-	serial@e0000000 {
-		compatible = "arm,pl011", "arm,primecell";
-		reg = <0xe0000000 0x1000>;
-		interrupts = <0 35 0x4>;
-		dmas = <&dmahost 12 0 1>,
-			<&dmahost 13 1 0>;
-		dma-names = "rx", "rx";
-	};
diff --git a/Documentation/driver-api/dmaengine/provider.rst b/Documentation/driver-api/dmaengine/provider.rst
index ab82c8f..ddb0a81 100644
--- a/Documentation/driver-api/dmaengine/provider.rst
+++ b/Documentation/driver-api/dmaengine/provider.rst
@@ -239,6 +239,22 @@
     want to transfer a portion of uncompressed data directly to the
     display to print it
 
+- DMA_COMPLETION_NO_ORDER
+
+  - The device does not support in-order completion.
+
+  - The driver should return DMA_OUT_OF_ORDER for device_tx_status if
+    the device sets this capability.
+
+  - All cookie tracking and checking APIs should be treated as invalid if
+    the device exports this capability.
+
+  - At this point, this is incompatible with the polling option of dmatest.
+
+  - If this cap is set, the user is recommended to provide a unique
+    identifier for each descriptor sent to the DMA device in order to
+    properly track the completion.
+
 - DMA_REPEAT
 
   - The device supports repeated transfers. A repeated transfer, indicated by
@@ -420,6 +436,9 @@
   - In the case of a cyclic transfer, it should only take into
     account the current period.
 
+  - Should return DMA_OUT_OF_ORDER if the device does not support in-order
+    completion and is completing the operation out of order.
+
   - This function can be called in an interrupt context.
 
 - device_config
@@ -509,7 +528,7 @@
 DMA_CTRL_ACK
 
 - If clear, the descriptor cannot be reused by provider until the
-  client acknowledges receipt, i.e. has has a chance to establish any
+  client acknowledges receipt, i.e. has a chance to establish any
   dependency chains
 
 - This can be acked by invoking async_tx_ack()
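
A note on what DMA_OUT_OF_ORDER means for clients: cookie-based progress
queries stop being meaningful. Below is a minimal client-side sketch (not
part of this series; my_tx_done and its caller are invented, only the
dmaengine calls are the real API):

  #include <linux/dmaengine.h>

  /* Hypothetical helper: poll one descriptor on a channel that may
   * advertise DMA_COMPLETION_NO_ORDER.
   */
  static bool my_tx_done(struct dma_chan *chan, dma_cookie_t cookie)
  {
  	struct dma_tx_state state;
  	enum dma_status status = dmaengine_tx_status(chan, cookie, &state);

  	if (status == DMA_OUT_OF_ORDER) {
  		/*
  		 * Cookie tracking is invalid here; fall back to the
  		 * per-descriptor completion callback (or an identifier
  		 * the client attached to the descriptor).
  		 */
  		return false;
  	}

  	return status == DMA_COMPLETE;
  }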
diff --git a/MAINTAINERS b/MAINTAINERS
index 9590b98..2000dae 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11296,6 +11296,19 @@
 T:	git git://git.monstr.eu/linux-2.6-microblaze.git
 F:	arch/microblaze/
 
+MICROCHIP AT91 DMA DRIVERS
+M:	Ludovic Desroches <ludovic.desroches@microchip.com>
+M:	Tudor Ambarus <tudor.ambarus@microchip.com>
+L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
+L:	dmaengine@vger.kernel.org
+S:	Supported
+F:	Documentation/devicetree/bindings/dma/atmel-dma.txt
+F:	drivers/dma/at_hdmac.c
+F:	drivers/dma/at_hdmac_regs.h
+F:	drivers/dma/at_xdmac.c
+F:	include/dt-bindings/dma/at91.h
+F:	include/linux/platform_data/dma-atmel.h
+
 MICROCHIP AT91 SERIAL DRIVER
 M:	Richard Genoud <richard.genoud@gmail.com>
 S:	Maintained
@@ -11324,17 +11337,6 @@
 S:	Supported
 F:	sound/soc/atmel
 
-MICROCHIP DMA DRIVER
-M:	Ludovic Desroches <ludovic.desroches@microchip.com>
-L:	linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-L:	dmaengine@vger.kernel.org
-S:	Supported
-F:	Documentation/devicetree/bindings/dma/atmel-dma.txt
-F:	drivers/dma/at_hdmac.c
-F:	drivers/dma/at_hdmac_regs.h
-F:	include/dt-bindings/dma/at91.h
-F:	include/linux/platform_data/dma-atmel.h
-
 MICROCHIP ECC DRIVER
 M:	Tudor Ambarus <tudor.ambarus@microchip.com>
 L:	linux-crypto@vger.kernel.org
@@ -11470,13 +11472,6 @@
 S:	Supported
 F:	drivers/net/wireless/microchip/wilc1000/
 
-MICROCHIP XDMA DRIVER
-M:	Ludovic Desroches <ludovic.desroches@microchip.com>
-L:	linux-arm-kernel@lists.infradead.org
-L:	dmaengine@vger.kernel.org
-S:	Supported
-F:	drivers/dma/at_xdmac.c
-
 MICROSEMI MIPS SOCS
 M:	Alexandre Belloni <alexandre.belloni@bootlin.com>
 M:	Microchip Linux Driver Support <UNGLinuxDriver@microchip.com>
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 668e963..518a143 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -285,8 +285,9 @@
 config INTEL_IDXD
 	tristate "Intel Data Accelerators support"
 	depends on PCI && X86_64
+	depends on PCI_MSI
+	depends on SBITMAP
 	select DMA_ENGINE
-	select SBITMAP
 	help
 	  Enable support for the Intel(R) data accelerators present
 	  in Intel Xeon CPU.
diff --git a/drivers/dma/acpi-dma.c b/drivers/dma/acpi-dma.c
index 8a05db3..35f4804 100644
--- a/drivers/dma/acpi-dma.c
+++ b/drivers/dma/acpi-dma.c
@@ -358,19 +358,12 @@
 {
 	struct acpi_dma_parser_data pdata;
 	struct acpi_dma_spec *dma_spec = &pdata.dma_spec;
+	struct acpi_device *adev = ACPI_COMPANION(dev);
 	struct list_head resource_list;
-	struct acpi_device *adev;
 	struct acpi_dma *adma;
 	struct dma_chan *chan = NULL;
 	int found;
-
-	/* Check if the device was enumerated by ACPI */
-	if (!dev)
-		return ERR_PTR(-ENODEV);
-
-	adev = ACPI_COMPANION(dev);
-	if (!adev)
-		return ERR_PTR(-ENODEV);
+	int ret;
 
 	memset(&pdata, 0, sizeof(pdata));
 	pdata.index = index;
@@ -380,9 +373,11 @@
 	dma_spec->slave_id = -1;
 
 	INIT_LIST_HEAD(&resource_list);
-	acpi_dev_get_resources(adev, &resource_list,
-			acpi_dma_parse_fixed_dma, &pdata);
+	ret = acpi_dev_get_resources(adev, &resource_list,
+				     acpi_dma_parse_fixed_dma, &pdata);
 	acpi_dev_free_resource_list(&resource_list);
+	if (ret < 0)
+		return ERR_PTR(ret);
 
 	if (dma_spec->slave_id < 0 || dma_spec->chan_id < 0)
 		return ERR_PTR(-ENODEV);
diff --git a/drivers/dma/altera-msgdma.c b/drivers/dma/altera-msgdma.c
index 539e785..321ac3a 100644
--- a/drivers/dma/altera-msgdma.c
+++ b/drivers/dma/altera-msgdma.c
@@ -153,7 +153,8 @@
  * struct msgdma_sw_desc - implements a sw descriptor
  * @async_tx: support for the async_tx api
  * @hw_desc: associated HW descriptor
- * @free_list: node of the free SW descriprots list
+ * @node: node to move from the free list to the tx list
+ * @tx_list: transmit list node
  */
 struct msgdma_sw_desc {
 	struct dma_async_tx_descriptor async_tx;
@@ -162,7 +163,7 @@
 	struct list_head tx_list;
 };
 
-/**
+/*
  * struct msgdma_device - DMA device structure
  */
 struct msgdma_device {
@@ -258,6 +259,7 @@
  * @dst: Destination buffer address
  * @src: Source buffer address
  * @len: Transfer length
+ * @stride: Read/write stride value to set
  */
 static void msgdma_desc_config(struct msgdma_extended_desc *desc,
 			       dma_addr_t dst, dma_addr_t src, size_t len,
diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c
index 73a2078..45bbcd6 100644
--- a/drivers/dma/at_hdmac.c
+++ b/drivers/dma/at_hdmac.c
@@ -656,7 +656,7 @@
 
 /**
  * atc_tx_submit - set the prepared descriptor(s) to be executed by the engine
- * @desc: descriptor at the head of the transaction chain
+ * @tx: descriptor at the head of the transaction chain
  *
  * Queue chain if DMA engine is working already
  *
@@ -1196,7 +1196,7 @@
 	return NULL;
 }
 
-/**
+/*
  * atc_dma_cyclic_check_values
  * Check for too big/unaligned periods and unaligned DMA buffer
  */
@@ -1217,7 +1217,7 @@
 	return -EINVAL;
 }
 
-/**
+/*
  * atc_dma_cyclic_fill_desc - Fill one period descriptor
  */
 static int
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
index 2b06a7a..a53e71d 100644
--- a/drivers/dma/dmaengine.c
+++ b/drivers/dma/dmaengine.c
@@ -592,13 +592,25 @@
 	caps->src_addr_widths = device->src_addr_widths;
 	caps->dst_addr_widths = device->dst_addr_widths;
 	caps->directions = device->directions;
+	caps->min_burst = device->min_burst;
 	caps->max_burst = device->max_burst;
+	caps->max_sg_burst = device->max_sg_burst;
 	caps->residue_granularity = device->residue_granularity;
 	caps->descriptor_reuse = device->descriptor_reuse;
 	caps->cmd_pause = !!device->device_pause;
 	caps->cmd_resume = !!device->device_resume;
 	caps->cmd_terminate = !!device->device_terminate_all;
 
+	/*
+	 * DMA engine device might be configured with non-uniformly
+	 * distributed slave capabilities per device channels. In this
+	 * case the corresponding driver may provide the device_caps
+	 * callback to override the generic capabilities with
+	 * channel-specific ones.
+	 */
+	if (device->device_caps)
+		device->device_caps(chan, caps);
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(dma_get_slave_caps);
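
For illustration, this is roughly how a client sees the new min_burst and
max_sg_burst fields, including any per-channel override applied through the
device_caps callback. my_check_burst is an invented helper; only
dma_get_slave_caps() and struct dma_slave_caps are the real API:

  #include <linux/dmaengine.h>

  /* Hypothetical sketch: validate a requested burst against the
   * capabilities the channel actually advertises.
   */
  static int my_check_burst(struct dma_chan *chan, u32 maxburst)
  {
  	struct dma_slave_caps caps;
  	int ret = dma_get_slave_caps(chan, &caps);

  	if (ret)
  		return ret;

  	if (maxburst < caps.min_burst || maxburst > caps.max_burst)
  		return -EINVAL;

  	/* caps.max_sg_burst == 0 means no limit on SG entries per transfer */
  	return 0;
  }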
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index 604f803..45d4d92 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -829,7 +829,10 @@
 			result("test timed out", total_tests, src->off, dst->off,
 			       len, 0);
 			goto error_unmap_continue;
-		} else if (status != DMA_COMPLETE) {
+		} else if (status != DMA_COMPLETE &&
+			   !(dma_has_cap(DMA_COMPLETION_NO_ORDER,
+					 dev->cap_mask) &&
+			     status == DMA_OUT_OF_ORDER)) {
 			result(status == DMA_ERROR ?
 			       "completion error status" :
 			       "completion busy status", total_tests, src->off,
@@ -1007,6 +1010,12 @@
 	dtc->chan = chan;
 	INIT_LIST_HEAD(&dtc->threads);
 
+	if (dma_has_cap(DMA_COMPLETION_NO_ORDER, dma_dev->cap_mask) &&
+	    info->params.polled) {
+		info->params.polled = false;
+		pr_warn("DMA_COMPLETION_NO_ORDER, polled disabled\n");
+	}
+
 	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
 		if (dmatest == 0) {
 			cnt = dmatest_add_threads(info, dtc, DMA_MEMCPY);
diff --git a/drivers/dma/dw/Makefile b/drivers/dma/dw/Makefile
index b6f0669..a6f358a 100644
--- a/drivers/dma/dw/Makefile
+++ b/drivers/dma/dw/Makefile
@@ -1,11 +1,11 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_DW_DMAC_CORE)	+= dw_dmac_core.o
-dw_dmac_core-objs	:= core.o dw.o idma32.o
+dw_dmac_core-y			:= core.o dw.o idma32.o
+dw_dmac_core-$(CONFIG_ACPI)	+= acpi.o
 
 obj-$(CONFIG_DW_DMAC)		+= dw_dmac.o
 dw_dmac-y			:= platform.o
-dw_dmac-$(CONFIG_ACPI)		+= acpi.o
 dw_dmac-$(CONFIG_OF)		+= of.o
 
 obj-$(CONFIG_DW_DMAC_PCI)	+= dw_dmac_pci.o
-dw_dmac_pci-objs	:= pci.o
+dw_dmac_pci-y			:= pci.o
diff --git a/drivers/dma/dw/acpi.c b/drivers/dma/dw/acpi.c
index f6e8d55..c510c10 100644
--- a/drivers/dma/dw/acpi.c
+++ b/drivers/dma/dw/acpi.c
@@ -41,6 +41,7 @@
 	if (ret)
 		dev_err(dev, "could not register acpi_dma_controller\n");
 }
+EXPORT_SYMBOL_GPL(dw_dma_acpi_controller_register);
 
 void dw_dma_acpi_controller_free(struct dw_dma *dw)
 {
@@ -51,3 +52,4 @@
 
 	acpi_dma_controller_free(dev);
 }
+EXPORT_SYMBOL_GPL(dw_dma_acpi_controller_free);
diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c
index a1b56f5..4700f2e 100644
--- a/drivers/dma/dw/core.c
+++ b/drivers/dma/dw/core.c
@@ -786,6 +786,11 @@
 
 	memcpy(&dwc->dma_sconfig, sconfig, sizeof(*sconfig));
 
+	dwc->dma_sconfig.src_maxburst =
+		clamp(dwc->dma_sconfig.src_maxburst, 0U, dwc->max_burst);
+	dwc->dma_sconfig.dst_maxburst =
+		clamp(dwc->dma_sconfig.dst_maxburst, 0U, dwc->max_burst);
+
 	dw->encode_maxburst(dwc, &dwc->dma_sconfig.src_maxburst);
 	dw->encode_maxburst(dwc, &dwc->dma_sconfig.dst_maxburst);
 
@@ -1037,6 +1042,25 @@
 	dev_vdbg(chan2dev(chan), "%s: done\n", __func__);
 }
 
+static void dwc_caps(struct dma_chan *chan, struct dma_slave_caps *caps)
+{
+	struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+
+	caps->max_burst = dwc->max_burst;
+
+	/*
+	 * It might be crucial for some devices to have hardware-accelerated
+	 * multi-block transfers supported, aka LLPs in DW DMAC notation.
+	 * If LLPs are supported, max_sg_burst is set to zero, which means
+	 * an unlimited number of SG entries can be handled in a single DMA
+	 * transaction; otherwise it's just one SG entry.
+	 */
+	if (dwc->nollp)
+		caps->max_sg_burst = 1;
+	else
+		caps->max_sg_burst = 0;
+}
+
 int do_dma_probe(struct dw_dma_chip *chip)
 {
 	struct dw_dma *dw = chip->dw;
@@ -1166,11 +1190,23 @@
 			 */
 			dwc->block_size =
 				(4 << ((pdata->block_size >> 4 * i) & 0xf)) - 1;
+
+			/*
+			 * According to the DW DMA databook the true scatter-
+			 * gather LLPs aren't available if either multi-block
+			 * config is disabled (CHx_MULTI_BLK_EN == 0) or the
+			 * LLP register is hard-coded to zeros
+			 * (CHx_HC_LLP == 1).
+			 */
 			dwc->nollp =
-				(dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0;
+				(dwc_params >> DWC_PARAMS_MBLK_EN & 0x1) == 0 ||
+				(dwc_params >> DWC_PARAMS_HC_LLP & 0x1) == 1;
+			dwc->max_burst =
+				(0x4 << (dwc_params >> DWC_PARAMS_MSIZE & 0x7));
 		} else {
 			dwc->block_size = pdata->block_size;
 			dwc->nollp = !pdata->multi_block[i];
+			dwc->max_burst = pdata->max_burst[i] ?: DW_DMA_MAX_BURST;
 		}
 	}
 
@@ -1193,6 +1229,7 @@
 	dw->dma.device_prep_dma_memcpy = dwc_prep_dma_memcpy;
 	dw->dma.device_prep_slave_sg = dwc_prep_slave_sg;
 
+	dw->dma.device_caps = dwc_caps;
 	dw->dma.device_config = dwc_config;
 	dw->dma.device_pause = dwc_pause;
 	dw->dma.device_resume = dwc_resume;
@@ -1202,12 +1239,21 @@
 	dw->dma.device_issue_pending = dwc_issue_pending;
 
 	/* DMA capabilities */
+	dw->dma.min_burst = DW_DMA_MIN_BURST;
+	dw->dma.max_burst = DW_DMA_MAX_BURST;
 	dw->dma.src_addr_widths = DW_DMA_BUSWIDTHS;
 	dw->dma.dst_addr_widths = DW_DMA_BUSWIDTHS;
 	dw->dma.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV) |
 			     BIT(DMA_MEM_TO_MEM);
 	dw->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
 
+	/*
+	 * For now there is no hardware with non-uniform maximum block size
+	 * across all of the device channels, so we set the maximum segment
+	 * size as the block size found for the very first channel.
+	 */
+	dma_set_max_seg_size(dw->dma.dev, dw->chan[0].block_size);
+
 	err = dma_async_device_register(&dw->dma);
 	if (err)
 		goto err_dma_register;
diff --git a/drivers/dma/dw/of.c b/drivers/dma/dw/of.c
index 9e27831..1474b38 100644
--- a/drivers/dma/dw/of.c
+++ b/drivers/dma/dw/of.c
@@ -98,6 +98,11 @@
 			pdata->multi_block[tmp] = 1;
 	}
 
+	if (of_property_read_u32_array(np, "snps,max-burst-len", pdata->max_burst,
+				       nr_channels)) {
+		memset32(pdata->max_burst, DW_DMA_MAX_BURST, nr_channels);
+	}
+
 	if (!of_property_read_u32(np, "snps,dma-protection-control", &tmp)) {
 		if (tmp > CHAN_PROTCTL_MASK)
 			return NULL;
diff --git a/drivers/dma/dw/pci.c b/drivers/dma/dw/pci.c
index cf6e8ec..1142aa6 100644
--- a/drivers/dma/dw/pci.c
+++ b/drivers/dma/dw/pci.c
@@ -60,6 +60,8 @@
 	if (ret)
 		return ret;
 
+	dw_dma_acpi_controller_register(chip->dw);
+
 	pci_set_drvdata(pdev, data);
 
 	return 0;
@@ -71,6 +73,8 @@
 	struct dw_dma_chip *chip = data->chip;
 	int ret;
 
+	dw_dma_acpi_controller_free(chip->dw);
+
 	ret = data->remove(chip);
 	if (ret)
 		dev_warn(&pdev->dev, "can't remove device properly: %d\n", ret);
diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h
index 3fce66e..76654bd 100644
--- a/drivers/dma/dw/regs.h
+++ b/drivers/dma/dw/regs.h
@@ -125,6 +125,8 @@
 
 /* Bitfields in DWC_PARAMS */
 #define DWC_PARAMS_MBLK_EN	11		/* multi block transfer */
+#define DWC_PARAMS_HC_LLP	13		/* set LLP register to zero */
+#define DWC_PARAMS_MSIZE	16		/* max group transaction size */
 
 /* bursts size */
 enum dw_dma_msize {
@@ -283,6 +285,7 @@
 	/* hardware configuration */
 	unsigned int		block_size;
 	bool			nollp;
+	u32			max_burst;
 
 	/* custom slave configuration */
 	struct dw_dma_slave	dws;
diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c
index 9c8b4d3..87a2460 100644
--- a/drivers/dma/ep93xx_dma.c
+++ b/drivers/dma/ep93xx_dma.c
@@ -147,6 +147,7 @@
  *                is set via .device_config before slave operation is
  *                prepared
  * @runtime_ctrl: M2M runtime values for the control register.
+ * @slave_config: slave configuration
  *
  * As EP93xx DMA controller doesn't support real chained DMA descriptors we
  * will have slightly different scheme here: @active points to a head of
@@ -187,6 +188,7 @@
  * @dma_dev: holds the dmaengine device
  * @m2m: is this an M2M or M2P device
  * @hw_setup: method which sets the channel up for operation
+ * @hw_synchronize: synchronizes DMA channel termination to current context
  * @hw_shutdown: shuts the channel down and flushes whatever is left
  * @hw_submit: pushes active descriptor(s) to the hardware
  * @hw_interrupt: handle the interrupt
diff --git a/drivers/dma/fsl-qdma.c b/drivers/dma/fsl-qdma.c
index 95cc025..ed2ab46 100644
--- a/drivers/dma/fsl-qdma.c
+++ b/drivers/dma/fsl-qdma.c
@@ -56,7 +56,7 @@
 
 /* Registers for bit and genmask */
 #define FSL_QDMA_CQIDR_SQT		BIT(15)
-#define QDMA_CCDF_FOTMAT		BIT(29)
+#define QDMA_CCDF_FORMAT		BIT(29)
 #define QDMA_CCDF_SER			BIT(30)
 #define QDMA_SG_FIN			BIT(30)
 #define QDMA_SG_LEN_MASK		GENMASK(29, 0)
@@ -110,8 +110,19 @@
 #define FSL_QDMA_CMD_DSEN_OFFSET	19
 #define FSL_QDMA_CMD_LWC_OFFSET		16
 
+/* Field definition for Descriptor status */
+#define QDMA_CCDF_STATUS_RTE		BIT(5)
+#define QDMA_CCDF_STATUS_WTE		BIT(4)
+#define QDMA_CCDF_STATUS_CDE		BIT(2)
+#define QDMA_CCDF_STATUS_SDE		BIT(1)
+#define QDMA_CCDF_STATUS_DDE		BIT(0)
+#define QDMA_CCDF_STATUS_MASK		(QDMA_CCDF_STATUS_RTE | \
+					QDMA_CCDF_STATUS_WTE | \
+					QDMA_CCDF_STATUS_CDE | \
+					QDMA_CCDF_STATUS_SDE | \
+					QDMA_CCDF_STATUS_DDE)
+
 /* Field definition for Descriptor offset */
-#define QDMA_CCDF_STATUS		20
 #define QDMA_CCDF_OFFSET		20
 #define QDMA_SDDF_CMD(x)		(((u64)(x)) << 32)
 
@@ -136,7 +147,7 @@
  * @__reserved1:	    Reserved field.
  * @cfg8b_w1:		    Compound descriptor command queue origin produced
  *			    by qDMA and dynamic debug field.
- * @data		    Pointer to the memory 40-bit address, describes DMA
+ * @data:		    Pointer to the memory 40-bit address, describes DMA
  *			    source information and DMA destination information.
  */
 struct fsl_qdma_format {
@@ -243,13 +254,14 @@
 static inline void
 qdma_ccdf_set_format(struct fsl_qdma_format *ccdf, int offset)
 {
-	ccdf->cfg = cpu_to_le32(QDMA_CCDF_FOTMAT | offset);
+	ccdf->cfg = cpu_to_le32(QDMA_CCDF_FORMAT |
+				(offset << QDMA_CCDF_OFFSET));
 }
 
 static inline int
 qdma_ccdf_get_status(const struct fsl_qdma_format *ccdf)
 {
-	return (le32_to_cpu(ccdf->status) & QDMA_CCDF_MASK) >> QDMA_CCDF_STATUS;
+	return (le32_to_cpu(ccdf->status) & QDMA_CCDF_STATUS_MASK);
 }
 
 static inline void
@@ -618,6 +630,7 @@
 {
 	bool duplicate;
 	u32 reg, i, count;
+	u8 completion_status;
 	struct fsl_qdma_queue *temp_queue;
 	struct fsl_qdma_format *status_addr;
 	struct fsl_qdma_comp *fsl_comp = NULL;
@@ -677,6 +690,8 @@
 		}
 		list_del(&fsl_comp->list);
 
+		completion_status = qdma_ccdf_get_status(status_addr);
+
 		reg = qdma_readl(fsl_qdma, block + FSL_QDMA_BSQMR);
 		reg |= FSL_QDMA_BSQMR_DI;
 		qdma_desc_addr_set64(status_addr, 0x0);
@@ -686,6 +701,31 @@
 		qdma_writel(fsl_qdma, reg, block + FSL_QDMA_BSQMR);
 		spin_unlock(&temp_queue->queue_lock);
 
+		/* The completion_status is evaluated here
+		 * (outside of spin lock)
+		 */
+		if (completion_status) {
+			/* A completion error occurred! */
+			if (completion_status & QDMA_CCDF_STATUS_WTE) {
+				/* Write transaction error */
+				fsl_comp->vdesc.tx_result.result =
+					DMA_TRANS_WRITE_FAILED;
+			} else if (completion_status & QDMA_CCDF_STATUS_RTE) {
+				/* Read transaction error */
+				fsl_comp->vdesc.tx_result.result =
+					DMA_TRANS_READ_FAILED;
+			} else {
+				/* Command/source/destination
+				 * description error
+				 */
+				fsl_comp->vdesc.tx_result.result =
+					DMA_TRANS_ABORTED;
+				dev_err(fsl_qdma->dma_dev.dev,
+					"DMA status descriptor error %x\n",
+					completion_status);
+			}
+		}
+
 		spin_lock(&fsl_comp->qchan->vchan.lock);
 		vchan_cookie_complete(&fsl_comp->vdesc);
 		fsl_comp->qchan->status = DMA_COMPLETE;
@@ -700,11 +740,22 @@
 	unsigned int intr;
 	struct fsl_qdma_engine *fsl_qdma = dev_id;
 	void __iomem *status = fsl_qdma->status_base;
+	unsigned int decfdw0r;
+	unsigned int decfdw1r;
+	unsigned int decfdw2r;
+	unsigned int decfdw3r;
 
 	intr = qdma_readl(fsl_qdma, status + FSL_QDMA_DEDR);
 
-	if (intr)
-		dev_err(fsl_qdma->dma_dev.dev, "DMA transaction error!\n");
+	if (intr) {
+		decfdw0r = qdma_readl(fsl_qdma, status + FSL_QDMA_DECFDW0R);
+		decfdw1r = qdma_readl(fsl_qdma, status + FSL_QDMA_DECFDW1R);
+		decfdw2r = qdma_readl(fsl_qdma, status + FSL_QDMA_DECFDW2R);
+		decfdw3r = qdma_readl(fsl_qdma, status + FSL_QDMA_DECFDW3R);
+		dev_err(fsl_qdma->dma_dev.dev,
+			"DMA transaction error! (%x: %x-%x-%x-%x)\n",
+			intr, decfdw0r, decfdw1r, decfdw2r, decfdw3r);
+	}
 
 	qdma_writel(fsl_qdma, FSL_QDMA_DEDR_CLEAR, status + FSL_QDMA_DEDR);
 	return IRQ_HANDLED;
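
Since the completion handler above now fills in vdesc.tx_result, clients
can observe read/write transaction failures through the result-aware
callback. A rough client-side sketch (my_dma_done is invented; the
dmaengine_result types are the real API):

  #include <linux/dmaengine.h>
  #include <linux/printk.h>

  /* Hypothetical callback installed via desc->callback_result before
   * the descriptor is submitted.
   */
  static void my_dma_done(void *param, const struct dmaengine_result *result)
  {
  	switch (result->result) {
  	case DMA_TRANS_NOERROR:
  		break;				/* success */
  	case DMA_TRANS_READ_FAILED:
  	case DMA_TRANS_WRITE_FAILED:
  	case DMA_TRANS_ABORTED:
  	default:
  		pr_err("dma transfer failed: %d\n", result->result);
  		break;
  	}
  }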
diff --git a/drivers/dma/hisi_dma.c b/drivers/dma/hisi_dma.c
index ed36192..e1a958a 100644
--- a/drivers/dma/hisi_dma.c
+++ b/drivers/dma/hisi_dma.c
@@ -511,7 +511,6 @@
 	struct device *dev = &pdev->dev;
 	struct hisi_dma_dev *hdma_dev;
 	struct dma_device *dma_dev;
-	size_t dev_size;
 	int ret;
 
 	ret = pcim_enable_device(pdev);
@@ -534,9 +533,7 @@
 	if (ret)
 		return ret;
 
-	dev_size = sizeof(struct hisi_dma_chan) * HISI_DMA_CHAN_NUM +
-		   sizeof(*hdma_dev);
-	hdma_dev = devm_kzalloc(dev, dev_size, GFP_KERNEL);
+	hdma_dev = devm_kzalloc(dev, struct_size(hdma_dev, chan, HISI_DMA_CHAN_NUM), GFP_KERNEL);
 	if (!hdma_dev)
 		return -EINVAL;
 
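
The hisi_dma hunk above swaps an open-coded size calculation for
struct_size(), which sizes a struct with a trailing flexible array and
saturates on overflow. A self-contained illustration (struct my_dev is
invented; struct_size() from <linux/overflow.h> is real):

  #include <linux/overflow.h>
  #include <linux/slab.h>

  struct my_dev {
  	int nr_chan;
  	struct { int id; } chan[];	/* flexible array member */
  };

  static struct my_dev *my_alloc(int nr)
  {
  	/* sizeof(struct my_dev) + nr * sizeof(chan[0]); on overflow the
  	 * result saturates to SIZE_MAX so the allocation fails cleanly
  	 * instead of being undersized.
  	 */
  	struct my_dev *d = kzalloc(struct_size(d, chan, nr), GFP_KERNEL);

  	if (d)
  		d->nr_chan = nr;
  	return d;
  }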
diff --git a/drivers/dma/idxd/cdev.c b/drivers/dma/idxd/cdev.c
index cb376cf..c397615 100644
--- a/drivers/dma/idxd/cdev.c
+++ b/drivers/dma/idxd/cdev.c
@@ -115,6 +115,9 @@
 	dev_dbg(dev, "%s called\n", __func__);
 	filep->private_data = NULL;
 
+	/* Wait for in-flight operations to complete. */
+	idxd_wq_drain(wq);
+
 	kfree(ctx);
 	mutex_lock(&wq->wq_lock);
 	idxd_wq_put(wq);
diff --git a/drivers/dma/idxd/device.c b/drivers/dma/idxd/device.c
index 8d2718c5..14b4585 100644
--- a/drivers/dma/idxd/device.c
+++ b/drivers/dma/idxd/device.c
@@ -6,70 +6,39 @@
 #include <linux/pci.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/dmaengine.h>
+#include <linux/irq.h>
+#include <linux/msi.h>
 #include <uapi/linux/idxd.h>
 #include "../dmaengine.h"
 #include "idxd.h"
 #include "registers.h"
 
-static int idxd_cmd_wait(struct idxd_device *idxd, u32 *status, int timeout);
-static int idxd_cmd_send(struct idxd_device *idxd, int cmd_code, u32 operand);
+static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
+			  u32 *status);
 
 /* Interrupt control bits */
-int idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id)
+void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id)
 {
-	struct pci_dev *pdev = idxd->pdev;
-	int msixcnt = pci_msix_vec_count(pdev);
-	union msix_perm perm;
-	u32 offset;
+	struct irq_data *data = irq_get_irq_data(idxd->msix_entries[vec_id].vector);
 
-	if (vec_id < 0 || vec_id >= msixcnt)
-		return -EINVAL;
-
-	offset = idxd->msix_perm_offset + vec_id * 8;
-	perm.bits = ioread32(idxd->reg_base + offset);
-	perm.ignore = 1;
-	iowrite32(perm.bits, idxd->reg_base + offset);
-
-	return 0;
+	pci_msi_mask_irq(data);
 }
 
 void idxd_mask_msix_vectors(struct idxd_device *idxd)
 {
 	struct pci_dev *pdev = idxd->pdev;
 	int msixcnt = pci_msix_vec_count(pdev);
-	int i, rc;
+	int i;
 
-	for (i = 0; i < msixcnt; i++) {
-		rc = idxd_mask_msix_vector(idxd, i);
-		if (rc < 0)
-			dev_warn(&pdev->dev,
-				 "Failed disabling msix vec %d\n", i);
-	}
+	for (i = 0; i < msixcnt; i++)
+		idxd_mask_msix_vector(idxd, i);
 }
 
-int idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id)
+void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id)
 {
-	struct pci_dev *pdev = idxd->pdev;
-	int msixcnt = pci_msix_vec_count(pdev);
-	union msix_perm perm;
-	u32 offset;
+	struct irq_data *data = irq_get_irq_data(idxd->msix_entries[vec_id].vector);
 
-	if (vec_id < 0 || vec_id >= msixcnt)
-		return -EINVAL;
-
-	offset = idxd->msix_perm_offset + vec_id * 8;
-	perm.bits = ioread32(idxd->reg_base + offset);
-	perm.ignore = 0;
-	iowrite32(perm.bits, idxd->reg_base + offset);
-
-	/*
-	 * A readback from the device ensures that any previously generated
-	 * completion record writes are visible to software based on PCI
-	 * ordering rules.
-	 */
-	perm.bits = ioread32(idxd->reg_base + offset);
-
-	return 0;
+	pci_msi_unmask_irq(data);
 }
 
 void idxd_unmask_error_interrupts(struct idxd_device *idxd)
@@ -160,16 +129,14 @@
 int idxd_wq_alloc_resources(struct idxd_wq *wq)
 {
 	struct idxd_device *idxd = wq->idxd;
-	struct idxd_group *group = wq->group;
 	struct device *dev = &idxd->pdev->dev;
 	int rc, num_descs, i;
 
 	if (wq->type != IDXD_WQT_KERNEL)
 		return 0;
 
-	num_descs = wq->size +
-		idxd->hw.gen_cap.max_descs_per_engine * group->num_engines;
-	wq->num_descs = num_descs;
+	wq->num_descs = wq->size;
+	num_descs = wq->size;
 
 	rc = alloc_hw_descs(wq, num_descs);
 	if (rc < 0)
@@ -187,8 +154,8 @@
 	if (rc < 0)
 		goto fail_alloc_descs;
 
-	rc = sbitmap_init_node(&wq->sbmap, num_descs, -1, GFP_KERNEL,
-			       dev_to_node(dev));
+	rc = sbitmap_queue_init_node(&wq->sbq, num_descs, -1, false, GFP_KERNEL,
+				     dev_to_node(dev));
 	if (rc < 0)
 		goto fail_sbitmap_init;
 
@@ -201,7 +168,7 @@
 			sizeof(struct dsa_completion_record) * i;
 		desc->id = i;
 		desc->wq = wq;
-
+		desc->cpu = -1;
 		dma_async_tx_descriptor_init(&desc->txd, &wq->dma_chan);
 		desc->txd.tx_submit = idxd_dma_tx_submit;
 	}
@@ -227,7 +194,7 @@
 	free_hw_descs(wq);
 	free_descs(wq);
 	dma_free_coherent(dev, wq->compls_size, wq->compls, wq->compls_addr);
-	sbitmap_free(&wq->sbmap);
+	sbitmap_queue_free(&wq->sbq);
 }
 
 int idxd_wq_enable(struct idxd_wq *wq)
@@ -235,21 +202,13 @@
 	struct idxd_device *idxd = wq->idxd;
 	struct device *dev = &idxd->pdev->dev;
 	u32 status;
-	int rc;
-
-	lockdep_assert_held(&idxd->dev_lock);
 
 	if (wq->state == IDXD_WQ_ENABLED) {
 		dev_dbg(dev, "WQ %d already enabled\n", wq->id);
 		return -ENXIO;
 	}
 
-	rc = idxd_cmd_send(idxd, IDXD_CMD_ENABLE_WQ, wq->id);
-	if (rc < 0)
-		return rc;
-	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
-	if (rc < 0)
-		return rc;
+	idxd_cmd_exec(idxd, IDXD_CMD_ENABLE_WQ, wq->id, &status);
 
 	if (status != IDXD_CMDSTS_SUCCESS &&
 	    status != IDXD_CMDSTS_ERR_WQ_ENABLED) {
@@ -267,9 +226,7 @@
 	struct idxd_device *idxd = wq->idxd;
 	struct device *dev = &idxd->pdev->dev;
 	u32 status, operand;
-	int rc;
 
-	lockdep_assert_held(&idxd->dev_lock);
 	dev_dbg(dev, "Disabling WQ %d\n", wq->id);
 
 	if (wq->state != IDXD_WQ_ENABLED) {
@@ -278,12 +235,7 @@
 	}
 
 	operand = BIT(wq->id % 16) | ((wq->id / 16) << 16);
-	rc = idxd_cmd_send(idxd, IDXD_CMD_DISABLE_WQ, operand);
-	if (rc < 0)
-		return rc;
-	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
-	if (rc < 0)
-		return rc;
+	idxd_cmd_exec(idxd, IDXD_CMD_DISABLE_WQ, operand, &status);
 
 	if (status != IDXD_CMDSTS_SUCCESS) {
 		dev_dbg(dev, "WQ disable failed: %#x\n", status);
@@ -295,6 +247,22 @@
 	return 0;
 }
 
+void idxd_wq_drain(struct idxd_wq *wq)
+{
+	struct idxd_device *idxd = wq->idxd;
+	struct device *dev = &idxd->pdev->dev;
+	u32 operand;
+
+	if (wq->state != IDXD_WQ_ENABLED) {
+		dev_dbg(dev, "WQ %d in wrong state: %d\n", wq->id, wq->state);
+		return;
+	}
+
+	dev_dbg(dev, "Draining WQ %d\n", wq->id);
+	operand = BIT(wq->id % 16) | ((wq->id / 16) << 16);
+	idxd_cmd_exec(idxd, IDXD_CMD_DRAIN_WQ, operand, NULL);
+}
+
 int idxd_wq_map_portal(struct idxd_wq *wq)
 {
 	struct idxd_device *idxd = wq->idxd;
@@ -357,66 +325,79 @@
 	return false;
 }
 
-static int idxd_cmd_wait(struct idxd_device *idxd, u32 *status, int timeout)
+/*
+ * This function is only used for reset during probe and will
+ * poll for completion. Once the device is set up with interrupts,
+ * all commands will be done via interrupt completion.
+ */
+void idxd_device_init_reset(struct idxd_device *idxd)
 {
-	u32 sts, to = timeout;
+	struct device *dev = &idxd->pdev->dev;
+	union idxd_command_reg cmd;
+	unsigned long flags;
 
-	lockdep_assert_held(&idxd->dev_lock);
-	sts = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
-	while (sts & IDXD_CMDSTS_ACTIVE && --to) {
+	memset(&cmd, 0, sizeof(cmd));
+	cmd.cmd = IDXD_CMD_RESET_DEVICE;
+	dev_dbg(dev, "%s: sending reset for init.\n", __func__);
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
+
+	while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) &
+	       IDXD_CMDSTS_ACTIVE)
 		cpu_relax();
-		sts = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
-	}
-
-	if (to == 0 && sts & IDXD_CMDSTS_ACTIVE) {
-		dev_warn(&idxd->pdev->dev, "%s timed out!\n", __func__);
-		*status = 0;
-		return -EBUSY;
-	}
-
-	*status = sts;
-	return 0;
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
 }
 
-static int idxd_cmd_send(struct idxd_device *idxd, int cmd_code, u32 operand)
+static void idxd_cmd_exec(struct idxd_device *idxd, int cmd_code, u32 operand,
+			  u32 *status)
 {
 	union idxd_command_reg cmd;
-	int rc;
-	u32 status;
-
-	lockdep_assert_held(&idxd->dev_lock);
-	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
-	if (rc < 0)
-		return rc;
+	DECLARE_COMPLETION_ONSTACK(done);
+	unsigned long flags;
 
 	memset(&cmd, 0, sizeof(cmd));
 	cmd.cmd = cmd_code;
 	cmd.operand = operand;
+	cmd.int_req = 1;
+
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	wait_event_lock_irq(idxd->cmd_waitq,
+			    !test_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags),
+			    idxd->dev_lock);
+
 	dev_dbg(&idxd->pdev->dev, "%s: sending cmd: %#x op: %#x\n",
 		__func__, cmd_code, operand);
+
+	__set_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags);
+	idxd->cmd_done = &done;
 	iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
 
-	return 0;
+	/*
+	 * After command submitted, release lock and go to sleep until
+	 * the command completes via interrupt.
+	 */
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
+	wait_for_completion(&done);
+	spin_lock_irqsave(&idxd->dev_lock, flags);
+	if (status)
+		*status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
+	__clear_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags);
+	/* Wake up other pending commands */
+	wake_up(&idxd->cmd_waitq);
+	spin_unlock_irqrestore(&idxd->dev_lock, flags);
 }
 
 int idxd_device_enable(struct idxd_device *idxd)
 {
 	struct device *dev = &idxd->pdev->dev;
-	int rc;
 	u32 status;
 
-	lockdep_assert_held(&idxd->dev_lock);
 	if (idxd_is_enabled(idxd)) {
 		dev_dbg(dev, "Device already enabled\n");
 		return -ENXIO;
 	}
 
-	rc = idxd_cmd_send(idxd, IDXD_CMD_ENABLE_DEVICE, 0);
-	if (rc < 0)
-		return rc;
-	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
-	if (rc < 0)
-		return rc;
+	idxd_cmd_exec(idxd, IDXD_CMD_ENABLE_DEVICE, 0, &status);
 
 	/* If the command is successful or if the device was enabled */
 	if (status != IDXD_CMDSTS_SUCCESS &&
@@ -432,58 +413,29 @@
 int idxd_device_disable(struct idxd_device *idxd)
 {
 	struct device *dev = &idxd->pdev->dev;
-	int rc;
 	u32 status;
 
-	lockdep_assert_held(&idxd->dev_lock);
 	if (!idxd_is_enabled(idxd)) {
 		dev_dbg(dev, "Device is not enabled\n");
 		return 0;
 	}
 
-	rc = idxd_cmd_send(idxd, IDXD_CMD_DISABLE_DEVICE, 0);
-	if (rc < 0)
-		return rc;
-	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
-	if (rc < 0)
-		return rc;
+	idxd_cmd_exec(idxd, IDXD_CMD_DISABLE_DEVICE, 0, &status);
 
 	/* If the command is successful or if the device was disabled */
 	if (status != IDXD_CMDSTS_SUCCESS &&
 	    !(status & IDXD_CMDSTS_ERR_DIS_DEV_EN)) {
 		dev_dbg(dev, "%s: err_code: %#x\n", __func__, status);
-		rc = -ENXIO;
-		return rc;
+		return -ENXIO;
 	}
 
 	idxd->state = IDXD_DEV_CONF_READY;
 	return 0;
 }
 
-int __idxd_device_reset(struct idxd_device *idxd)
+void idxd_device_reset(struct idxd_device *idxd)
 {
-	u32 status;
-	int rc;
-
-	rc = idxd_cmd_send(idxd, IDXD_CMD_RESET_DEVICE, 0);
-	if (rc < 0)
-		return rc;
-	rc = idxd_cmd_wait(idxd, &status, IDXD_REG_TIMEOUT);
-	if (rc < 0)
-		return rc;
-
-	return 0;
-}
-
-int idxd_device_reset(struct idxd_device *idxd)
-{
-	unsigned long flags;
-	int rc;
-
-	spin_lock_irqsave(&idxd->dev_lock, flags);
-	rc = __idxd_device_reset(idxd);
-	spin_unlock_irqrestore(&idxd->dev_lock, flags);
-	return rc;
+	idxd_cmd_exec(idxd, IDXD_CMD_RESET_DEVICE, 0, NULL);
 }
 
 /* Device configuration bits */
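
The idxd_cmd_exec() rework above is an instance of a standard pattern:
write the command, sleep on an on-stack completion, and let the interrupt
handler call complete(). Stripped of the idxd locking and the
IDXD_FLAG_CMD_RUNNING serialization, the shape is roughly this (struct
my_ctrl and my_cmd_exec are invented):

  #include <linux/completion.h>
  #include <linux/io.h>

  struct my_ctrl {
  	void __iomem *cmd_reg;
  	struct completion *cmd_done;	/* consumed by the IRQ handler */
  };

  static void my_cmd_exec(struct my_ctrl *ctrl, u32 cmd)
  {
  	DECLARE_COMPLETION_ONSTACK(done);

  	ctrl->cmd_done = &done;
  	writel(cmd, ctrl->cmd_reg);	/* kick the hardware */
  	wait_for_completion(&done);	/* IRQ handler calls complete() */
  }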
diff --git a/drivers/dma/idxd/dma.c b/drivers/dma/idxd/dma.c
index c64c142..0c892cb 100644
--- a/drivers/dma/idxd/dma.c
+++ b/drivers/dma/idxd/dma.c
@@ -133,7 +133,7 @@
 					  dma_cookie_t cookie,
 					  struct dma_tx_state *txstate)
 {
-	return dma_cookie_status(dma_chan, cookie, txstate);
+	return DMA_OUT_OF_ORDER;
 }
 
 /*
@@ -174,6 +174,7 @@
 	INIT_LIST_HEAD(&dma->channels);
 	dma->dev = &idxd->pdev->dev;
 
+	dma_cap_set(DMA_COMPLETION_NO_ORDER, dma->cap_mask);
 	dma->device_release = idxd_dma_release;
 
 	if (idxd->hw.opcap.bits[0] & IDXD_OPCAP_MEMMOVE) {
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h
index 908c8d0..e62b479 100644
--- a/drivers/dma/idxd/idxd.h
+++ b/drivers/dma/idxd/idxd.h
@@ -104,7 +104,6 @@
 	enum idxd_wq_state state;
 	unsigned long flags;
 	union wqcfg wqcfg;
-	atomic_t dq_count;	/* dedicated queue flow control */
 	u32 vec_ptr;		/* interrupt steering */
 	struct dsa_hw_desc **hw_descs;
 	int num_descs;
@@ -112,10 +111,8 @@
 	dma_addr_t compls_addr;
 	int compls_size;
 	struct idxd_desc **descs;
-	struct sbitmap sbmap;
+	struct sbitmap_queue sbq;
 	struct dma_chan dma_chan;
-	struct percpu_rw_semaphore submit_lock;
-	wait_queue_head_t submit_waitq;
 	char name[WQ_NAME_SIZE + 1];
 };
 
@@ -145,6 +142,7 @@
 
 enum idxd_device_flag {
 	IDXD_FLAG_CONFIGURABLE = 0,
+	IDXD_FLAG_CMD_RUNNING,
 };
 
 struct idxd_device {
@@ -161,6 +159,7 @@
 	void __iomem *reg_base;
 
 	spinlock_t dev_lock;	/* spinlock for device */
+	struct completion *cmd_done;
 	struct idxd_group *groups;
 	struct idxd_wq *wqs;
 	struct idxd_engine *engines;
@@ -183,12 +182,14 @@
 	int nr_tokens;		/* non-reserved tokens */
 
 	union sw_err_reg sw_err;
-
+	wait_queue_head_t cmd_waitq;
 	struct msix_entry *msix_entries;
 	int num_wq_irqs;
 	struct idxd_irq_entry *irq_entries;
 
 	struct dma_device dma_dev;
+	struct workqueue_struct *wq;
+	struct work_struct work;
 };
 
 /* IDXD software descriptor */
@@ -201,6 +202,7 @@
 	struct llist_node llnode;
 	struct list_head list;
 	int id;
+	int cpu;
 	struct idxd_wq *wq;
 };
 
@@ -271,14 +273,14 @@
 void idxd_mask_error_interrupts(struct idxd_device *idxd);
 void idxd_unmask_error_interrupts(struct idxd_device *idxd);
 void idxd_mask_msix_vectors(struct idxd_device *idxd);
-int idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id);
-int idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id);
+void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id);
+void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id);
 
 /* device control */
+void idxd_device_init_reset(struct idxd_device *idxd);
 int idxd_device_enable(struct idxd_device *idxd);
 int idxd_device_disable(struct idxd_device *idxd);
-int idxd_device_reset(struct idxd_device *idxd);
-int __idxd_device_reset(struct idxd_device *idxd);
+void idxd_device_reset(struct idxd_device *idxd);
 void idxd_device_cleanup(struct idxd_device *idxd);
 int idxd_device_config(struct idxd_device *idxd);
 void idxd_device_wqs_clear_state(struct idxd_device *idxd);
@@ -288,6 +290,7 @@
 void idxd_wq_free_resources(struct idxd_wq *wq);
 int idxd_wq_enable(struct idxd_wq *wq);
 int idxd_wq_disable(struct idxd_wq *wq);
+void idxd_wq_drain(struct idxd_wq *wq);
 int idxd_wq_map_portal(struct idxd_wq *wq);
 void idxd_wq_unmap_portal(struct idxd_wq *wq);
 void idxd_wq_disable_cleanup(struct idxd_wq *wq);
diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c
index 7778c05..c7c6197 100644
--- a/drivers/dma/idxd/init.c
+++ b/drivers/dma/idxd/init.c
@@ -141,22 +141,12 @@
 	return rc;
 }
 
-static void idxd_wqs_free_lock(struct idxd_device *idxd)
-{
-	int i;
-
-	for (i = 0; i < idxd->max_wqs; i++) {
-		struct idxd_wq *wq = &idxd->wqs[i];
-
-		percpu_free_rwsem(&wq->submit_lock);
-	}
-}
-
 static int idxd_setup_internals(struct idxd_device *idxd)
 {
 	struct device *dev = &idxd->pdev->dev;
 	int i;
 
+	init_waitqueue_head(&idxd->cmd_waitq);
 	idxd->groups = devm_kcalloc(dev, idxd->max_groups,
 				    sizeof(struct idxd_group), GFP_KERNEL);
 	if (!idxd->groups)
@@ -181,19 +171,11 @@
 
 	for (i = 0; i < idxd->max_wqs; i++) {
 		struct idxd_wq *wq = &idxd->wqs[i];
-		int rc;
 
 		wq->id = i;
 		wq->idxd = idxd;
 		mutex_init(&wq->wq_lock);
-		atomic_set(&wq->dq_count, 0);
-		init_waitqueue_head(&wq->submit_waitq);
 		wq->idxd_cdev.minor = -1;
-		rc = percpu_init_rwsem(&wq->submit_lock);
-		if (rc < 0) {
-			idxd_wqs_free_lock(idxd);
-			return rc;
-		}
 	}
 
 	for (i = 0; i < idxd->max_engines; i++) {
@@ -201,6 +183,10 @@
 		idxd->engines[i].id = i;
 	}
 
+	idxd->wq = create_workqueue(dev_name(dev));
+	if (!idxd->wq)
+		return -ENOMEM;
+
 	return 0;
 }
 
@@ -296,9 +282,7 @@
 	int rc;
 
 	dev_dbg(dev, "%s entered and resetting device\n", __func__);
-	rc = idxd_device_reset(idxd);
-	if (rc < 0)
-		return rc;
+	idxd_device_init_reset(idxd);
 	dev_dbg(dev, "IDXD reset complete\n");
 
 	idxd_read_caps(idxd);
@@ -433,11 +417,8 @@
 	int rc, i;
 	struct idxd_irq_entry *irq_entry;
 	int msixcnt = pci_msix_vec_count(pdev);
-	unsigned long flags;
 
-	spin_lock_irqsave(&idxd->dev_lock, flags);
 	rc = idxd_device_disable(idxd);
-	spin_unlock_irqrestore(&idxd->dev_lock, flags);
 	if (rc)
 		dev_err(&pdev->dev, "Disabling device failed\n");
 
@@ -453,6 +434,8 @@
 		idxd_flush_pending_llist(irq_entry);
 		idxd_flush_work_list(irq_entry);
 	}
+
+	destroy_workqueue(idxd->wq);
 }
 
 static void idxd_remove(struct pci_dev *pdev)
@@ -462,7 +445,6 @@
 	dev_dbg(&pdev->dev, "%s called\n", __func__);
 	idxd_cleanup_sysfs(idxd);
 	idxd_shutdown(pdev);
-	idxd_wqs_free_lock(idxd);
 	mutex_lock(&idxd_idr_lock);
 	idr_remove(&idxd_idrs[idxd->type], idxd->id);
 	mutex_unlock(&idxd_idr_lock);
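
One aside on the init.c hunk: create_workqueue() is a legacy wrapper. An equivalent modern spelling, shown for reference only (the patch itself keeps the wrapper), would be:

	/* What create_workqueue(dev_name(dev)) expands to */
	idxd->wq = alloc_workqueue("%s", __WQ_LEGACY | WQ_MEM_RECLAIM, 1,
				   dev_name(dev));
	if (!idxd->wq)
		return -ENOMEM;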
diff --git a/drivers/dma/idxd/irq.c b/drivers/dma/idxd/irq.c
index 8a35f58..b514255 100644
--- a/drivers/dma/idxd/irq.c
+++ b/drivers/dma/idxd/irq.c
@@ -23,16 +23,13 @@
 	}
 }
 
-static int idxd_restart(struct idxd_device *idxd)
+static void idxd_device_reinit(struct work_struct *work)
 {
-	int i, rc;
+	struct idxd_device *idxd = container_of(work, struct idxd_device, work);
+	struct device *dev = &idxd->pdev->dev;
+	int rc, i;
 
-	lockdep_assert_held(&idxd->dev_lock);
-
-	rc = __idxd_device_reset(idxd);
-	if (rc < 0)
-		goto out;
-
+	idxd_device_reset(idxd);
 	rc = idxd_device_config(idxd);
 	if (rc < 0)
 		goto out;
@@ -47,19 +44,16 @@
 		if (wq->state == IDXD_WQ_ENABLED) {
 			rc = idxd_wq_enable(wq);
 			if (rc < 0) {
-				dev_warn(&idxd->pdev->dev,
-					 "Unable to re-enable wq %s\n",
+				dev_warn(dev, "Unable to re-enable wq %s\n",
 					 dev_name(&wq->conf_dev));
 			}
 		}
 	}
 
-	return 0;
+	return;
 
  out:
 	idxd_device_wqs_clear_state(idxd);
-	idxd->state = IDXD_DEV_HALTED;
-	return rc;
 }
 
 irqreturn_t idxd_irq_handler(int vec, void *data)
@@ -78,7 +72,7 @@
 	struct device *dev = &idxd->pdev->dev;
 	union gensts_reg gensts;
 	u32 cause, val = 0;
-	int i, rc;
+	int i;
 	bool err = false;
 
 	cause = ioread32(idxd->reg_base + IDXD_INTCAUSE_OFFSET);
@@ -117,8 +111,8 @@
 	}
 
 	if (cause & IDXD_INTC_CMD) {
-		/* Driver does use command interrupts */
 		val |= IDXD_INTC_CMD;
+		complete(idxd->cmd_done);
 	}
 
 	if (cause & IDXD_INTC_OCCUPY) {
@@ -145,21 +139,24 @@
 
 	gensts.bits = ioread32(idxd->reg_base + IDXD_GENSTATS_OFFSET);
 	if (gensts.state == IDXD_DEVICE_STATE_HALT) {
-		spin_lock_bh(&idxd->dev_lock);
+		idxd->state = IDXD_DEV_HALTED;
 		if (gensts.reset_type == IDXD_DEVICE_RESET_SOFTWARE) {
-			rc = idxd_restart(idxd);
-			if (rc < 0)
-				dev_err(&idxd->pdev->dev,
-					"idxd restart failed, device halt.");
+			/*
+			 * If a software reset is needed, queue the work on
+			 * the driver's workqueue so that device command
+			 * completion interrupts can still be serviced.
+			 */
+			INIT_WORK(&idxd->work, idxd_device_reinit);
+			queue_work(idxd->wq, &idxd->work);
 		} else {
+			spin_lock_bh(&idxd->dev_lock);
 			idxd_device_wqs_clear_state(idxd);
-			idxd->state = IDXD_DEV_HALTED;
 			dev_err(&idxd->pdev->dev,
 				"idxd halted, need %s.\n",
 				gensts.reset_type == IDXD_DEVICE_RESET_FLR ?
 				"FLR" : "system reset");
+			spin_unlock_bh(&idxd->dev_lock);
 		}
-		spin_unlock_bh(&idxd->dev_lock);
 	}
 
  out:
@@ -264,8 +261,6 @@
 
 	processed = idxd_desc_process(irq_entry);
 	idxd_unmask_msix_vector(irq_entry->idxd, irq_entry->id);
-	/* catch anything unprocessed after unmasking */
-	processed += idxd_desc_process(irq_entry);
 
 	if (processed == 0)
 		return IRQ_NONE;
diff --git a/drivers/dma/idxd/submit.c b/drivers/dma/idxd/submit.c
index 45a0c58..156a1ee 100644
--- a/drivers/dma/idxd/submit.c
+++ b/drivers/dma/idxd/submit.c
@@ -8,61 +8,61 @@
 #include "idxd.h"
 #include "registers.h"
 
-struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
+static struct idxd_desc *__get_desc(struct idxd_wq *wq, int idx, int cpu)
 {
 	struct idxd_desc *desc;
-	int idx;
-	struct idxd_device *idxd = wq->idxd;
-
-	if (idxd->state != IDXD_DEV_ENABLED)
-		return ERR_PTR(-EIO);
-
-	if (optype == IDXD_OP_BLOCK)
-		percpu_down_read(&wq->submit_lock);
-	else if (!percpu_down_read_trylock(&wq->submit_lock))
-		return ERR_PTR(-EBUSY);
-
-	if (!atomic_add_unless(&wq->dq_count, 1, wq->size)) {
-		int rc;
-
-		if (optype == IDXD_OP_NONBLOCK) {
-			percpu_up_read(&wq->submit_lock);
-			return ERR_PTR(-EAGAIN);
-		}
-
-		percpu_up_read(&wq->submit_lock);
-		percpu_down_write(&wq->submit_lock);
-		rc = wait_event_interruptible(wq->submit_waitq,
-					      atomic_add_unless(&wq->dq_count,
-								1, wq->size) ||
-					       idxd->state != IDXD_DEV_ENABLED);
-		percpu_up_write(&wq->submit_lock);
-		if (rc < 0)
-			return ERR_PTR(-EINTR);
-		if (idxd->state != IDXD_DEV_ENABLED)
-			return ERR_PTR(-EIO);
-	} else {
-		percpu_up_read(&wq->submit_lock);
-	}
-
-	idx = sbitmap_get(&wq->sbmap, 0, false);
-	if (idx < 0) {
-		atomic_dec(&wq->dq_count);
-		return ERR_PTR(-EAGAIN);
-	}
 
 	desc = wq->descs[idx];
 	memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
 	memset(desc->completion, 0, sizeof(struct dsa_completion_record));
+	desc->cpu = cpu;
 	return desc;
 }
 
+struct idxd_desc *idxd_alloc_desc(struct idxd_wq *wq, enum idxd_op_type optype)
+{
+	int cpu, idx;
+	struct idxd_device *idxd = wq->idxd;
+	DEFINE_SBQ_WAIT(wait);
+	struct sbq_wait_state *ws;
+	struct sbitmap_queue *sbq;
+
+	if (idxd->state != IDXD_DEV_ENABLED)
+		return ERR_PTR(-EIO);
+
+	sbq = &wq->sbq;
+	idx = sbitmap_queue_get(sbq, &cpu);
+	if (idx < 0) {
+		if (optype == IDXD_OP_NONBLOCK)
+			return ERR_PTR(-EAGAIN);
+	} else {
+		return __get_desc(wq, idx, cpu);
+	}
+
+	ws = &sbq->ws[0];
+	for (;;) {
+		sbitmap_prepare_to_wait(sbq, ws, &wait, TASK_INTERRUPTIBLE);
+		if (signal_pending_state(TASK_INTERRUPTIBLE, current))
+			break;
+		idx = sbitmap_queue_get(sbq, &cpu);
+		if (idx >= 0)
+			break;
+		schedule();
+	}
+
+	sbitmap_finish_wait(sbq, ws, &wait);
+	if (idx < 0)
+		return ERR_PTR(-EAGAIN);
+
+	return __get_desc(wq, idx, cpu);
+}
+
 void idxd_free_desc(struct idxd_wq *wq, struct idxd_desc *desc)
 {
-	atomic_dec(&wq->dq_count);
+	int cpu = desc->cpu;
 
-	sbitmap_clear_bit(&wq->sbmap, desc->id);
-	wake_up(&wq->submit_waitq);
+	desc->cpu = -1;
+	sbitmap_queue_clear(&wq->sbq, desc->id, cpu);
 }
 
 int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
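
The submit.c rewrite trades the percpu rwsem, atomic counter and open-coded sbitmap for a single sbitmap_queue, which supplies both the descriptor bitmap and the sleep/wake machinery used in the loop above. The queue's own setup and teardown are not in this hunk; the pairing calls would look roughly like the sketch below (the depth and NUMA-node arguments are assumptions):

	/* Setup, e.g. alongside the descriptor arrays (sketch) */
	rc = sbitmap_queue_init_node(&wq->sbq, num_descs, -1 /* shift */,
				     false /* round_robin */, GFP_KERNEL,
				     dev_to_node(dev));
	if (rc < 0)
		return rc;

	/* Teardown, e.g. in idxd_wq_free_resources() (sketch) */
	sbitmap_queue_free(&wq->sbq);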
diff --git a/drivers/dma/idxd/sysfs.c b/drivers/dma/idxd/sysfs.c
index 2e2c5082..dcba609 100644
--- a/drivers/dma/idxd/sysfs.c
+++ b/drivers/dma/idxd/sysfs.c
@@ -118,12 +118,11 @@
 		if (!try_module_get(THIS_MODULE))
 			return -ENXIO;
 
-		spin_lock_irqsave(&idxd->dev_lock, flags);
-
 		/* Perform IDXD configuration and enabling */
+		spin_lock_irqsave(&idxd->dev_lock, flags);
 		rc = idxd_device_config(idxd);
+		spin_unlock_irqrestore(&idxd->dev_lock, flags);
 		if (rc < 0) {
-			spin_unlock_irqrestore(&idxd->dev_lock, flags);
 			module_put(THIS_MODULE);
 			dev_warn(dev, "Device config failed: %d\n", rc);
 			return rc;
@@ -132,18 +131,15 @@
 		/* start device */
 		rc = idxd_device_enable(idxd);
 		if (rc < 0) {
-			spin_unlock_irqrestore(&idxd->dev_lock, flags);
 			module_put(THIS_MODULE);
 			dev_warn(dev, "Device enable failed: %d\n", rc);
 			return rc;
 		}
 
-		spin_unlock_irqrestore(&idxd->dev_lock, flags);
 		dev_info(dev, "Device %s enabled\n", dev_name(dev));
 
 		rc = idxd_register_dma_device(idxd);
 		if (rc < 0) {
-			spin_unlock_irqrestore(&idxd->dev_lock, flags);
 			module_put(THIS_MODULE);
 			dev_dbg(dev, "Failed to register dmaengine device\n");
 			return rc;
@@ -188,8 +184,8 @@
 
 		spin_lock_irqsave(&idxd->dev_lock, flags);
 		rc = idxd_device_config(idxd);
+		spin_unlock_irqrestore(&idxd->dev_lock, flags);
 		if (rc < 0) {
-			spin_unlock_irqrestore(&idxd->dev_lock, flags);
 			mutex_unlock(&wq->wq_lock);
 			dev_warn(dev, "Writing WQ %d config failed: %d\n",
 				 wq->id, rc);
@@ -198,13 +194,11 @@
 
 		rc = idxd_wq_enable(wq);
 		if (rc < 0) {
-			spin_unlock_irqrestore(&idxd->dev_lock, flags);
 			mutex_unlock(&wq->wq_lock);
 			dev_warn(dev, "WQ %d enabling failed: %d\n",
 				 wq->id, rc);
 			return rc;
 		}
-		spin_unlock_irqrestore(&idxd->dev_lock, flags);
 
 		rc = idxd_wq_map_portal(wq);
 		if (rc < 0) {
@@ -212,7 +206,6 @@
 			rc = idxd_wq_disable(wq);
 			if (rc < 0)
 				dev_warn(dev, "IDXD wq disable failed\n");
-			spin_unlock_irqrestore(&idxd->dev_lock, flags);
 			mutex_unlock(&wq->wq_lock);
 			return rc;
 		}
@@ -248,7 +241,6 @@
 {
 	struct idxd_device *idxd = wq->idxd;
 	struct device *dev = &idxd->pdev->dev;
-	unsigned long flags;
 	int rc;
 
 	mutex_lock(&wq->wq_lock);
@@ -269,9 +261,8 @@
 
 	idxd_wq_unmap_portal(wq);
 
-	spin_lock_irqsave(&idxd->dev_lock, flags);
+	idxd_wq_drain(wq);
 	rc = idxd_wq_disable(wq);
-	spin_unlock_irqrestore(&idxd->dev_lock, flags);
 
 	idxd_wq_free_resources(wq);
 	wq->client_count = 0;
@@ -287,7 +278,6 @@
 static int idxd_config_bus_remove(struct device *dev)
 {
 	int rc;
-	unsigned long flags;
 
 	dev_dbg(dev, "%s called for %s\n", __func__, dev_name(dev));
 
@@ -313,14 +303,14 @@
 		}
 
 		idxd_unregister_dma_device(idxd);
-		spin_lock_irqsave(&idxd->dev_lock, flags);
 		rc = idxd_device_disable(idxd);
 		for (i = 0; i < idxd->max_wqs; i++) {
 			struct idxd_wq *wq = &idxd->wqs[i];
 
+			mutex_lock(&wq->wq_lock);
 			idxd_wq_disable_cleanup(wq);
+			mutex_unlock(&wq->wq_lock);
 		}
-		spin_unlock_irqrestore(&idxd->dev_lock, flags);
 		module_put(THIS_MODULE);
 		if (rc < 0)
 			dev_warn(dev, "Device disable failed\n");
diff --git a/drivers/dma/imx-sdma.c b/drivers/dma/imx-sdma.c
index 270992c..4f8d8f5 100644
--- a/drivers/dma/imx-sdma.c
+++ b/drivers/dma/imx-sdma.c
@@ -335,7 +335,7 @@
  * @sdma:		pointer to the SDMA engine for this channel
  * @channel:		the channel number, matches dmaengine chan_id + 1
  * @direction:		transfer type. Needed for setting SDMA script
- * @slave_config	Slave configuration
+ * @slave_config:	Slave configuration
  * @peripheral_type:	Peripheral type. Needed for setting SDMA script
  * @event_id0:		aka dma request line
  * @event_id1:		for channels that use 2 events
@@ -354,8 +354,10 @@
  * @shp_addr:		value for gReg[6]
  * @per_addr:		value for gReg[2]
  * @status:		status of dma channel
+ * @context_loaded:	ensure context is only loaded once
  * @data:		specific sdma interface structure
  * @bd_pool:		dma_pool for bd
+ * @terminate_worker:	used to call back into terminate work function
  */
 struct sdma_channel {
 	struct virt_dma_chan		vc;
diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c
index fd782ae..a814b20 100644
--- a/drivers/dma/ioat/dma.c
+++ b/drivers/dma/ioat/dma.c
@@ -193,7 +193,7 @@
 
 /**
  * ioat_update_pending - log pending descriptors
- * @ioat: ioat+ channel
+ * @ioat_chan: ioat+ channel
  *
  * Check if the number of unsubmitted descriptors has exceeded the
  * watermark.  Called with prep_lock held
@@ -457,7 +457,7 @@
 
 /**
  * ioat_check_space_lock - verify space and grab ring producer lock
- * @ioat: ioat,3 channel (ring) to operate on
+ * @ioat_chan: ioat,3 channel (ring) to operate on
  * @num_descs: allocation length
  */
 int ioat_check_space_lock(struct ioatdma_chan *ioat_chan, int num_descs)
@@ -585,7 +585,8 @@
 
 /**
  * __cleanup - reclaim used descriptors
- * @ioat: channel (ring) to clean
+ * @ioat_chan: channel (ring) to clean
+ * @phys_complete: zeroed (or not) completion address (from status)
  */
 static void __cleanup(struct ioatdma_chan *ioat_chan, dma_addr_t phys_complete)
 {
diff --git a/drivers/dma/ioat/init.c b/drivers/dma/ioat/init.c
index 58d1356..df0935a 100644
--- a/drivers/dma/ioat/init.c
+++ b/drivers/dma/ioat/init.c
@@ -602,7 +602,7 @@
 
 /**
  * ioat_free_chan_resources - release all the descriptors
- * @chan: the channel to be cleaned
+ * @c: the channel to be cleaned
  */
 static void ioat_free_chan_resources(struct dma_chan *c)
 {
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
index db0e274..3350bff 100644
--- a/drivers/dma/iop-adma.c
+++ b/drivers/dma/iop-adma.c
@@ -406,8 +406,7 @@
 
 /**
  * iop_adma_alloc_chan_resources -  returns the number of allocated descriptors
- * @chan - allocate descriptor resources for this channel
- * @client - current client requesting the channel be ready for requests
+ * @chan: allocate descriptor resources for this channel
  *
  * Note: We keep the slots for 1 operation on iop_chan->chain at all times.  To
  * avoid deadlock, via async_xor, num_descs_in_pool must at a minimum be
diff --git a/drivers/dma/mediatek/mtk-hsdma.c b/drivers/dma/mediatek/mtk-hsdma.c
index 4c58da7..f133ae8 100644
--- a/drivers/dma/mediatek/mtk-hsdma.c
+++ b/drivers/dma/mediatek/mtk-hsdma.c
@@ -107,10 +107,10 @@
  * struct mtk_hsdma_pdesc - This is the struct holding info describing physical
  *			    descriptor (PD) and its placement must be kept at
  *			    4-bytes alignment in little endian order.
- * @desc[1-4]:		    The control pad used to indicate hardware how to
- *			    deal with the descriptor such as source and
- *			    destination address and data length. The maximum
- *			    data length each pdesc can handle is 0x3f80 bytes
+ * @desc1:		    | The control pad used to indicate hardware how to
+ * @desc2:		    | deal with the descriptor such as source and
+ * @desc3:		    | destination address and data length. The maximum
+ * @desc4:		    | data length each pdesc can handle is 0x3f80 bytes
  */
 struct mtk_hsdma_pdesc {
 	__le32 desc1;
diff --git a/drivers/dma/mmp_pdma.c b/drivers/dma/mmp_pdma.c
index ad06f26..f42f792 100644
--- a/drivers/dma/mmp_pdma.c
+++ b/drivers/dma/mmp_pdma.c
@@ -290,7 +290,7 @@
 	spin_unlock_irqrestore(&pdev->phy_lock, flags);
 }
 
-/**
+/*
  * start_pending_queue - transfer any pending transactions
  * pending list ==> running list
  */
@@ -381,7 +381,7 @@
 	return desc;
 }
 
-/**
+/*
  * mmp_pdma_alloc_chan_resources - Allocate resources for DMA channel.
  *
  * This function will create a dma pool for descriptor allocation.
@@ -854,7 +854,7 @@
 	return ret;
 }
 
-/**
+/*
  * mmp_pdma_issue_pending - Issue the DMA start command
  * pending list ==> running list
  */
@@ -1060,7 +1060,7 @@
 	pdev->dma_channels = dma_channels;
 
 	for (i = 0; i < dma_channels; i++) {
-		if (platform_get_irq(op, i) > 0)
+		if (platform_get_irq_optional(op, i) > 0)
 			irq_num++;
 	}
 
diff --git a/drivers/dma/mmp_tdma.c b/drivers/dma/mmp_tdma.c
index dbc6a48..960c7c4 100644
--- a/drivers/dma/mmp_tdma.c
+++ b/drivers/dma/mmp_tdma.c
@@ -682,7 +682,7 @@
 	if (irq_num != chan_num) {
 		irq = platform_get_irq(pdev, 0);
 		ret = devm_request_irq(&pdev->dev, irq,
-			mmp_tdma_int_handler, 0, "tdma", tdev);
+			mmp_tdma_int_handler, IRQF_SHARED, "tdma", tdev);
 		if (ret)
 			return ret;
 	}
diff --git a/drivers/dma/mv_xor_v2.c b/drivers/dma/mv_xor_v2.c
index 157c959..9225f08 100644
--- a/drivers/dma/mv_xor_v2.c
+++ b/drivers/dma/mv_xor_v2.c
@@ -135,9 +135,11 @@
 /**
  * struct mv_xor_v2_device - implements a xor device
  * @lock: lock for the engine
+ * @clk: reference to the 'core' clock
+ * @reg_clk: reference to the 'reg' clock
  * @dma_base: memory mapped DMA register base
  * @glob_base: memory mapped global register base
- * @irq_tasklet:
+ * @irq_tasklet: tasklet used for IRQ handling call-backs
  * @free_sw_desc: linked list of free SW descriptors
  * @dmadev: dma device
  * @dmachan: dma channel
@@ -146,6 +148,8 @@
  * @sw_desq: SW descriptors queue
  * @desc_size: HW descriptor size
  * @npendings: number of pending descriptors (for which tx_submit has
  * been called, but not yet issue_pending)
+ * @hw_queue_idx: HW queue index
+ * @msi_desc: local interrupt descriptor information
  */
 struct mv_xor_v2_device {
diff --git a/drivers/dma/nbpfaxi.c b/drivers/dma/nbpfaxi.c
index 594409a..74df621 100644
--- a/drivers/dma/nbpfaxi.c
+++ b/drivers/dma/nbpfaxi.c
@@ -144,6 +144,7 @@
  * @async_tx:	dmaengine object
  * @user_wait:	waiting for a user ack
  * @length:	total transfer length
+ * @chan:	associated DMAC channel
  * @sg:		list of hardware descriptors, represented by struct nbpf_link_desc
  * @node:	member in channel descriptor lists
  */
@@ -174,13 +175,17 @@
 /**
  * struct nbpf_channel - one DMAC channel
  * @dma_chan:	standard dmaengine channel object
+ * @tasklet:	channel specific tasklet used for callbacks
  * @base:	register address base
  * @nbpf:	DMAC
  * @name:	IRQ name
  * @irq:	IRQ number
- * @slave_addr:	address for slave DMA
- * @slave_width:slave data size in bytes
- * @slave_burst:maximum slave burst size in bytes
+ * @slave_src_addr:	source address for slave DMA
+ * @slave_src_width:	source slave data size in bytes
+ * @slave_src_burst:	maximum source slave burst size in bytes
+ * @slave_dst_addr:	destination address for slave DMA
+ * @slave_dst_width:	destination slave data size in bytes
+ * @slave_dst_burst:	maximum destination slave burst size in bytes
  * @terminal:	DMA terminal, assigned to this channel
  * @dmarq_cfg:	DMA request line configuration - high / low, edge / level for NBPF_CHAN_CFG
  * @flags:	configuration flags from DT
@@ -191,6 +196,8 @@
  * @active:	list of descriptors, scheduled for processing
  * @done:	list of completed descriptors, waiting post-processing
  * @desc_page:	list of additionally allocated descriptor pages - if any
+ * @running:	linked descriptor of running transaction
+ * @paused:	are transfers on this channel paused?
  */
 struct nbpf_channel {
 	struct dma_chan dma_chan;
diff --git a/drivers/dma/of-dma.c b/drivers/dma/of-dma.c
index b2c2b5e..863f2aa 100644
--- a/drivers/dma/of-dma.c
+++ b/drivers/dma/of-dma.c
@@ -46,7 +46,7 @@
 /**
  * of_dma_router_xlate - translation function for router devices
  * @dma_spec:	pointer to DMA specifier as found in the device tree
- * @of_dma:	pointer to DMA controller data (router information)
+ * @ofdma:	pointer to DMA controller data (router information)
  *
  * The function creates new dma_spec to be passed to the router driver's
  * of_dma_route_allocate() function to prepare a dma_spec which will be used
@@ -92,7 +92,7 @@
  * @np:			device node of DMA controller
  * @of_dma_xlate:	translation function which converts a phandle
  *			arguments list into a dma_chan structure
- * @data		pointer to controller specific data to be used by
+ * @data:		pointer to controller specific data to be used by
  *			translation function
  *
  * Returns 0 on success or appropriate errno value on error.
@@ -295,7 +295,7 @@
 /**
  * of_dma_simple_xlate - Simple DMA engine translation function
  * @dma_spec:	pointer to DMA specifier as found in the device tree
- * @of_dma:	pointer to DMA controller data
+ * @ofdma:	pointer to DMA controller data
  *
  * A simple translation function for devices that use a 32-bit value for the
  * filter_param when calling the DMA engine dma_request_channel() function.
@@ -323,7 +323,7 @@
 /**
  * of_dma_xlate_by_chan_id - Translate dt property to DMA channel by channel id
  * @dma_spec:	pointer to DMA specifier as found in the device tree
- * @of_dma:	pointer to DMA controller data
+ * @ofdma:	pointer to DMA controller data
  *
  * This function can be used as the of xlate callback for DMA driver which wants
  * to match the channel based on the channel id. When using this xlate function
diff --git a/drivers/dma/owl-dma.c b/drivers/dma/owl-dma.c
index 66ef70b..331c8d8 100644
--- a/drivers/dma/owl-dma.c
+++ b/drivers/dma/owl-dma.c
@@ -120,30 +120,38 @@
 #define BIT_FIELD(val, width, shift, newshift)	\
 		((((val) >> (shift)) & ((BIT(width)) - 1)) << (newshift))
 
+/* Frame count value is fixed as 1 */
+#define FCNT_VAL				0x1
+
 /**
- * struct owl_dma_lli_hw - Hardware link list for dma transfer
- * @next_lli: physical address of the next link list
- * @saddr: source physical address
- * @daddr: destination physical address
- * @flen: frame length
- * @fcnt: frame count
- * @src_stride: source stride
- * @dst_stride: destination stride
- * @ctrla: dma_mode and linklist ctrl config
- * @ctrlb: interrupt config
- * @const_num: data for constant fill
+ * enum owl_dmadesc_offsets - Describe DMA descriptor, hardware link
+ * list for dma transfer
+ * @OWL_DMADESC_NEXT_LLI: physical address of the next link list
+ * @OWL_DMADESC_SADDR: source physical address
+ * @OWL_DMADESC_DADDR: destination physical address
+ * @OWL_DMADESC_FLEN: frame length
+ * @OWL_DMADESC_SRC_STRIDE: source stride
+ * @OWL_DMADESC_DST_STRIDE: destination stride
+ * @OWL_DMADESC_CTRLA: dma_mode and linklist ctrl config
+ * @OWL_DMADESC_CTRLB: interrupt config
+ * @OWL_DMADESC_CONST_NUM: data for constant fill
  */
-struct owl_dma_lli_hw {
-	u32	next_lli;
-	u32	saddr;
-	u32	daddr;
-	u32	flen:20;
-	u32	fcnt:12;
-	u32	src_stride;
-	u32	dst_stride;
-	u32	ctrla;
-	u32	ctrlb;
-	u32	const_num;
+enum owl_dmadesc_offsets {
+	OWL_DMADESC_NEXT_LLI = 0,
+	OWL_DMADESC_SADDR,
+	OWL_DMADESC_DADDR,
+	OWL_DMADESC_FLEN,
+	OWL_DMADESC_SRC_STRIDE,
+	OWL_DMADESC_DST_STRIDE,
+	OWL_DMADESC_CTRLA,
+	OWL_DMADESC_CTRLB,
+	OWL_DMADESC_CONST_NUM,
+	OWL_DMADESC_SIZE
+};
+
+enum owl_dma_id {
+	S900_DMA,
+	S700_DMA,
 };
 
 /**
@@ -153,7 +161,7 @@
  * @node: node for txd's lli_list
  */
 struct owl_dma_lli {
-	struct  owl_dma_lli_hw	hw;
+	u32			hw[OWL_DMADESC_SIZE];
 	dma_addr_t		phys;
 	struct list_head	node;
 };
@@ -210,6 +218,7 @@
  * @pchans: array of data for the physical channels
  * @nr_vchans: the number of physical channels
  * @vchans: array of data for the physical channels
+ * @devid: device id based on OWL SoC
  */
 struct owl_dma {
 	struct dma_device	dma;
@@ -224,6 +233,7 @@
 
 	unsigned int		nr_vchans;
 	struct owl_dma_vchan	*vchans;
+	enum owl_dma_id		devid;
 };
 
 static void pchan_update(struct owl_dma_pchan *pchan, u32 reg,
@@ -313,11 +323,20 @@
 {
 	u32 ctl;
 
+	/*
+	 * Irrespective of the SoC, the interrupt control value is
+	 * placed in ctrlb starting at bit 18.
+	 */
 	ctl = BIT_FIELD(int_ctl, 7, 0, 18);
 
 	return ctl;
 }
 
+static u32 llc_hw_flen(struct owl_dma_lli *lli)
+{
+	return lli->hw[OWL_DMADESC_FLEN] & GENMASK(19, 0);
+}
+
 static void owl_dma_free_lli(struct owl_dma *od,
 			     struct owl_dma_lli *lli)
 {
@@ -349,8 +368,9 @@
 		list_add_tail(&next->node, &txd->lli_list);
 
 	if (prev) {
-		prev->hw.next_lli = next->phys;
-		prev->hw.ctrla |= llc_hw_ctrla(OWL_DMA_MODE_LME, 0);
+		prev->hw[OWL_DMADESC_NEXT_LLI] = next->phys;
+		prev->hw[OWL_DMADESC_CTRLA] |=
+					llc_hw_ctrla(OWL_DMA_MODE_LME, 0);
 	}
 
 	return next;
@@ -363,8 +383,8 @@
 				  struct dma_slave_config *sconfig,
 				  bool is_cyclic)
 {
-	struct owl_dma_lli_hw *hw = &lli->hw;
-	u32 mode;
+	struct owl_dma *od = to_owl_dma(vchan->vc.chan.device);
+	u32 mode, ctrlb;
 
 	mode = OWL_DMA_MODE_PW(0);
 
@@ -405,22 +425,40 @@
 		return -EINVAL;
 	}
 
-	hw->next_lli = 0; /* One link list by default */
-	hw->saddr = src;
-	hw->daddr = dst;
-
-	hw->fcnt = 1; /* Frame count fixed as 1 */
-	hw->flen = len; /* Max frame length is 1MB */
-	hw->src_stride = 0;
-	hw->dst_stride = 0;
-	hw->ctrla = llc_hw_ctrla(mode,
-				 OWL_DMA_LLC_SAV_LOAD_NEXT |
-				 OWL_DMA_LLC_DAV_LOAD_NEXT);
+	lli->hw[OWL_DMADESC_CTRLA] = llc_hw_ctrla(mode,
+						  OWL_DMA_LLC_SAV_LOAD_NEXT |
+						  OWL_DMA_LLC_DAV_LOAD_NEXT);
 
 	if (is_cyclic)
-		hw->ctrlb = llc_hw_ctrlb(OWL_DMA_INTCTL_BLOCK);
+		ctrlb = llc_hw_ctrlb(OWL_DMA_INTCTL_BLOCK);
 	else
-		hw->ctrlb = llc_hw_ctrlb(OWL_DMA_INTCTL_SUPER_BLOCK);
+		ctrlb = llc_hw_ctrlb(OWL_DMA_INTCTL_SUPER_BLOCK);
+
+	lli->hw[OWL_DMADESC_NEXT_LLI] = 0; /* One link list by default */
+	lli->hw[OWL_DMADESC_SADDR] = src;
+	lli->hw[OWL_DMADESC_DADDR] = dst;
+	lli->hw[OWL_DMADESC_SRC_STRIDE] = 0;
+	lli->hw[OWL_DMADESC_DST_STRIDE] = 0;
+
+	if (od->devid == S700_DMA) {
+		/* Max frame length is 1MB */
+		lli->hw[OWL_DMADESC_FLEN] = len;
+		/*
+		 * On S700, the word at offset 0x1C is shared between the
+		 * frame count and ctrlb: the first 12 bits hold the frame
+		 * count and the remaining 20 bits hold ctrlb.
+		 */
+		lli->hw[OWL_DMADESC_CTRLB] = FCNT_VAL | ctrlb;
+	} else {
+		/*
+		 * On S900, the word at offset 0xC is shared between the
+		 * frame length (max frame length is 1MB) and the frame
+		 * count: the first 20 bits hold the frame length and the
+		 * remaining 12 bits hold the frame count.
+		 */
+		lli->hw[OWL_DMADESC_FLEN] = len | FCNT_VAL << 20;
+		lli->hw[OWL_DMADESC_CTRLB] = ctrlb;
+	}
 
 	return 0;
 }
@@ -582,7 +620,7 @@
 
 		global_irq_pending = dma_readl(od, OWL_DMA_IRQ_PD0);
 
-		if (chan_irq_pending && !(global_irq_pending & BIT(i)))	{
+		if (chan_irq_pending && !(global_irq_pending & BIT(i))) {
 			dev_dbg(od->dma.dev,
 				"global and channel IRQ pending match err\n");
 
@@ -752,7 +790,7 @@
 			/* Start from the next active node */
 			if (lli->phys == next_lli_phy) {
 				list_for_each_entry(lli, &txd->lli_list, node)
-					bytes += lli->hw.flen;
+					bytes += llc_hw_flen(lli);
 				break;
 			}
 		}
@@ -783,7 +821,7 @@
 	if (vd) {
 		txd = to_owl_txd(&vd->tx);
 		list_for_each_entry(lli, &txd->lli_list, node)
-			bytes += lli->hw.flen;
+			bytes += llc_hw_flen(lli);
 	} else {
 		bytes = owl_dma_getbytes_chan(vchan);
 	}
@@ -1040,6 +1078,13 @@
 	return chan;
 }
 
+static const struct of_device_id owl_dma_match[] = {
+	{ .compatible = "actions,s900-dma", .data = (void *)S900_DMA,},
+	{ .compatible = "actions,s700-dma", .data = (void *)S700_DMA,},
+	{ /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, owl_dma_match);
+
 static int owl_dma_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
@@ -1069,6 +1114,8 @@
 	dev_info(&pdev->dev, "dma-channels %d, dma-requests %d\n",
 		 nr_channels, nr_requests);
 
+	od->devid = (enum owl_dma_id)of_device_get_match_data(&pdev->dev);
+
 	od->nr_pchans = nr_channels;
 	od->nr_vchans = nr_requests;
 
@@ -1201,12 +1248,6 @@
 	return 0;
 }
 
-static const struct of_device_id owl_dma_match[] = {
-	{ .compatible = "actions,s900-dma", },
-	{ /* sentinel */ }
-};
-MODULE_DEVICE_TABLE(of, owl_dma_match);
-
 static struct platform_driver owl_dma_driver = {
 	.probe	= owl_dma_probe,
 	.remove	= owl_dma_remove,
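
The S900/S700 split in owl-dma comes down to which 32-bit descriptor word the fixed frame count shares: on S900 it sits above the 20-bit frame length (the word at offset 0xC), on S700 it sits below ctrlb (the word at offset 0x1C). A standalone illustration of the packing (helper names are ours, not the driver's):

/* Illustration only -- mirrors the two branches in the hunk above. */

/* S900: FLEN[19:0] and FCNT[31:20] share the word at offset 0xC. */
static u32 s900_flen_word(u32 len)
{
	return (len & GENMASK(19, 0)) | (FCNT_VAL << 20);
}

/* S700: FCNT[11:0] and CTRLB[31:12] share the word at offset 0x1C. */
static u32 s700_ctrlb_word(u32 ctrlb)
{
	return FCNT_VAL | ctrlb;
}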
diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c
index 88b884c..2c508ee 100644
--- a/drivers/dma/pl330.c
+++ b/drivers/dma/pl330.c
@@ -33,7 +33,8 @@
 #define PL330_MAX_PERI		32
 #define PL330_MAX_BURST         16
 
-#define PL330_QUIRK_BROKEN_NO_FLUSHP BIT(0)
+#define PL330_QUIRK_BROKEN_NO_FLUSHP	BIT(0)
+#define PL330_QUIRK_PERIPH_BURST	BIT(1)
 
 enum pl330_cachectrl {
 	CCTRL0,		/* Noncacheable and nonbufferable */
@@ -284,7 +285,7 @@
 	u32		irq_ns;
 };
 
-/**
+/*
  * Request Configuration.
  * The PL330 core does not modify this and uses the last
  * working configuration if the request doesn't provide any.
@@ -509,6 +510,10 @@
 	{
 		.quirk = "arm,pl330-broken-no-flushp",
 		.id = PL330_QUIRK_BROKEN_NO_FLUSHP,
+	},
+	{
+		.quirk = "arm,pl330-periph-burst",
+		.id = PL330_QUIRK_PERIPH_BURST,
 	}
 };
 
@@ -885,6 +890,12 @@
 	void __iomem *regs = thrd->dmac->base;
 	u32 val;
 
+	/* If timed out due to halted state-machine */
+	if (_until_dmac_idle(thrd)) {
+		dev_err(thrd->dmac->ddma.dev, "DMAC halted!\n");
+		return;
+	}
+
 	val = (insn[0] << 16) | (insn[1] << 24);
 	if (!as_manager) {
 		val |= (1 << 0);
@@ -895,12 +906,6 @@
 	val = le32_to_cpu(*((__le32 *)&insn[2]));
 	writel(val, regs + DBGINST1);
 
-	/* If timed out due to halted state-machine */
-	if (_until_dmac_idle(thrd)) {
-		dev_err(thrd->dmac->ddma.dev, "DMAC halted!\n");
-		return;
-	}
-
 	/* Get going */
 	writel(0, regs + DBGCMD);
 }
@@ -1183,9 +1188,6 @@
 {
 	int off = 0;
 
-	if (pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP)
-		cond = BURST;
-
 	/*
 	 * do FLUSHP at beginning to clear any stale dma requests before the
 	 * first WFP.
@@ -1209,6 +1211,9 @@
 	int off = 0;
 	enum pl330_cond cond = BRST_LEN(pxs->ccr) > 1 ? BURST : SINGLE;
 
+	if (pl330->quirks & PL330_QUIRK_PERIPH_BURST)
+		cond = BURST;
+
 	switch (pxs->desc->rqtype) {
 	case DMA_MEM_TO_DEV:
 		/* fall through */
@@ -1231,8 +1236,9 @@
 }
 
 /*
- * transfer dregs with single transfers to peripheral, or a reduced size burst
- * for mem-to-mem.
+ * Only unaligned burst transfers leave dregs, so transfer the dregs
+ * with a reduced-size burst for mem-to-mem, mem-to-dev or
+ * dev-to-mem.
  */
 static int _dregs(struct pl330_dmac *pl330, unsigned int dry_run, u8 buf[],
 		const struct _xfer_spec *pxs, int transfer_length)
@@ -1243,22 +1249,31 @@
 	if (transfer_length == 0)
 		return off;
 
+	/*
+	 * dregs_len = (total bytes - BURST_TO_BYTE(bursts, ccr)) /
+	 *             BRST_SIZE(ccr)
+	 * The dregs length is always smaller than the burst length,
+	 * so for higher efficiency we can modify CCR to use a
+	 * reduced-size burst length for the dregs.
+	 */
+	dregs_ccr = pxs->ccr;
+	dregs_ccr &= ~((0xf << CC_SRCBRSTLEN_SHFT) |
+		(0xf << CC_DSTBRSTLEN_SHFT));
+	dregs_ccr |= (((transfer_length - 1) & 0xf) <<
+		CC_SRCBRSTLEN_SHFT);
+	dregs_ccr |= (((transfer_length - 1) & 0xf) <<
+		CC_DSTBRSTLEN_SHFT);
+
 	switch (pxs->desc->rqtype) {
 	case DMA_MEM_TO_DEV:
 		/* fall through */
 	case DMA_DEV_TO_MEM:
-		off += _ldst_peripheral(pl330, dry_run, &buf[off], pxs,
-			transfer_length, SINGLE);
+		off += _emit_MOV(dry_run, &buf[off], CCR, dregs_ccr);
+		off += _ldst_peripheral(pl330, dry_run, &buf[off], pxs, 1,
+					BURST);
 		break;
 
 	case DMA_MEM_TO_MEM:
-		dregs_ccr = pxs->ccr;
-		dregs_ccr &= ~((0xf << CC_SRCBRSTLEN_SHFT) |
-			(0xf << CC_DSTBRSTLEN_SHFT));
-		dregs_ccr |= (((transfer_length - 1) & 0xf) <<
-			CC_SRCBRSTLEN_SHFT);
-		dregs_ccr |= (((transfer_length - 1) & 0xf) <<
-			CC_DSTBRSTLEN_SHFT);
 		off += _emit_MOV(dry_run, &buf[off], CCR, dregs_ccr);
 		off += _ldst_memtomem(dry_run, &buf[off], pxs, 1);
 		break;
@@ -2221,9 +2236,7 @@
 
 static int fixup_burst_len(int max_burst_len, int quirks)
 {
-	if (quirks & PL330_QUIRK_BROKEN_NO_FLUSHP)
-		return 1;
-	else if (max_burst_len > PL330_MAX_BURST)
+	if (max_burst_len > PL330_MAX_BURST)
 		return PL330_MAX_BURST;
 	else if (max_burst_len < 1)
 		return 1;
@@ -3128,8 +3141,7 @@
 	pd->dst_addr_widths = PL330_DMA_BUSWIDTHS;
 	pd->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
 	pd->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
-	pd->max_burst = ((pl330->quirks & PL330_QUIRK_BROKEN_NO_FLUSHP) ?
-			 1 : PL330_MAX_BURST);
+	pd->max_burst = PL330_MAX_BURST;
 
 	ret = dma_async_device_register(pd);
 	if (ret) {
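
To make the pl330 dregs change concrete, a worked example with illustrative numbers (not taken from the patch):

/*
 * Assume CCR programs 8-byte beats (BRST_SIZE) with a burst length
 * (BRST_LEN) of 16, and the request is 200 bytes:
 *
 *   bytes per full burst = 8 * 16 = 128  -> 1 full burst, 72 bytes left
 *   transfer_length      = 72 / 8  = 9 dregs beats
 *
 * _dregs() now rewrites CCR so the 4-bit SRC/DST_BRSTLEN fields hold
 * (9 - 1) and issues one BURST transfer of 9 beats, instead of the 9
 * SINGLE transfers the peripheral path used before this patch.
 */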
diff --git a/drivers/dma/ste_dma40.c b/drivers/dma/ste_dma40.c
index de8bfd9..21e2f1d 100644
--- a/drivers/dma/ste_dma40.c
+++ b/drivers/dma/ste_dma40.c
@@ -381,6 +381,7 @@
  * struct d40_lcla_pool - LCLA pool settings and data.
  *
  * @base: The virtual address of LCLA. 18 bit aligned.
+ * @dma_addr: DMA address, if mapped
  * @base_unaligned: The orignal kmalloc pointer, if kmalloc is used.
  * This pointer is only there for clean-up on error.
  * @pages: The number of pages needed for all physical channels.
@@ -534,6 +535,7 @@
  * mode" allocated physical channels.
  * @num_log_chans: The number of logical channels. Calculated from
  * num_phy_chans.
+ * @dma_parms: DMA parameters for the channel
  * @dma_both: dma_device channels that can do both memcpy and slave transfers.
  * @dma_slave: dma_device channels that can do only do slave transfers.
  * @dma_memcpy: dma_device channels that can do only do memcpy transfers.
diff --git a/drivers/dma/sun4i-dma.c b/drivers/dma/sun4i-dma.c
index e7ff09a..e8b6633 100644
--- a/drivers/dma/sun4i-dma.c
+++ b/drivers/dma/sun4i-dma.c
@@ -307,7 +307,7 @@
 	spin_unlock_irqrestore(&priv->lock, flags);
 }
 
-/**
+/*
  * Execute pending operations on a vchan
  *
  * When given a vchan, this function will try to acquire a suitable
@@ -419,7 +419,7 @@
 	return 0;
 }
 
-/**
+/*
  * Generate a promise, to be used in a normal DMA contract.
  *
  * A NDMA promise contains all the information required to program the
@@ -486,7 +486,7 @@
 	return NULL;
 }
 
-/**
+/*
  * Generate a promise, to be used in a dedicated DMA contract.
  *
  * A DDMA promise contains all the information required to program the
@@ -543,7 +543,7 @@
 	return NULL;
 }
 
-/**
+/*
  * Generate a contract
  *
  * Contracts function as DMA descriptors. As our hardware does not support
@@ -565,7 +565,7 @@
 	return contract;
 }
 
-/**
+/*
  * Get next promise on a cyclic transfer
  *
  * Cyclic contracts contain a series of promises which are executed on a
@@ -589,7 +589,7 @@
 	return promise;
 }
 
-/**
+/*
  * Free a contract and all its associated promises
  */
 static void sun4i_dma_free_contract(struct virt_dma_desc *vd)
diff --git a/drivers/dma/ti/k3-udma-glue.c b/drivers/dma/ti/k3-udma-glue.c
index c888ae4..3a5d33e 100644
--- a/drivers/dma/ti/k3-udma-glue.c
+++ b/drivers/dma/ti/k3-udma-glue.c
@@ -186,17 +186,17 @@
 	struct device *dev = chn->common.dev;
 
 	dev_dbg(dev, "=== dump ===> %s\n", mark);
-	dev_dbg(dev, "0x%08X: %08X\n", UDMA_TCHAN_RT_CTL_REG,
-		xudma_tchanrt_read(chn->udma_tchanx, UDMA_TCHAN_RT_CTL_REG));
-	dev_dbg(dev, "0x%08X: %08X\n", UDMA_TCHAN_RT_PEER_RT_EN_REG,
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_CTL_REG,
+		xudma_tchanrt_read(chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_PEER_RT_EN_REG,
 		xudma_tchanrt_read(chn->udma_tchanx,
-				   UDMA_TCHAN_RT_PEER_RT_EN_REG));
-	dev_dbg(dev, "0x%08X: %08X\n", UDMA_TCHAN_RT_PCNT_REG,
-		xudma_tchanrt_read(chn->udma_tchanx, UDMA_TCHAN_RT_PCNT_REG));
-	dev_dbg(dev, "0x%08X: %08X\n", UDMA_TCHAN_RT_BCNT_REG,
-		xudma_tchanrt_read(chn->udma_tchanx, UDMA_TCHAN_RT_BCNT_REG));
-	dev_dbg(dev, "0x%08X: %08X\n", UDMA_TCHAN_RT_SBCNT_REG,
-		xudma_tchanrt_read(chn->udma_tchanx, UDMA_TCHAN_RT_SBCNT_REG));
+				   UDMA_CHAN_RT_PEER_RT_EN_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_PCNT_REG,
+		xudma_tchanrt_read(chn->udma_tchanx, UDMA_CHAN_RT_PCNT_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_BCNT_REG,
+		xudma_tchanrt_read(chn->udma_tchanx, UDMA_CHAN_RT_BCNT_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_SBCNT_REG,
+		xudma_tchanrt_read(chn->udma_tchanx, UDMA_CHAN_RT_SBCNT_REG));
 }
 
 static int k3_udma_glue_cfg_tx_chn(struct k3_udma_glue_tx_channel *tx_chn)
@@ -381,14 +381,13 @@
 	u32 txrt_ctl;
 
 	txrt_ctl = UDMA_PEER_RT_EN_ENABLE;
-	xudma_tchanrt_write(tx_chn->udma_tchanx,
-			    UDMA_TCHAN_RT_PEER_RT_EN_REG,
+	xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_CHAN_RT_PEER_RT_EN_REG,
 			    txrt_ctl);
 
 	txrt_ctl = xudma_tchanrt_read(tx_chn->udma_tchanx,
-				      UDMA_TCHAN_RT_CTL_REG);
+				      UDMA_CHAN_RT_CTL_REG);
 	txrt_ctl |= UDMA_CHAN_RT_CTL_EN;
-	xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_TCHAN_RT_CTL_REG,
+	xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG,
 			    txrt_ctl);
 
 	k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn en");
@@ -400,10 +399,10 @@
 {
 	k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn dis1");
 
-	xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_TCHAN_RT_CTL_REG, 0);
+	xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG, 0);
 
 	xudma_tchanrt_write(tx_chn->udma_tchanx,
-			    UDMA_TCHAN_RT_PEER_RT_EN_REG, 0);
+			    UDMA_CHAN_RT_PEER_RT_EN_REG, 0);
 	k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn dis2");
 }
 EXPORT_SYMBOL_GPL(k3_udma_glue_disable_tx_chn);
@@ -416,14 +415,14 @@
 
 	k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn tdown1");
 
-	xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_TCHAN_RT_CTL_REG,
+	xudma_tchanrt_write(tx_chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG,
 			    UDMA_CHAN_RT_CTL_EN | UDMA_CHAN_RT_CTL_TDOWN);
 
-	val = xudma_tchanrt_read(tx_chn->udma_tchanx, UDMA_TCHAN_RT_CTL_REG);
+	val = xudma_tchanrt_read(tx_chn->udma_tchanx, UDMA_CHAN_RT_CTL_REG);
 
 	while (sync && (val & UDMA_CHAN_RT_CTL_EN)) {
 		val = xudma_tchanrt_read(tx_chn->udma_tchanx,
-					 UDMA_TCHAN_RT_CTL_REG);
+					 UDMA_CHAN_RT_CTL_REG);
 		udelay(1);
 		if (i > K3_UDMAX_TDOWN_TIMEOUT_US) {
 			dev_err(tx_chn->common.dev, "TX tdown timeout\n");
@@ -433,7 +432,7 @@
 	}
 
 	val = xudma_tchanrt_read(tx_chn->udma_tchanx,
-				 UDMA_TCHAN_RT_PEER_RT_EN_REG);
+				 UDMA_CHAN_RT_PEER_RT_EN_REG);
 	if (sync && (val & UDMA_PEER_RT_EN_ENABLE))
 		dev_err(tx_chn->common.dev, "TX tdown peer not stopped\n");
 	k3_udma_glue_dump_tx_rt_chn(tx_chn, "txchn tdown2");
@@ -700,17 +699,17 @@
 
 	dev_dbg(dev, "=== dump ===> %s\n", mark);
 
-	dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_CTL_REG,
-		xudma_rchanrt_read(chn->udma_rchanx, UDMA_RCHAN_RT_CTL_REG));
-	dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_PEER_RT_EN_REG,
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_CTL_REG,
+		xudma_rchanrt_read(chn->udma_rchanx, UDMA_CHAN_RT_CTL_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_PEER_RT_EN_REG,
 		xudma_rchanrt_read(chn->udma_rchanx,
-				   UDMA_RCHAN_RT_PEER_RT_EN_REG));
-	dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_PCNT_REG,
-		xudma_rchanrt_read(chn->udma_rchanx, UDMA_RCHAN_RT_PCNT_REG));
-	dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_BCNT_REG,
-		xudma_rchanrt_read(chn->udma_rchanx, UDMA_RCHAN_RT_BCNT_REG));
-	dev_dbg(dev, "0x%08X: %08X\n", UDMA_RCHAN_RT_SBCNT_REG,
-		xudma_rchanrt_read(chn->udma_rchanx, UDMA_RCHAN_RT_SBCNT_REG));
+				   UDMA_CHAN_RT_PEER_RT_EN_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_PCNT_REG,
+		xudma_rchanrt_read(chn->udma_rchanx, UDMA_CHAN_RT_PCNT_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_BCNT_REG,
+		xudma_rchanrt_read(chn->udma_rchanx, UDMA_CHAN_RT_BCNT_REG));
+	dev_dbg(dev, "0x%08X: %08X\n", UDMA_CHAN_RT_SBCNT_REG,
+		xudma_rchanrt_read(chn->udma_rchanx, UDMA_CHAN_RT_SBCNT_REG));
 }
 
 static int
@@ -1068,13 +1067,12 @@
 		return -EINVAL;
 
 	rxrt_ctl = xudma_rchanrt_read(rx_chn->udma_rchanx,
-				      UDMA_RCHAN_RT_CTL_REG);
+				      UDMA_CHAN_RT_CTL_REG);
 	rxrt_ctl |= UDMA_CHAN_RT_CTL_EN;
-	xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_RCHAN_RT_CTL_REG,
+	xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_CHAN_RT_CTL_REG,
 			    rxrt_ctl);
 
-	xudma_rchanrt_write(rx_chn->udma_rchanx,
-			    UDMA_RCHAN_RT_PEER_RT_EN_REG,
+	xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_CHAN_RT_PEER_RT_EN_REG,
 			    UDMA_PEER_RT_EN_ENABLE);
 
 	k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt en");
@@ -1087,9 +1085,8 @@
 	k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt dis1");
 
 	xudma_rchanrt_write(rx_chn->udma_rchanx,
-			    UDMA_RCHAN_RT_PEER_RT_EN_REG,
-			    0);
-	xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_RCHAN_RT_CTL_REG, 0);
+			    UDMA_CHAN_RT_PEER_RT_EN_REG, 0);
+	xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_CHAN_RT_CTL_REG, 0);
 
 	k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt dis2");
 }
@@ -1106,14 +1103,14 @@
 
 	k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt tdown1");
 
-	xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_RCHAN_RT_PEER_RT_EN_REG,
+	xudma_rchanrt_write(rx_chn->udma_rchanx, UDMA_CHAN_RT_PEER_RT_EN_REG,
 			    UDMA_PEER_RT_EN_ENABLE | UDMA_PEER_RT_EN_TEARDOWN);
 
-	val = xudma_rchanrt_read(rx_chn->udma_rchanx, UDMA_RCHAN_RT_CTL_REG);
+	val = xudma_rchanrt_read(rx_chn->udma_rchanx, UDMA_CHAN_RT_CTL_REG);
 
 	while (sync && (val & UDMA_CHAN_RT_CTL_EN)) {
 		val = xudma_rchanrt_read(rx_chn->udma_rchanx,
-					 UDMA_RCHAN_RT_CTL_REG);
+					 UDMA_CHAN_RT_CTL_REG);
 		udelay(1);
 		if (i > K3_UDMAX_TDOWN_TIMEOUT_US) {
 			dev_err(rx_chn->common.dev, "RX tdown timeout\n");
@@ -1123,7 +1120,7 @@
 	}
 
 	val = xudma_rchanrt_read(rx_chn->udma_rchanx,
-				 UDMA_RCHAN_RT_PEER_RT_EN_REG);
+				 UDMA_CHAN_RT_PEER_RT_EN_REG);
 	if (sync && (val & UDMA_PEER_RT_EN_ENABLE))
 		dev_err(rx_chn->common.dev, "TX tdown peer not stopped\n");
 	k3_udma_glue_dump_rx_rt_chn(rx_chn, "rxrt tdown2");
diff --git a/drivers/dma/ti/k3-udma-private.c b/drivers/dma/ti/k3-udma-private.c
index 77e8e67..aa24e55 100644
--- a/drivers/dma/ti/k3-udma-private.c
+++ b/drivers/dma/ti/k3-udma-private.c
@@ -121,13 +121,17 @@
 #define XUDMA_RT_IO_FUNCTIONS(res)					\
 u32 xudma_##res##rt_read(struct udma_##res *p, int reg)			\
 {									\
-	return udma_##res##rt_read(p, reg);				\
+	if (!p)								\
+		return 0;						\
+	return udma_read(p->reg_rt, reg);				\
 }									\
 EXPORT_SYMBOL(xudma_##res##rt_read);					\
 									\
 void xudma_##res##rt_write(struct udma_##res *p, int reg, u32 val)	\
 {									\
-	udma_##res##rt_write(p, reg, val);				\
+	if (!p)								\
+		return;							\
+	udma_write(p->reg_rt, reg, val);				\
 }									\
 EXPORT_SYMBOL(xudma_##res##rt_write)
 XUDMA_RT_IO_FUNCTIONS(tchan);
diff --git a/drivers/dma/ti/k3-udma.c b/drivers/dma/ti/k3-udma.c
index 49d0d3a..c14e6cb 100644
--- a/drivers/dma/ti/k3-udma.c
+++ b/drivers/dma/ti/k3-udma.c
@@ -92,9 +92,6 @@
 	u32 flags;
 	u32 statictr_z_mask;
 	u32 rchan_oes_offset;
-
-	u8 tpl_levels;
-	u32 level_start_idx[];
 };
 
 struct udma_hwdesc {
@@ -121,6 +118,9 @@
 	void __iomem *mmrs[MMR_LAST];
 	const struct udma_match_data *match_data;
 
+	u8 tpl_levels;
+	u32 tpl_start_idx[3];
+
 	size_t desc_align; /* alignment to use for descriptors */
 
 	struct udma_tisci_rm tisci_rm;
@@ -282,51 +282,49 @@
 }
 
 /* TCHANRT */
-static inline u32 udma_tchanrt_read(struct udma_tchan *tchan, int reg)
+static inline u32 udma_tchanrt_read(struct udma_chan *uc, int reg)
 {
-	if (!tchan)
+	if (!uc->tchan)
 		return 0;
-	return udma_read(tchan->reg_rt, reg);
+	return udma_read(uc->tchan->reg_rt, reg);
 }
 
-static inline void udma_tchanrt_write(struct udma_tchan *tchan, int reg,
-				      u32 val)
+static inline void udma_tchanrt_write(struct udma_chan *uc, int reg, u32 val)
 {
-	if (!tchan)
+	if (!uc->tchan)
 		return;
-	udma_write(tchan->reg_rt, reg, val);
+	udma_write(uc->tchan->reg_rt, reg, val);
 }
 
-static inline void udma_tchanrt_update_bits(struct udma_tchan *tchan, int reg,
+static inline void udma_tchanrt_update_bits(struct udma_chan *uc, int reg,
 					    u32 mask, u32 val)
 {
-	if (!tchan)
+	if (!uc->tchan)
 		return;
-	udma_update_bits(tchan->reg_rt, reg, mask, val);
+	udma_update_bits(uc->tchan->reg_rt, reg, mask, val);
 }
 
 /* RCHANRT */
-static inline u32 udma_rchanrt_read(struct udma_rchan *rchan, int reg)
+static inline u32 udma_rchanrt_read(struct udma_chan *uc, int reg)
 {
-	if (!rchan)
+	if (!uc->rchan)
 		return 0;
-	return udma_read(rchan->reg_rt, reg);
+	return udma_read(uc->rchan->reg_rt, reg);
 }
 
-static inline void udma_rchanrt_write(struct udma_rchan *rchan, int reg,
-				      u32 val)
+static inline void udma_rchanrt_write(struct udma_chan *uc, int reg, u32 val)
 {
-	if (!rchan)
+	if (!uc->rchan)
 		return;
-	udma_write(rchan->reg_rt, reg, val);
+	udma_write(uc->rchan->reg_rt, reg, val);
 }
 
-static inline void udma_rchanrt_update_bits(struct udma_rchan *rchan, int reg,
+static inline void udma_rchanrt_update_bits(struct udma_chan *uc, int reg,
 					    u32 mask, u32 val)
 {
-	if (!rchan)
+	if (!uc->rchan)
 		return;
-	udma_update_bits(rchan->reg_rt, reg, mask, val);
+	udma_update_bits(uc->rchan->reg_rt, reg, mask, val);
 }
 
 static int navss_psil_pair(struct udma_dev *ud, u32 src_thread, u32 dst_thread)
@@ -366,18 +364,18 @@
 	if (uc->config.dir == DMA_MEM_TO_DEV || uc->config.dir == DMA_MEM_TO_MEM) {
 		dev_dbg(dev, "TCHAN State data:\n");
 		for (i = 0; i < 32; i++) {
-			offset = UDMA_TCHAN_RT_STDATA_REG + i * 4;
+			offset = UDMA_CHAN_RT_STDATA_REG + i * 4;
 			dev_dbg(dev, "TRT_STDATA[%02d]: 0x%08x\n", i,
-				udma_tchanrt_read(uc->tchan, offset));
+				udma_tchanrt_read(uc, offset));
 		}
 	}
 
 	if (uc->config.dir == DMA_DEV_TO_MEM || uc->config.dir == DMA_MEM_TO_MEM) {
 		dev_dbg(dev, "RCHAN State data:\n");
 		for (i = 0; i < 32; i++) {
-			offset = UDMA_RCHAN_RT_STDATA_REG + i * 4;
+			offset = UDMA_CHAN_RT_STDATA_REG + i * 4;
 			dev_dbg(dev, "RRT_STDATA[%02d]: 0x%08x\n", i,
-				udma_rchanrt_read(uc->rchan, offset));
+				udma_rchanrt_read(uc, offset));
 		}
 	}
 }
@@ -500,9 +498,9 @@
 	u32 rrt_ctl = 0;
 
 	if (uc->tchan)
-		trt_ctl = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_CTL_REG);
+		trt_ctl = udma_tchanrt_read(uc, UDMA_CHAN_RT_CTL_REG);
 	if (uc->rchan)
-		rrt_ctl = udma_rchanrt_read(uc->rchan, UDMA_RCHAN_RT_CTL_REG);
+		rrt_ctl = udma_rchanrt_read(uc, UDMA_CHAN_RT_CTL_REG);
 
 	if (trt_ctl & UDMA_CHAN_RT_CTL_EN || rrt_ctl & UDMA_CHAN_RT_CTL_EN)
 		return true;
@@ -516,17 +514,15 @@
 
 	switch (uc->config.dir) {
 	case DMA_DEV_TO_MEM:
-		val = udma_rchanrt_read(uc->rchan,
-					UDMA_RCHAN_RT_PEER_RT_EN_REG);
+		val = udma_rchanrt_read(uc, UDMA_CHAN_RT_PEER_RT_EN_REG);
 		pause_mask = UDMA_PEER_RT_EN_PAUSE;
 		break;
 	case DMA_MEM_TO_DEV:
-		val = udma_tchanrt_read(uc->tchan,
-					UDMA_TCHAN_RT_PEER_RT_EN_REG);
+		val = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_RT_EN_REG);
 		pause_mask = UDMA_PEER_RT_EN_PAUSE;
 		break;
 	case DMA_MEM_TO_MEM:
-		val = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_CTL_REG);
+		val = udma_tchanrt_read(uc, UDMA_CHAN_RT_CTL_REG);
 		pause_mask = UDMA_CHAN_RT_CTL_PAUSE;
 		break;
 	default:
@@ -539,30 +535,6 @@
 	return false;
 }
 
-static void udma_sync_for_device(struct udma_chan *uc, int idx)
-{
-	struct udma_desc *d = uc->desc;
-
-	if (uc->cyclic && uc->config.pkt_mode) {
-		dma_sync_single_for_device(uc->ud->dev,
-					   d->hwdesc[idx].cppi5_desc_paddr,
-					   d->hwdesc[idx].cppi5_desc_size,
-					   DMA_TO_DEVICE);
-	} else {
-		int i;
-
-		for (i = 0; i < d->hwdesc_count; i++) {
-			if (!d->hwdesc[i].cppi5_desc_vaddr)
-				continue;
-
-			dma_sync_single_for_device(uc->ud->dev,
-						d->hwdesc[i].cppi5_desc_paddr,
-						d->hwdesc[i].cppi5_desc_size,
-						DMA_TO_DEVICE);
-		}
-	}
-}
-
 static inline dma_addr_t udma_get_rx_flush_hwdesc_paddr(struct udma_chan *uc)
 {
 	return uc->ud->rx_flush.hwdescs[uc->config.pkt_mode].cppi5_desc_paddr;
@@ -593,7 +565,6 @@
 		paddr = udma_curr_cppi5_desc_paddr(d, idx);
 
 		wmb(); /* Ensure that writes are not moved over this point */
-		udma_sync_for_device(uc, idx);
 	}
 
 	return k3_ringacc_ring_push(ring, &paddr);
@@ -613,7 +584,7 @@
 static int udma_pop_from_ring(struct udma_chan *uc, dma_addr_t *addr)
 {
 	struct k3_ring *ring = NULL;
-	int ret = -ENOENT;
+	int ret;
 
 	switch (uc->config.dir) {
 	case DMA_DEV_TO_MEM:
@@ -624,34 +595,24 @@
 		ring = uc->tchan->tc_ring;
 		break;
 	default:
-		break;
+		return -ENOENT;
 	}
 
-	if (ring && k3_ringacc_ring_get_occ(ring)) {
-		struct udma_desc *d = NULL;
+	ret = k3_ringacc_ring_pop(ring, addr);
+	if (ret)
+		return ret;
 
-		ret = k3_ringacc_ring_pop(ring, addr);
-		if (ret)
-			return ret;
+	rmb(); /* Ensure that reads are not moved before this point */
 
-		/* Teardown completion */
-		if (cppi5_desc_is_tdcm(*addr))
-			return ret;
+	/* Teardown completion */
+	if (cppi5_desc_is_tdcm(*addr))
+		return 0;
 
-		/* Check for flush descriptor */
-		if (udma_desc_is_rx_flush(uc, *addr))
-			return -ENOENT;
+	/* Check for flush descriptor */
+	if (udma_desc_is_rx_flush(uc, *addr))
+		return -ENOENT;
 
-		d = udma_udma_desc_from_paddr(uc, *addr);
-
-		if (d)
-			dma_sync_single_for_cpu(uc->ud->dev, *addr,
-						d->hwdesc[0].cppi5_desc_size,
-						DMA_FROM_DEVICE);
-		rmb(); /* Ensure that reads are not moved before this point */
-	}
-
-	return ret;
+	return 0;
 }
 
 static void udma_reset_rings(struct udma_chan *uc)
@@ -695,31 +656,31 @@
 	u32 val;
 
 	if (uc->tchan) {
-		val = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_BCNT_REG);
-		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_BCNT_REG, val);
+		val = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG);
+		udma_tchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val);
 
-		val = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_SBCNT_REG);
-		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_SBCNT_REG, val);
+		val = udma_tchanrt_read(uc, UDMA_CHAN_RT_SBCNT_REG);
+		udma_tchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val);
 
-		val = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_PCNT_REG);
-		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PCNT_REG, val);
+		val = udma_tchanrt_read(uc, UDMA_CHAN_RT_PCNT_REG);
+		udma_tchanrt_write(uc, UDMA_CHAN_RT_PCNT_REG, val);
 
-		val = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_PEER_BCNT_REG);
-		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PEER_BCNT_REG, val);
+		val = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG);
+		udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val);
 	}
 
 	if (uc->rchan) {
-		val = udma_rchanrt_read(uc->rchan, UDMA_RCHAN_RT_BCNT_REG);
-		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_BCNT_REG, val);
+		val = udma_rchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG);
+		udma_rchanrt_write(uc, UDMA_CHAN_RT_BCNT_REG, val);
 
-		val = udma_rchanrt_read(uc->rchan, UDMA_RCHAN_RT_SBCNT_REG);
-		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_SBCNT_REG, val);
+		val = udma_rchanrt_read(uc, UDMA_CHAN_RT_SBCNT_REG);
+		udma_rchanrt_write(uc, UDMA_CHAN_RT_SBCNT_REG, val);
 
-		val = udma_rchanrt_read(uc->rchan, UDMA_RCHAN_RT_PCNT_REG);
-		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PCNT_REG, val);
+		val = udma_rchanrt_read(uc, UDMA_CHAN_RT_PCNT_REG);
+		udma_rchanrt_write(uc, UDMA_CHAN_RT_PCNT_REG, val);
 
-		val = udma_rchanrt_read(uc->rchan, UDMA_RCHAN_RT_PEER_BCNT_REG);
-		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_BCNT_REG, val);
+		val = udma_rchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG);
+		udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_BCNT_REG, val);
 	}
 
 	uc->bcnt = 0;
@@ -729,16 +690,16 @@
 {
 	switch (uc->config.dir) {
 	case DMA_DEV_TO_MEM:
-		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG, 0);
-		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG, 0);
+		udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, 0);
+		udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, 0);
 		break;
 	case DMA_MEM_TO_DEV:
-		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG, 0);
-		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PEER_RT_EN_REG, 0);
+		udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, 0);
+		udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG, 0);
 		break;
 	case DMA_MEM_TO_MEM:
-		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG, 0);
-		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG, 0);
+		udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, 0);
+		udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG, 0);
 		break;
 	default:
 		return -EINVAL;
@@ -766,7 +727,7 @@
 		 * the rchan.
 		 */
 		if (uc->config.dir == DMA_DEV_TO_MEM)
-			udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG,
+			udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG,
 					   UDMA_CHAN_RT_CTL_EN |
 					   UDMA_CHAN_RT_CTL_TDOWN |
 					   UDMA_CHAN_RT_CTL_FTDOWN);
@@ -843,11 +804,12 @@
 			if (uc->config.enable_burst)
 				val |= PDMA_STATIC_TR_XY_BURST;
 
-			udma_rchanrt_write(uc->rchan,
-				UDMA_RCHAN_RT_PEER_STATIC_TR_XY_REG, val);
+			udma_rchanrt_write(uc,
+					   UDMA_CHAN_RT_PEER_STATIC_TR_XY_REG,
+					   val);
 
-			udma_rchanrt_write(uc->rchan,
-				UDMA_RCHAN_RT_PEER_STATIC_TR_Z_REG,
+			udma_rchanrt_write(uc,
+				UDMA_CHAN_RT_PEER_STATIC_TR_Z_REG,
 				PDMA_STATIC_TR_Z(uc->desc->static_tr.bstcnt,
 						 match_data->statictr_z_mask));
 
@@ -856,11 +818,11 @@
 			       sizeof(uc->static_tr));
 		}
 
-		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG,
+		udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG,
 				   UDMA_CHAN_RT_CTL_EN);
 
 		/* Enable remote */
-		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG,
+		udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG,
 				   UDMA_PEER_RT_EN_ENABLE);
 
 		break;
@@ -875,8 +837,9 @@
 			if (uc->config.enable_burst)
 				val |= PDMA_STATIC_TR_XY_BURST;
 
-			udma_tchanrt_write(uc->tchan,
-				UDMA_TCHAN_RT_PEER_STATIC_TR_XY_REG, val);
+			udma_tchanrt_write(uc,
+					   UDMA_CHAN_RT_PEER_STATIC_TR_XY_REG,
+					   val);
 
 			/* save the current staticTR configuration */
 			memcpy(&uc->static_tr, &uc->desc->static_tr,
@@ -884,17 +847,17 @@
 		}
 
 		/* Enable remote */
-		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PEER_RT_EN_REG,
+		udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG,
 				   UDMA_PEER_RT_EN_ENABLE);
 
-		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+		udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG,
 				   UDMA_CHAN_RT_CTL_EN);
 
 		break;
 	case DMA_MEM_TO_MEM:
-		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_CTL_REG,
+		udma_rchanrt_write(uc, UDMA_CHAN_RT_CTL_REG,
 				   UDMA_CHAN_RT_CTL_EN);
-		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+		udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG,
 				   UDMA_CHAN_RT_CTL_EN);
 
 		break;
@@ -920,20 +883,20 @@
 		if (!uc->cyclic && !uc->desc)
 			udma_push_to_ring(uc, -1);
 
-		udma_rchanrt_write(uc->rchan, UDMA_RCHAN_RT_PEER_RT_EN_REG,
+		udma_rchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG,
 				   UDMA_PEER_RT_EN_ENABLE |
 				   UDMA_PEER_RT_EN_TEARDOWN);
 		break;
 	case DMA_MEM_TO_DEV:
-		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_PEER_RT_EN_REG,
+		udma_tchanrt_write(uc, UDMA_CHAN_RT_PEER_RT_EN_REG,
 				   UDMA_PEER_RT_EN_ENABLE |
 				   UDMA_PEER_RT_EN_FLUSH);
-		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+		udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG,
 				   UDMA_CHAN_RT_CTL_EN |
 				   UDMA_CHAN_RT_CTL_TDOWN);
 		break;
 	case DMA_MEM_TO_MEM:
-		udma_tchanrt_write(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+		udma_tchanrt_write(uc, UDMA_CHAN_RT_CTL_REG,
 				   UDMA_CHAN_RT_CTL_EN |
 				   UDMA_CHAN_RT_CTL_TDOWN);
 		break;
@@ -973,8 +936,8 @@
 	    uc->config.dir != DMA_MEM_TO_DEV)
 		return true;
 
-	peer_bcnt = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_PEER_BCNT_REG);
-	bcnt = udma_tchanrt_read(uc->tchan, UDMA_TCHAN_RT_BCNT_REG);
+	peer_bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_PEER_BCNT_REG);
+	bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG);
 
 	/* Transfer is incomplete, store current residue and time stamp */
 	if (peer_bcnt < bcnt) {
@@ -1247,10 +1210,10 @@
 	} else {							\
 		int start;						\
 									\
-		if (tpl >= ud->match_data->tpl_levels)			\
-			tpl = ud->match_data->tpl_levels - 1;		\
+		if (tpl >= ud->tpl_levels)				\
+			tpl = ud->tpl_levels - 1;			\
 									\
-		start = ud->match_data->level_start_idx[tpl];		\
+		start = ud->tpl_start_idx[tpl];				\
 									\
 		id = find_next_zero_bit(ud->res##_map, ud->res##_cnt,	\
 					start);				\
@@ -1299,7 +1262,6 @@
 static int udma_get_chan_pair(struct udma_chan *uc)
 {
 	struct udma_dev *ud = uc->ud;
-	const struct udma_match_data *match_data = ud->match_data;
 	int chan_id, end;
 
 	if ((uc->tchan && uc->rchan) && uc->tchan->id == uc->rchan->id) {
@@ -1321,7 +1283,7 @@
 	/* Can be optimized, but let's have it like this for now */
 	end = min(ud->tchan_cnt, ud->rchan_cnt);
 	/* Try to use the highest TPL channel pair for MEM_TO_MEM channels */
-	chan_id = match_data->level_start_idx[match_data->tpl_levels - 1];
+	chan_id = ud->tpl_start_idx[ud->tpl_levels - 1];
 	for (; chan_id < end; chan_id++) {
 		if (!test_bit(chan_id, ud->tchan_map) &&
 		    !test_bit(chan_id, ud->rchan_map))
@@ -2195,7 +2157,7 @@
 	u32 ring_id;
 	unsigned int i;
 
-	d = kzalloc(sizeof(*d) + sglen * sizeof(d->hwdesc[0]), GFP_NOWAIT);
+	d = kzalloc(struct_size(d, hwdesc, sglen), GFP_NOWAIT);
 	if (!d)
 		return NULL;
 
@@ -2511,7 +2473,7 @@
 	if (period_len >= SZ_4M)
 		return NULL;
 
-	d = kzalloc(sizeof(*d) + periods * sizeof(d->hwdesc[0]), GFP_NOWAIT);
+	d = kzalloc(struct_size(d, hwdesc, periods), GFP_NOWAIT);
 	if (!d)
 		return NULL;
 
@@ -2761,30 +2723,27 @@
 		u32 delay = 0;
 
 		if (uc->desc->dir == DMA_MEM_TO_DEV) {
-			bcnt = udma_tchanrt_read(uc->tchan,
-						 UDMA_TCHAN_RT_SBCNT_REG);
+			bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_SBCNT_REG);
 
 			if (uc->config.ep_type != PSIL_EP_NATIVE) {
-				peer_bcnt = udma_tchanrt_read(uc->tchan,
-						UDMA_TCHAN_RT_PEER_BCNT_REG);
+				peer_bcnt = udma_tchanrt_read(uc,
+						UDMA_CHAN_RT_PEER_BCNT_REG);
 
 				if (bcnt > peer_bcnt)
 					delay = bcnt - peer_bcnt;
 			}
 		} else if (uc->desc->dir == DMA_DEV_TO_MEM) {
-			bcnt = udma_rchanrt_read(uc->rchan,
-						 UDMA_RCHAN_RT_BCNT_REG);
+			bcnt = udma_rchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG);
 
 			if (uc->config.ep_type != PSIL_EP_NATIVE) {
-				peer_bcnt = udma_rchanrt_read(uc->rchan,
-						UDMA_RCHAN_RT_PEER_BCNT_REG);
+				peer_bcnt = udma_rchanrt_read(uc,
+						UDMA_CHAN_RT_PEER_BCNT_REG);
 
 				if (peer_bcnt > bcnt)
 					delay = peer_bcnt - bcnt;
 			}
 		} else {
-			bcnt = udma_tchanrt_read(uc->tchan,
-						 UDMA_TCHAN_RT_BCNT_REG);
+			bcnt = udma_tchanrt_read(uc, UDMA_CHAN_RT_BCNT_REG);
 		}
 
 		bcnt -= uc->bcnt;
@@ -2817,19 +2776,17 @@
 	/* pause the channel */
 	switch (uc->config.dir) {
 	case DMA_DEV_TO_MEM:
-		udma_rchanrt_update_bits(uc->rchan,
-					 UDMA_RCHAN_RT_PEER_RT_EN_REG,
+		udma_rchanrt_update_bits(uc, UDMA_CHAN_RT_PEER_RT_EN_REG,
 					 UDMA_PEER_RT_EN_PAUSE,
 					 UDMA_PEER_RT_EN_PAUSE);
 		break;
 	case DMA_MEM_TO_DEV:
-		udma_tchanrt_update_bits(uc->tchan,
-					 UDMA_TCHAN_RT_PEER_RT_EN_REG,
+		udma_tchanrt_update_bits(uc, UDMA_CHAN_RT_PEER_RT_EN_REG,
 					 UDMA_PEER_RT_EN_PAUSE,
 					 UDMA_PEER_RT_EN_PAUSE);
 		break;
 	case DMA_MEM_TO_MEM:
-		udma_tchanrt_update_bits(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+		udma_tchanrt_update_bits(uc, UDMA_CHAN_RT_CTL_REG,
 					 UDMA_CHAN_RT_CTL_PAUSE,
 					 UDMA_CHAN_RT_CTL_PAUSE);
 		break;
@@ -2847,18 +2804,16 @@
 	/* resume the channel */
 	switch (uc->config.dir) {
 	case DMA_DEV_TO_MEM:
-		udma_rchanrt_update_bits(uc->rchan,
-					 UDMA_RCHAN_RT_PEER_RT_EN_REG,
+		udma_rchanrt_update_bits(uc, UDMA_CHAN_RT_PEER_RT_EN_REG,
 					 UDMA_PEER_RT_EN_PAUSE, 0);
 
 		break;
 	case DMA_MEM_TO_DEV:
-		udma_tchanrt_update_bits(uc->tchan,
-					 UDMA_TCHAN_RT_PEER_RT_EN_REG,
+		udma_tchanrt_update_bits(uc, UDMA_CHAN_RT_PEER_RT_EN_REG,
 					 UDMA_PEER_RT_EN_PAUSE, 0);
 		break;
 	case DMA_MEM_TO_MEM:
-		udma_tchanrt_update_bits(uc->tchan, UDMA_TCHAN_RT_CTL_REG,
+		udma_tchanrt_update_bits(uc, UDMA_CHAN_RT_CTL_REG,
 					 UDMA_CHAN_RT_CTL_PAUSE, 0);
 		break;
 	default:
@@ -3147,11 +3102,6 @@
 	.enable_memcpy_support = true,
 	.statictr_z_mask = GENMASK(11, 0),
 	.rchan_oes_offset = 0x2000,
-	.tpl_levels = 2,
-	.level_start_idx = {
-		[0] = 8, /* Normal channels */
-		[1] = 0, /* High Throughput channels */
-	},
 };
 
 static struct udma_match_data am654_mcu_data = {
@@ -3159,11 +3109,6 @@
 	.enable_memcpy_support = false,
 	.statictr_z_mask = GENMASK(11, 0),
 	.rchan_oes_offset = 0x2000,
-	.tpl_levels = 2,
-	.level_start_idx = {
-		[0] = 2, /* Normal channels */
-		[1] = 0, /* High Throughput channels */
-	},
 };
 
 static struct udma_match_data j721e_main_data = {
@@ -3172,12 +3117,6 @@
 	.flags = UDMA_FLAG_PDMA_ACC32 | UDMA_FLAG_PDMA_BURST,
 	.statictr_z_mask = GENMASK(23, 0),
 	.rchan_oes_offset = 0x400,
-	.tpl_levels = 3,
-	.level_start_idx = {
-		[0] = 16, /* Normal channels */
-		[1] = 4, /* High Throughput channels */
-		[2] = 0, /* Ultra High Throughput channels */
-	},
 };
 
 static struct udma_match_data j721e_mcu_data = {
@@ -3186,11 +3125,6 @@
 	.flags = UDMA_FLAG_PDMA_ACC32 | UDMA_FLAG_PDMA_BURST,
 	.statictr_z_mask = GENMASK(23, 0),
 	.rchan_oes_offset = 0x400,
-	.tpl_levels = 2,
-	.level_start_idx = {
-		[0] = 2, /* Normal channels */
-		[1] = 0, /* High Throughput channels */
-	},
 };
 
 static const struct of_device_id udma_of_match[] = {
@@ -3239,15 +3173,36 @@
 						    "ti,sci-rm-range-rchan",
 						    "ti,sci-rm-range-rflow" };
 
-	cap2 = udma_read(ud->mmrs[MMR_GCFG], 0x28);
-	cap3 = udma_read(ud->mmrs[MMR_GCFG], 0x2c);
+	cap2 = udma_read(ud->mmrs[MMR_GCFG], UDMA_CAP_REG(2));
+	cap3 = udma_read(ud->mmrs[MMR_GCFG], UDMA_CAP_REG(3));
 
-	ud->rflow_cnt = cap3 & 0x3fff;
-	ud->tchan_cnt = cap2 & 0x1ff;
-	ud->echan_cnt = (cap2 >> 9) & 0x1ff;
-	ud->rchan_cnt = (cap2 >> 18) & 0x1ff;
+	ud->rflow_cnt = UDMA_CAP3_RFLOW_CNT(cap3);
+	ud->tchan_cnt = UDMA_CAP2_TCHAN_CNT(cap2);
+	ud->echan_cnt = UDMA_CAP2_ECHAN_CNT(cap2);
+	ud->rchan_cnt = UDMA_CAP2_RCHAN_CNT(cap2);
 	ch_count  = ud->tchan_cnt + ud->rchan_cnt;
 
+	/* Set up the throughput level start indexes */
+	if (of_device_is_compatible(dev->of_node,
+				    "ti,am654-navss-main-udmap")) {
+		ud->tpl_levels = 2;
+		ud->tpl_start_idx[0] = 8;
+	} else if (of_device_is_compatible(dev->of_node,
+					   "ti,am654-navss-mcu-udmap")) {
+		ud->tpl_levels = 2;
+		ud->tpl_start_idx[0] = 2;
+	} else if (UDMA_CAP3_UCHAN_CNT(cap3)) {
+		ud->tpl_levels = 3;
+		ud->tpl_start_idx[1] = UDMA_CAP3_UCHAN_CNT(cap3);
+		ud->tpl_start_idx[0] = ud->tpl_start_idx[1] +
+				       UDMA_CAP3_HCHAN_CNT(cap3);
+	} else if (UDMA_CAP3_HCHAN_CNT(cap3)) {
+		ud->tpl_levels = 2;
+		ud->tpl_start_idx[0] = UDMA_CAP3_HCHAN_CNT(cap3);
+	} else {
+		ud->tpl_levels = 1;
+	}
+
 	ud->tchan_map = devm_kmalloc_array(dev, BITS_TO_LONGS(ud->tchan_cnt),
 					   sizeof(unsigned long), GFP_KERNEL);
 	ud->tchans = devm_kcalloc(dev, ud->tchan_cnt, sizeof(*ud->tchans),
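[ The per-SoC throughput tables removed above map directly onto the new CAP3-based probe: tpl_start_idx[] is now derived from the ultra-high and high throughput channel counts. A stand-alone sketch of the arithmetic, with counts assumed purely so the result reproduces the table deleted from j721e_main_data (UCHAN_CNT = 4, HCHAN_CNT = 12):

	u32 uchan = UDMA_CAP3_UCHAN_CNT(cap3);	/* assume 4 */
	u32 hchan = UDMA_CAP3_HCHAN_CNT(cap3);	/* assume 12 */

	if (uchan) {
		ud->tpl_levels = 3;
		ud->tpl_start_idx[1] = uchan;		/* = 4, high throughput */
		ud->tpl_start_idx[0] = uchan + hchan;	/* = 16, normal */
		/* ultra-high throughput channels implicitly start at 0 */
	}

The kzalloc(struct_size(d, hwdesc, n)) conversions earlier in the same file are the usual overflow-safe spelling of sizeof(*d) + n * sizeof(d->hwdesc[0]). ]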
diff --git a/drivers/dma/ti/k3-udma.h b/drivers/dma/ti/k3-udma.h
index 128d874..09c4529 100644
--- a/drivers/dma/ti/k3-udma.h
+++ b/drivers/dma/ti/k3-udma.h
@@ -18,52 +18,41 @@
 #define UDMA_RX_FLOW_ID_FW_OES_REG	0x80
 #define UDMA_RX_FLOW_ID_FW_STATUS_REG	0x88
 
-/* TX chan RT regs */
-#define UDMA_TCHAN_RT_CTL_REG		0x0
-#define UDMA_TCHAN_RT_SWTRIG_REG	0x8
-#define UDMA_TCHAN_RT_STDATA_REG	0x80
+/* TCHANRT/RCHANRT registers */
+#define UDMA_CHAN_RT_CTL_REG		0x0
+#define UDMA_CHAN_RT_SWTRIG_REG		0x8
+#define UDMA_CHAN_RT_STDATA_REG		0x80
 
-#define UDMA_TCHAN_RT_PEER_REG(i)	(0x200 + ((i) * 0x4))
-#define UDMA_TCHAN_RT_PEER_STATIC_TR_XY_REG	\
-	UDMA_TCHAN_RT_PEER_REG(0)	/* PSI-L: 0x400 */
-#define UDMA_TCHAN_RT_PEER_STATIC_TR_Z_REG	\
-	UDMA_TCHAN_RT_PEER_REG(1)	/* PSI-L: 0x401 */
-#define UDMA_TCHAN_RT_PEER_BCNT_REG		\
-	UDMA_TCHAN_RT_PEER_REG(4)	/* PSI-L: 0x404 */
-#define UDMA_TCHAN_RT_PEER_RT_EN_REG		\
-	UDMA_TCHAN_RT_PEER_REG(8)	/* PSI-L: 0x408 */
+#define UDMA_CHAN_RT_PEER_REG(i)	(0x200 + ((i) * 0x4))
+#define UDMA_CHAN_RT_PEER_STATIC_TR_XY_REG	\
+	UDMA_CHAN_RT_PEER_REG(0)	/* PSI-L: 0x400 */
+#define UDMA_CHAN_RT_PEER_STATIC_TR_Z_REG	\
+	UDMA_CHAN_RT_PEER_REG(1)	/* PSI-L: 0x401 */
+#define UDMA_CHAN_RT_PEER_BCNT_REG		\
+	UDMA_CHAN_RT_PEER_REG(4)	/* PSI-L: 0x404 */
+#define UDMA_CHAN_RT_PEER_RT_EN_REG		\
+	UDMA_CHAN_RT_PEER_REG(8)	/* PSI-L: 0x408 */
 
-#define UDMA_TCHAN_RT_PCNT_REG		0x400
-#define UDMA_TCHAN_RT_BCNT_REG		0x408
-#define UDMA_TCHAN_RT_SBCNT_REG		0x410
+#define UDMA_CHAN_RT_PCNT_REG		0x400
+#define UDMA_CHAN_RT_BCNT_REG		0x408
+#define UDMA_CHAN_RT_SBCNT_REG		0x410
 
-/* RX chan RT regs */
-#define UDMA_RCHAN_RT_CTL_REG		0x0
-#define UDMA_RCHAN_RT_SWTRIG_REG	0x8
-#define UDMA_RCHAN_RT_STDATA_REG	0x80
+/* UDMA_CAP Registers */
+#define UDMA_CAP2_TCHAN_CNT(val)	((val) & 0x1ff)
+#define UDMA_CAP2_ECHAN_CNT(val)	(((val) >> 9) & 0x1ff)
+#define UDMA_CAP2_RCHAN_CNT(val)	(((val) >> 18) & 0x1ff)
+#define UDMA_CAP3_RFLOW_CNT(val)	((val) & 0x3fff)
+#define UDMA_CAP3_HCHAN_CNT(val)	(((val) >> 14) & 0x1ff)
+#define UDMA_CAP3_UCHAN_CNT(val)	(((val) >> 23) & 0x1ff)
 
-#define UDMA_RCHAN_RT_PEER_REG(i)	(0x200 + ((i) * 0x4))
-#define UDMA_RCHAN_RT_PEER_STATIC_TR_XY_REG	\
-	UDMA_RCHAN_RT_PEER_REG(0)	/* PSI-L: 0x400 */
-#define UDMA_RCHAN_RT_PEER_STATIC_TR_Z_REG	\
-	UDMA_RCHAN_RT_PEER_REG(1)	/* PSI-L: 0x401 */
-#define UDMA_RCHAN_RT_PEER_BCNT_REG		\
-	UDMA_RCHAN_RT_PEER_REG(4)	/* PSI-L: 0x404 */
-#define UDMA_RCHAN_RT_PEER_RT_EN_REG		\
-	UDMA_RCHAN_RT_PEER_REG(8)	/* PSI-L: 0x408 */
-
-#define UDMA_RCHAN_RT_PCNT_REG		0x400
-#define UDMA_RCHAN_RT_BCNT_REG		0x408
-#define UDMA_RCHAN_RT_SBCNT_REG		0x410
-
-/* UDMA_TCHAN_RT_CTL_REG/UDMA_RCHAN_RT_CTL_REG */
+/* UDMA_CHAN_RT_CTL_REG */
 #define UDMA_CHAN_RT_CTL_EN		BIT(31)
 #define UDMA_CHAN_RT_CTL_TDOWN		BIT(30)
 #define UDMA_CHAN_RT_CTL_PAUSE		BIT(29)
 #define UDMA_CHAN_RT_CTL_FTDOWN		BIT(28)
 #define UDMA_CHAN_RT_CTL_ERROR		BIT(0)
 
-/* UDMA_TCHAN_RT_PEER_RT_EN_REG/UDMA_RCHAN_RT_PEER_RT_EN_REG (PSI-L: 0x408) */
+/* UDMA_CHAN_RT_PEER_RT_EN_REG */
 #define UDMA_PEER_RT_EN_ENABLE		BIT(31)
 #define UDMA_PEER_RT_EN_TEARDOWN	BIT(30)
 #define UDMA_PEER_RT_EN_PAUSE		BIT(29)
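[ Consolidating the TX and RX maps into one UDMA_CHAN_RT_* set is what lets every accessor call site above shrink to (uc, reg): the helper can resolve the register base from the channel itself. A minimal sketch of that shape, assuming the driver's existing udma_read()/udma_write() low-level helpers and a per-channel reg_rt base pointer (the in-tree helpers may differ in detail):

	static inline u32 udma_tchanrt_read(struct udma_chan *uc, int reg)
	{
		if (!uc->tchan)
			return 0;
		return udma_read(uc->tchan->reg_rt, reg);
	}

	static inline void udma_rchanrt_write(struct udma_chan *uc, int reg,
					      u32 val)
	{
		if (!uc->rchan)
			return;
		udma_write(uc->rchan->reg_rt, reg, val);
	} ]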
diff --git a/drivers/dma/xgene-dma.c b/drivers/dma/xgene-dma.c
index cd60fa6..4f733d3 100644
--- a/drivers/dma/xgene-dma.c
+++ b/drivers/dma/xgene-dma.c
@@ -287,6 +287,8 @@
 
 /**
  * struct xgene_dma - internal representation of an X-Gene DMA device
+ * @dev: reference to this device's struct device
+ * @clk: reference to this device's clock
  * @err_irq: DMA error irq number
  * @ring_num: start id number for DMA ring
  * @csr_dma: base for DMA register access
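[ The @dev/@clk lines address kernel-doc warnings from W=1 builds, which expect an @name: entry for every struct member. The required shape, on a hypothetical struct:

	/**
	 * struct foo - one-line summary
	 * @dev: each member needs a line like this one, otherwise
	 *	 kernel-doc reports the member as not described
	 */
	struct foo {
		struct device *dev;
	}; ]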
diff --git a/drivers/dma/xilinx/xilinx_dpdma.c b/drivers/dma/xilinx/xilinx_dpdma.c
index af88a67..b37197c 100644
--- a/drivers/dma/xilinx/xilinx_dpdma.c
+++ b/drivers/dma/xilinx/xilinx_dpdma.c
@@ -214,6 +214,7 @@
  * @lock: lock to access struct xilinx_dpdma_chan
  * @desc_pool: descriptor allocation pool
  * @err_task: error IRQ bottom half handler
+ * @desc: References to descriptors being processed
 * @desc.pending: Descriptor scheduled to the hardware, pending execution
  * @desc.active: Descriptor being executed by the hardware
  * @xdev: DPDMA device
@@ -295,6 +296,7 @@
 
 /**
  * xilinx_dpdma_sw_desc_set_dma_addrs - Set DMA addresses in the descriptor
+ * @xdev: DPDMA device
  * @sw_desc: The software descriptor in which to set DMA addresses
  * @prev: The previous descriptor
  * @dma_addr: array of dma addresses
@@ -1070,7 +1072,7 @@
 	 * Abuse the slave_id to indicate that the channel is part of a video
 	 * group.
 	 */
-	if (chan->id >= ZYNQMP_DPDMA_VIDEO0 && chan->id <= ZYNQMP_DPDMA_VIDEO2)
+	if (chan->id <= ZYNQMP_DPDMA_VIDEO2)
 		chan->video_group = config->slave_id != 0;
 
 	spin_unlock_irqrestore(&chan->lock, flags);
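[ The last dpdma hunk drops the lower bound of the range check: chan->id is unsigned and ZYNQMP_DPDMA_VIDEO0 is the first channel ID (0), so the comparison can never be false; -Wtype-limits (pulled in by -Wextra under W=1 builds) flags exactly this pattern. Illustratively:

	static bool is_video_chan(unsigned int id)
	{
		/* gcc: "comparison of unsigned expression >= 0 is always true" */
		return id >= 0 && id <= 2;
	} ]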
diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h
index abd8cff..6fbd5c9 100644
--- a/include/linux/dmaengine.h
+++ b/include/linux/dmaengine.h
@@ -39,6 +39,7 @@
 	DMA_IN_PROGRESS,
 	DMA_PAUSED,
 	DMA_ERROR,
+	DMA_OUT_OF_ORDER,
 };
 
 /**
@@ -61,6 +62,7 @@
 	DMA_SLAVE,
 	DMA_CYCLIC,
 	DMA_INTERLEAVE,
+	DMA_COMPLETION_NO_ORDER,
 	DMA_REPEAT,
 	DMA_LOAD_EOT,
 /* last transaction type for creation of the capabilities mask */
@@ -164,7 +166,7 @@
  * @DMA_PREP_INTERRUPT - trigger an interrupt (callback) upon completion of
  *  this transaction
  * @DMA_CTRL_ACK - if clear, the descriptor cannot be reused until the client
- *  acknowledges receipt, i.e. has has a chance to establish any dependency
+ *  acknowledges receipt, i.e. has a chance to establish any dependency
  *  chains
  * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q
  * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P
@@ -479,7 +481,11 @@
 *	Since the enum dma_transfer_direction is not defined as a bit flag for
 *	each type, the dma controller should set BIT(<TYPE>) and the same
 *	should be checked by the controller as well
+ * @min_burst: min burst capability per-transfer
  * @max_burst: max burst capability per-transfer
+ * @max_sg_burst: max number of SG list entries executed in a single burst
+ *	DMA transaction with no software intervention for reinitialization.
+ *	A value of zero means an unlimited number of entries.
  * @cmd_pause: true, if pause is supported (i.e. for reading residue or
  *	       for resume later)
  * @cmd_resume: true, if resume is supported
@@ -492,7 +498,9 @@
 	u32 src_addr_widths;
 	u32 dst_addr_widths;
 	u32 directions;
+	u32 min_burst;
 	u32 max_burst;
+	u32 max_sg_burst;
 	bool cmd_pause;
 	bool cmd_resume;
 	bool cmd_terminate;
@@ -783,7 +791,11 @@
 *	Since the enum dma_transfer_direction is not defined as a bit flag for
 *	each type, the dma controller should set BIT(<TYPE>) and the same
 *	should be checked by the controller as well
+ * @min_burst: min burst capability per-transfer
  * @max_burst: max burst capability per-transfer
+ * @max_sg_burst: max number of SG list entries executed in a single burst
+ *	DMA transaction with no software intervention for reinitialization.
+ *	A value of zero means an unlimited number of entries.
  * @residue_granularity: granularity of the transfer residue reported
  *	by tx_status
  * @device_alloc_chan_resources: allocate resources and return the
@@ -803,6 +815,8 @@
  *	be called after period_len bytes have been transferred.
  * @device_prep_interleaved_dma: Transfer expression in a generic way.
  * @device_prep_dma_imm_data: DMA's 8 byte immediate data to the dst address
+ * @device_caps: May be used to override the generic DMA slave capabilities
+ *	with per-channel specific ones
  * @device_config: Pushes a new configuration to a channel, return 0 or an error
  *	code
  * @device_pause: Pauses any transfer happening on a channel. Returns
@@ -853,7 +867,9 @@
 	u32 src_addr_widths;
 	u32 dst_addr_widths;
 	u32 directions;
+	u32 min_burst;
 	u32 max_burst;
+	u32 max_sg_burst;
 	bool descriptor_reuse;
 	enum dma_residue_granularity residue_granularity;
 
@@ -901,6 +917,8 @@
 		struct dma_chan *chan, dma_addr_t dst, u64 data,
 		unsigned long flags);
 
+	void (*device_caps)(struct dma_chan *chan,
+			    struct dma_slave_caps *caps);
 	int (*device_config)(struct dma_chan *chan,
 			     struct dma_slave_config *config);
 	int (*device_pause)(struct dma_chan *chan);
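[ A controller opts into the new device_caps hook to adjust whatever the core filled in from the device-level fields; dma_get_slave_caps() then hands the adjusted values to clients. A hypothetical sketch (the foo_* names are invented):

	struct foo_chan {
		struct dma_chan chan;
		u32 hw_max_burst;
	};

	static void foo_dma_caps(struct dma_chan *chan,
				 struct dma_slave_caps *caps)
	{
		struct foo_chan *fc = container_of(chan, struct foo_chan, chan);

		/* this channel's FIFO sustains shorter bursts than the rest */
		caps->max_burst = fc->hw_max_burst;
		caps->max_sg_burst = 0;	/* zero: unlimited SG entries */
	}

	/* at probe time: */
	dma_dev->device_caps = foo_dma_caps; ]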
diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h
index f3eaf9e..fbbeb2f 100644
--- a/include/linux/platform_data/dma-dw.h
+++ b/include/linux/platform_data/dma-dw.h
@@ -8,10 +8,15 @@
 #ifndef _PLATFORM_DATA_DMA_DW_H
 #define _PLATFORM_DATA_DMA_DW_H
 
-#include <linux/device.h>
+#include <linux/bits.h>
+#include <linux/types.h>
 
 #define DW_DMA_MAX_NR_MASTERS	4
 #define DW_DMA_MAX_NR_CHANNELS	8
+#define DW_DMA_MIN_BURST	1
+#define DW_DMA_MAX_BURST	256
+
+struct device;
 
 /**
  * struct dw_dma_slave - Controller-specific information about a slave
@@ -42,6 +47,8 @@
  * @data_width: Maximum data width supported by hardware per AHB master
  *		(in bytes, power of 2)
  * @multi_block: Multi block transfers supported by hardware per channel.
+ * @max_burst: Maximum value of burst transaction size supported by hardware
+ *	       per channel (in units of CTL.SRC_TR_WIDTH/CTL.DST_TR_WIDTH).
  * @protctl: Protection control signals setting per channel.
  */
 struct dw_dma_platform_data {
@@ -56,6 +63,7 @@
 	unsigned char	nr_masters;
 	unsigned char	data_width[DW_DMA_MAX_NR_MASTERS];
 	unsigned char	multi_block[DW_DMA_MAX_NR_CHANNELS];
+	u32		max_burst[DW_DMA_MAX_NR_CHANNELS];
 #define CHAN_PROTCTL_PRIVILEGED		BIT(0)
 #define CHAN_PROTCTL_BUFFERABLE		BIT(1)
 #define CHAN_PROTCTL_CACHEABLE		BIT(2)
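[ Platform code handing over this data can now bound the burst length per channel; a sketch with illustrative values (the driver is expected to clamp them into the DW_DMA_MIN_BURST..DW_DMA_MAX_BURST range):

	static struct dw_dma_platform_data foo_dw_pdata = {
		.nr_channels	= 2,
		.nr_masters	= 1,
		.data_width	= { 4 },		/* bytes, power of 2 */
		.multi_block	= { 1, 1 },
		.max_burst	= { 8, DW_DMA_MAX_BURST },
	}; ]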
diff --git a/include/uapi/linux/idxd.h b/include/uapi/linux/idxd.h
index e103c14..fdcdfe4 100644
--- a/include/uapi/linux/idxd.h
+++ b/include/uapi/linux/idxd.h
@@ -181,6 +181,12 @@
 	uint32_t		bytes_completed;
 	uint64_t		fault_addr;
 	union {
+		/* common record */
+		struct {
+			uint32_t	invalid_flags:24;
+			uint32_t	rsvd2:8;
+		};
+
 		uint16_t	delta_rec_size;
 		uint16_t	crc_val;