Merge branch 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto updates from Herbert Xu:
 "API:
   - Add the ability to abort a skcipher walk.

  Algorithms:
   - Fix XTS to actually do the stealing.
   - Add library helpers for AES and DES for single-block users.
   - Add library helpers for SHA256.
   - Add new DES key verification helper.
   - Add surrounding bits for ESSIV generator.
   - Add accelerations for aegis128.
   - Add test vectors for lzo-rle.

  Drivers:
   - Add i.MX8MQ support to caam.
   - Add gcm/ccm/cfb/ofb aes support in inside-secure.
   - Add ofb/cfb aes support in mediatek.
   - Add HiSilicon ZIP accelerator support.

  Others:
   - Fix potential race condition in padata.
   - Use unbound workqueues in padata"

* 'linus' of git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (311 commits)
  crypto: caam - Cast to long first before pointer conversion
  crypto: ccree - enable CTS support in AES-XTS
  crypto: inside-secure - Probe transform record cache RAM sizes
  crypto: inside-secure - Base RD fetchcount on actual RD FIFO size
  crypto: inside-secure - Base CD fetchcount on actual CD FIFO size
  crypto: inside-secure - Enable extended algorithms on newer HW
  crypto: inside-secure: Corrected configuration of EIP96_TOKEN_CTRL
  crypto: inside-secure - Add EIP97/EIP197 and endianness detection
  padata: remove cpu_index from the parallel_queue
  padata: unbind parallel jobs from specific CPUs
  padata: use separate workqueues for parallel and serial work
  padata, pcrypt: take CPU hotplug lock internally in padata_alloc_possible
  crypto: pcrypt - remove padata cpumask notifier
  padata: make padata_do_parallel find alternate callback CPU
  workqueue: require CPU hotplug read exclusion for apply_workqueue_attrs
  workqueue: unconfine alloc/apply/free_workqueue_attrs()
  padata: allocate workqueue internally
  arm64: dts: imx8mq: Add CAAM node
  random: Use wait_event_freezable() in add_hwgenerator_randomness()
  crypto: ux500 - Fix COMPILE_TEST warnings
  ...
diff --git a/Documentation/ABI/testing/debugfs-hisi-zip b/Documentation/ABI/testing/debugfs-hisi-zip
new file mode 100644
index 0000000..a7c63e6
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-hisi-zip
@@ -0,0 +1,50 @@
+What:           /sys/kernel/debug/hisi_zip/<bdf>/comp_core[01]/regs
+Date:           Nov 2018
+Contact:        linux-crypto@vger.kernel.org
+Description:    Dump of the debug registers of the compression cores.
+		Only available for PF.
+
+What:           /sys/kernel/debug/hisi_zip/<bdf>/decomp_core[0-5]/regs
+Date:           Nov 2018
+Contact:        linux-crypto@vger.kernel.org
+Description:    Dump of the debug registers of the decompression cores.
+		Only available for PF.
+
+What:           /sys/kernel/debug/hisi_zip/<bdf>/clear_enable
+Date:           Nov 2018
+Contact:        linux-crypto@vger.kernel.org
+Description:    Read-clear control of the compression/decompression core
+		debug registers. 1 enables clear-on-read, 0 disables it.
+		Writing this file only selects whether the counters are
+		cleared after being read; it has no other effect.
+		Only available for PF.
+
+What:           /sys/kernel/debug/hisi_zip/<bdf>/current_qm
+Date:           Nov 2018
+Contact:        linux-crypto@vger.kernel.org
+Description:    One ZIP controller has one PF and multiple VFs, each with
+		its own QM. Selects which QM the qm directory below refers to.
+		Only available for PF.
+
+What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/qm_regs
+Date:           Nov 2018
+Contact:        linux-crypto@vger.kernel.org
+Description:    Dump of QM related debug registers.
+		Available for PF and VF in host. VF in guest currently only
+		has one debug register.
+
+What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/current_q
+Date:           Nov 2018
+Contact:        linux-crypto@vger.kernel.org
+Description:    One QM may contain multiple queues. Selects the queue whose
+		debug registers are shown in qm_regs above.
+		Only available for PF.
+
+What:           /sys/kernel/debug/hisi_zip/<bdf>/qm/clear_enable
+Date:           Nov 2018
+Contact:        linux-crypto@vger.kernel.org
+Description:    Read-clear control of the QM debug registers (qm_regs).
+		1 enables clear-on-read, 0 disables it.
+		Writing this file only selects whether the counters are
+		cleared after being read; it has no other effect.
+		Only available for PF.
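
As a usage illustration (not part of the patch): the entries above are plain
debugfs files, so a small userspace program can drive them with ordinary file
I/O. The snippet below assumes debugfs is mounted at /sys/kernel/debug and
uses a made-up PCI address (0000:75:00.0) in place of <bdf>.

    #include <stdio.h>

    int main(void)
    {
            const char *base = "/sys/kernel/debug/hisi_zip/0000:75:00.0";
            char path[256], line[256];
            FILE *f;

            /* enable clear-on-read for the core debug registers */
            snprintf(path, sizeof(path), "%s/clear_enable", base);
            f = fopen(path, "w");
            if (!f)
                    return 1;
            fputs("1\n", f);
            fclose(f);

            /* dump the compression core 0 debug registers */
            snprintf(path, sizeof(path), "%s/comp_core0/regs", base);
            f = fopen(path, "r");
            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f))
                    fputs(line, stdout);
            fclose(f);
            return 0;
    }
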
diff --git a/Documentation/crypto/crypto_engine.rst b/Documentation/crypto/crypto_engine.rst
index 236c674..3baa23c 100644
--- a/Documentation/crypto/crypto_engine.rst
+++ b/Documentation/crypto/crypto_engine.rst
@@ -1,4 +1,5 @@
 .. SPDX-License-Identifier: GPL-2.0
+
 Crypto Engine
 =============
 
diff --git a/Documentation/devicetree/bindings/rng/timeriomem_rng.txt b/Documentation/devicetree/bindings/rng/timeriomem_rng.txt
index 2149400..fb48461 100644
--- a/Documentation/devicetree/bindings/rng/timeriomem_rng.txt
+++ b/Documentation/devicetree/bindings/rng/timeriomem_rng.txt
@@ -12,7 +12,7 @@
             which disables using this rng to automatically fill the kernel's
             entropy pool.
 
-N.B. currently 'reg' must be four bytes wide and aligned
+N.B. currently 'reg' must be at least four bytes wide and 32-bit aligned
 
 Example:
 
diff --git a/Documentation/padata.txt b/Documentation/padata.txt
index b103d0c..b37ba1e 100644
--- a/Documentation/padata.txt
+++ b/Documentation/padata.txt
@@ -16,10 +16,12 @@
 
     #include <linux/padata.h>
 
-    struct padata_instance *padata_alloc(struct workqueue_struct *wq,
+    struct padata_instance *padata_alloc(const char *name,
 					 const struct cpumask *pcpumask,
 					 const struct cpumask *cbcpumask);
 
+'name' simply identifies the instance.
+
 The pcpumask describes which processors will be used to execute work
 submitted to this instance in parallel. The cbcpumask defines which
 processors are allowed to be used as the serialization callback processor.
@@ -128,8 +130,7 @@
 
 Each task submitted to padata_do_parallel() will, in turn, be passed to
 exactly one call to the above-mentioned parallel() function, on one CPU, so
-true parallelism is achieved by submitting multiple tasks.  Despite the
-fact that the workqueue is used to make these calls, parallel() is run with
+true parallelism is achieved by submitting multiple tasks.  parallel() runs with
 software interrupts disabled and thus cannot sleep.  The parallel()
 function gets the padata_priv structure pointer as its lone parameter;
 information about the actual work to be done is probably obtained by using
@@ -148,7 +149,7 @@
 At some point in the future, padata_do_serial() will trigger a call to the
 serial() function in the padata_priv structure.  That call will happen on
 the CPU requested in the initial call to padata_do_parallel(); it, too, is
-done through the workqueue, but with local software interrupts disabled.
+run with local software interrupts disabled.
 Note that this call may be deferred for a while since the padata code takes
 pains to ensure that tasks are completed in the order in which they were
 submitted.
@@ -159,5 +160,4 @@
     void padata_free(struct padata_instance *pinst);
 
 This function will busy-wait while any remaining tasks are completed, so it
-might be best not to call it while there is work outstanding.  Shutting
-down the workqueue, if necessary, should be done separately.
+might be best not to call it while there is work outstanding.
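
For illustration only, a minimal sketch of a padata client following the
interface documented above (using the padata_do_parallel() signature as
updated by this series, which takes a pointer to the callback CPU) might look
like this; do_expensive_work() and complete_job() are hypothetical helpers and
error handling is elided:

    #include <linux/kernel.h>
    #include <linux/padata.h>

    struct my_job {
            struct padata_priv padata;      /* must be embedded */
            void *data;
    };

    /* hypothetical helpers, assumed to be provided elsewhere */
    extern void do_expensive_work(void *data);
    extern void complete_job(struct my_job *job);

    /* runs on one of the pcpumask CPUs with softirqs disabled */
    static void my_parallel(struct padata_priv *padata)
    {
            struct my_job *job = container_of(padata, struct my_job, padata);

            do_expensive_work(job->data);
            padata_do_serial(padata);       /* hand off for serialization */
    }

    /* invoked in submission order on the chosen callback CPU */
    static void my_serial(struct padata_priv *padata)
    {
            struct my_job *job = container_of(padata, struct my_job, padata);

            complete_job(job);
    }

    static int submit_job(struct padata_instance *pinst, struct my_job *job,
                          int *cb_cpu)
    {
            job->padata.parallel = my_parallel;
            job->padata.serial   = my_serial;

            /* may fail, e.g. when the instance cannot accept more work */
            return padata_do_parallel(pinst, &job->padata, cb_cpu);
    }
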
diff --git a/MAINTAINERS b/MAINTAINERS
index 6e1b0ac..05033aa 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7350,6 +7350,17 @@
 F:	drivers/scsi/hisi_sas/
 F:	Documentation/devicetree/bindings/scsi/hisilicon-sas.txt
 
+HISILICON QM AND ZIP CONTROLLER DRIVER
+M:	Zhou Wang <wangzhou1@hisilicon.com>
+L:	linux-crypto@vger.kernel.org
+S:	Maintained
+F:	drivers/crypto/hisilicon/qm.c
+F:	drivers/crypto/hisilicon/qm.h
+F:	drivers/crypto/hisilicon/sgl.c
+F:	drivers/crypto/hisilicon/sgl.h
+F:	drivers/crypto/hisilicon/zip/
+F:	Documentation/ABI/testing/debugfs-hisi-zip
+
 HMM - Heterogeneous Memory Management
 M:	Jérôme Glisse <jglisse@redhat.com>
 L:	linux-mm@kvack.org
@@ -7703,7 +7714,7 @@
 F:	drivers/crypto/nx/nx-sha*
 F:	drivers/crypto/nx/nx.*
 F:	drivers/crypto/nx/nx_csbcpb.h
-F:	drivers/crypto/nx/nx_debugfs.h
+F:	drivers/crypto/nx/nx_debugfs.c
 
 IBM Power Linux RAID adapter
 M:	Brian King <brking@us.ibm.com>
diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
index a95322b..b24df84 100644
--- a/arch/arm/crypto/Kconfig
+++ b/arch/arm/crypto/Kconfig
@@ -82,8 +82,8 @@
 	tristate "Bit sliced AES using NEON instructions"
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
+	select CRYPTO_LIB_AES
 	select CRYPTO_SIMD
-	select CRYPTO_AES
 	help
 	  Use a faster and more secure NEON based implementation of AES in CBC,
 	  CTR and XTS modes
diff --git a/arch/arm/crypto/aes-ce-core.S b/arch/arm/crypto/aes-ce-core.S
index caac519..b978cdf 100644
--- a/arch/arm/crypto/aes-ce-core.S
+++ b/arch/arm/crypto/aes-ce-core.S
@@ -44,63 +44,73 @@
 	veor		q0, q0, \key3
 	.endm
 
-	.macro		enc_dround_3x, key1, key2
+	.macro		enc_dround_4x, key1, key2
 	enc_round	q0, \key1
 	enc_round	q1, \key1
 	enc_round	q2, \key1
+	enc_round	q3, \key1
 	enc_round	q0, \key2
 	enc_round	q1, \key2
 	enc_round	q2, \key2
+	enc_round	q3, \key2
 	.endm
 
-	.macro		dec_dround_3x, key1, key2
+	.macro		dec_dround_4x, key1, key2
 	dec_round	q0, \key1
 	dec_round	q1, \key1
 	dec_round	q2, \key1
+	dec_round	q3, \key1
 	dec_round	q0, \key2
 	dec_round	q1, \key2
 	dec_round	q2, \key2
+	dec_round	q3, \key2
 	.endm
 
-	.macro		enc_fround_3x, key1, key2, key3
+	.macro		enc_fround_4x, key1, key2, key3
 	enc_round	q0, \key1
 	enc_round	q1, \key1
 	enc_round	q2, \key1
+	enc_round	q3, \key1
 	aese.8		q0, \key2
 	aese.8		q1, \key2
 	aese.8		q2, \key2
+	aese.8		q3, \key2
 	veor		q0, q0, \key3
 	veor		q1, q1, \key3
 	veor		q2, q2, \key3
+	veor		q3, q3, \key3
 	.endm
 
-	.macro		dec_fround_3x, key1, key2, key3
+	.macro		dec_fround_4x, key1, key2, key3
 	dec_round	q0, \key1
 	dec_round	q1, \key1
 	dec_round	q2, \key1
+	dec_round	q3, \key1
 	aesd.8		q0, \key2
 	aesd.8		q1, \key2
 	aesd.8		q2, \key2
+	aesd.8		q3, \key2
 	veor		q0, q0, \key3
 	veor		q1, q1, \key3
 	veor		q2, q2, \key3
+	veor		q3, q3, \key3
 	.endm
 
 	.macro		do_block, dround, fround
 	cmp		r3, #12			@ which key size?
-	vld1.8		{q10-q11}, [ip]!
+	vld1.32		{q10-q11}, [ip]!
 	\dround		q8, q9
-	vld1.8		{q12-q13}, [ip]!
+	vld1.32		{q12-q13}, [ip]!
 	\dround		q10, q11
-	vld1.8		{q10-q11}, [ip]!
+	vld1.32		{q10-q11}, [ip]!
 	\dround		q12, q13
-	vld1.8		{q12-q13}, [ip]!
+	vld1.32		{q12-q13}, [ip]!
 	\dround		q10, q11
 	blo		0f			@ AES-128: 10 rounds
-	vld1.8		{q10-q11}, [ip]!
+	vld1.32		{q10-q11}, [ip]!
 	\dround		q12, q13
 	beq		1f			@ AES-192: 12 rounds
-	vld1.8		{q12-q13}, [ip]
+	vld1.32		{q12-q13}, [ip]
 	\dround		q10, q11
 0:	\fround		q12, q13, q14
 	bx		lr
@@ -114,8 +124,9 @@
 	 * transforms. These should preserve all registers except q0 - q2 and ip
 	 * Arguments:
 	 *   q0        : first in/output block
-	 *   q1        : second in/output block (_3x version only)
-	 *   q2        : third in/output block (_3x version only)
+	 *   q1        : second in/output block (_4x version only)
+	 *   q2        : third in/output block (_4x version only)
+	 *   q3        : fourth in/output block (_4x version only)
 	 *   q8        : first round key
 	 *   q9        : second round key
 	 *   q14       : final round key
@@ -136,44 +147,44 @@
 ENDPROC(aes_decrypt)
 
 	.align		6
-aes_encrypt_3x:
+aes_encrypt_4x:
 	add		ip, r2, #32		@ 3rd round key
-	do_block	enc_dround_3x, enc_fround_3x
-ENDPROC(aes_encrypt_3x)
+	do_block	enc_dround_4x, enc_fround_4x
+ENDPROC(aes_encrypt_4x)
 
 	.align		6
-aes_decrypt_3x:
+aes_decrypt_4x:
 	add		ip, r2, #32		@ 3rd round key
-	do_block	dec_dround_3x, dec_fround_3x
-ENDPROC(aes_decrypt_3x)
+	do_block	dec_dround_4x, dec_fround_4x
+ENDPROC(aes_decrypt_4x)
 
 	.macro		prepare_key, rk, rounds
 	add		ip, \rk, \rounds, lsl #4
-	vld1.8		{q8-q9}, [\rk]		@ load first 2 round keys
-	vld1.8		{q14}, [ip]		@ load last round key
+	vld1.32		{q8-q9}, [\rk]		@ load first 2 round keys
+	vld1.32		{q14}, [ip]		@ load last round key
 	.endm
 
 	/*
-	 * aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 * aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 	 *		   int blocks)
-	 * aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 * aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 	 *		   int blocks)
 	 */
 ENTRY(ce_aes_ecb_encrypt)
 	push		{r4, lr}
 	ldr		r4, [sp, #8]
 	prepare_key	r2, r3
-.Lecbencloop3x:
-	subs		r4, r4, #3
+.Lecbencloop4x:
+	subs		r4, r4, #4
 	bmi		.Lecbenc1x
 	vld1.8		{q0-q1}, [r1]!
-	vld1.8		{q2}, [r1]!
-	bl		aes_encrypt_3x
+	vld1.8		{q2-q3}, [r1]!
+	bl		aes_encrypt_4x
 	vst1.8		{q0-q1}, [r0]!
-	vst1.8		{q2}, [r0]!
-	b		.Lecbencloop3x
+	vst1.8		{q2-q3}, [r0]!
+	b		.Lecbencloop4x
 .Lecbenc1x:
-	adds		r4, r4, #3
+	adds		r4, r4, #4
 	beq		.Lecbencout
 .Lecbencloop:
 	vld1.8		{q0}, [r1]!
@@ -189,17 +200,17 @@
 	push		{r4, lr}
 	ldr		r4, [sp, #8]
 	prepare_key	r2, r3
-.Lecbdecloop3x:
-	subs		r4, r4, #3
+.Lecbdecloop4x:
+	subs		r4, r4, #4
 	bmi		.Lecbdec1x
 	vld1.8		{q0-q1}, [r1]!
-	vld1.8		{q2}, [r1]!
-	bl		aes_decrypt_3x
+	vld1.8		{q2-q3}, [r1]!
+	bl		aes_decrypt_4x
 	vst1.8		{q0-q1}, [r0]!
-	vst1.8		{q2}, [r0]!
-	b		.Lecbdecloop3x
+	vst1.8		{q2-q3}, [r0]!
+	b		.Lecbdecloop4x
 .Lecbdec1x:
-	adds		r4, r4, #3
+	adds		r4, r4, #4
 	beq		.Lecbdecout
 .Lecbdecloop:
 	vld1.8		{q0}, [r1]!
@@ -212,9 +223,9 @@
 ENDPROC(ce_aes_ecb_decrypt)
 
 	/*
-	 * aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 * aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 	 *		   int blocks, u8 iv[])
-	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 * aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 	 *		   int blocks, u8 iv[])
 	 */
 ENTRY(ce_aes_cbc_encrypt)
@@ -236,88 +247,181 @@
 ENTRY(ce_aes_cbc_decrypt)
 	push		{r4-r6, lr}
 	ldrd		r4, r5, [sp, #16]
-	vld1.8		{q6}, [r5]		@ keep iv in q6
+	vld1.8		{q15}, [r5]		@ keep iv in q15
 	prepare_key	r2, r3
-.Lcbcdecloop3x:
-	subs		r4, r4, #3
+.Lcbcdecloop4x:
+	subs		r4, r4, #4
 	bmi		.Lcbcdec1x
 	vld1.8		{q0-q1}, [r1]!
-	vld1.8		{q2}, [r1]!
-	vmov		q3, q0
-	vmov		q4, q1
-	vmov		q5, q2
-	bl		aes_decrypt_3x
-	veor		q0, q0, q6
-	veor		q1, q1, q3
-	veor		q2, q2, q4
-	vmov		q6, q5
+	vld1.8		{q2-q3}, [r1]!
+	vmov		q4, q0
+	vmov		q5, q1
+	vmov		q6, q2
+	vmov		q7, q3
+	bl		aes_decrypt_4x
+	veor		q0, q0, q15
+	veor		q1, q1, q4
+	veor		q2, q2, q5
+	veor		q3, q3, q6
+	vmov		q15, q7
 	vst1.8		{q0-q1}, [r0]!
-	vst1.8		{q2}, [r0]!
-	b		.Lcbcdecloop3x
+	vst1.8		{q2-q3}, [r0]!
+	b		.Lcbcdecloop4x
 .Lcbcdec1x:
-	adds		r4, r4, #3
+	adds		r4, r4, #4
 	beq		.Lcbcdecout
-	vmov		q15, q14		@ preserve last round key
+	vmov		q6, q14			@ preserve last round key
 .Lcbcdecloop:
 	vld1.8		{q0}, [r1]!		@ get next ct block
 	veor		q14, q15, q6		@ combine prev ct with last key
-	vmov		q6, q0
+	vmov		q15, q0
 	bl		aes_decrypt
 	vst1.8		{q0}, [r0]!
 	subs		r4, r4, #1
 	bne		.Lcbcdecloop
 .Lcbcdecout:
-	vst1.8		{q6}, [r5]		@ keep iv in q6
+	vst1.8		{q15}, [r5]		@ keep iv in q15
 	pop		{r4-r6, pc}
 ENDPROC(ce_aes_cbc_decrypt)
 
+
 	/*
-	 * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
+	 * ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
+	 *			  int rounds, int bytes, u8 const iv[])
+	 * ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
+	 *			  int rounds, int bytes, u8 const iv[])
+	 */
+
+ENTRY(ce_aes_cbc_cts_encrypt)
+	push		{r4-r6, lr}
+	ldrd		r4, r5, [sp, #16]
+
+	movw		ip, :lower16:.Lcts_permute_table
+	movt		ip, :upper16:.Lcts_permute_table
+	sub		r4, r4, #16
+	add		lr, ip, #32
+	add		ip, ip, r4
+	sub		lr, lr, r4
+	vld1.8		{q5}, [ip]
+	vld1.8		{q6}, [lr]
+
+	add		ip, r1, r4
+	vld1.8		{q0}, [r1]			@ overlapping loads
+	vld1.8		{q3}, [ip]
+
+	vld1.8		{q1}, [r5]			@ get iv
+	prepare_key	r2, r3
+
+	veor		q0, q0, q1			@ xor with iv
+	bl		aes_encrypt
+
+	vtbl.8		d4, {d0-d1}, d10
+	vtbl.8		d5, {d0-d1}, d11
+	vtbl.8		d2, {d6-d7}, d12
+	vtbl.8		d3, {d6-d7}, d13
+
+	veor		q0, q0, q1
+	bl		aes_encrypt
+
+	add		r4, r0, r4
+	vst1.8		{q2}, [r4]			@ overlapping stores
+	vst1.8		{q0}, [r0]
+
+	pop		{r4-r6, pc}
+ENDPROC(ce_aes_cbc_cts_encrypt)
+
+ENTRY(ce_aes_cbc_cts_decrypt)
+	push		{r4-r6, lr}
+	ldrd		r4, r5, [sp, #16]
+
+	movw		ip, :lower16:.Lcts_permute_table
+	movt		ip, :upper16:.Lcts_permute_table
+	sub		r4, r4, #16
+	add		lr, ip, #32
+	add		ip, ip, r4
+	sub		lr, lr, r4
+	vld1.8		{q5}, [ip]
+	vld1.8		{q6}, [lr]
+
+	add		ip, r1, r4
+	vld1.8		{q0}, [r1]			@ overlapping loads
+	vld1.8		{q1}, [ip]
+
+	vld1.8		{q3}, [r5]			@ get iv
+	prepare_key	r2, r3
+
+	bl		aes_decrypt
+
+	vtbl.8		d4, {d0-d1}, d10
+	vtbl.8		d5, {d0-d1}, d11
+	vtbx.8		d0, {d2-d3}, d12
+	vtbx.8		d1, {d2-d3}, d13
+
+	veor		q1, q1, q2
+	bl		aes_decrypt
+	veor		q0, q0, q3			@ xor with iv
+
+	add		r4, r0, r4
+	vst1.8		{q1}, [r4]			@ overlapping stores
+	vst1.8		{q0}, [r0]
+
+	pop		{r4-r6, pc}
+ENDPROC(ce_aes_cbc_cts_decrypt)
+
+
+	/*
+	 * aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds,
 	 *		   int blocks, u8 ctr[])
 	 */
 ENTRY(ce_aes_ctr_encrypt)
 	push		{r4-r6, lr}
 	ldrd		r4, r5, [sp, #16]
-	vld1.8		{q6}, [r5]		@ load ctr
+	vld1.8		{q7}, [r5]		@ load ctr
 	prepare_key	r2, r3
-	vmov		r6, s27			@ keep swabbed ctr in r6
+	vmov		r6, s31			@ keep swabbed ctr in r6
 	rev		r6, r6
 	cmn		r6, r4			@ 32 bit overflow?
 	bcs		.Lctrloop
-.Lctrloop3x:
-	subs		r4, r4, #3
+.Lctrloop4x:
+	subs		r4, r4, #4
 	bmi		.Lctr1x
 	add		r6, r6, #1
-	vmov		q0, q6
-	vmov		q1, q6
+	vmov		q0, q7
+	vmov		q1, q7
 	rev		ip, r6
 	add		r6, r6, #1
-	vmov		q2, q6
+	vmov		q2, q7
 	vmov		s7, ip
 	rev		ip, r6
 	add		r6, r6, #1
+	vmov		q3, q7
 	vmov		s11, ip
-	vld1.8		{q3-q4}, [r1]!
-	vld1.8		{q5}, [r1]!
-	bl		aes_encrypt_3x
-	veor		q0, q0, q3
-	veor		q1, q1, q4
-	veor		q2, q2, q5
+	rev		ip, r6
+	add		r6, r6, #1
+	vmov		s15, ip
+	vld1.8		{q4-q5}, [r1]!
+	vld1.8		{q6}, [r1]!
+	vld1.8		{q15}, [r1]!
+	bl		aes_encrypt_4x
+	veor		q0, q0, q4
+	veor		q1, q1, q5
+	veor		q2, q2, q6
+	veor		q3, q3, q15
 	rev		ip, r6
 	vst1.8		{q0-q1}, [r0]!
-	vst1.8		{q2}, [r0]!
-	vmov		s27, ip
-	b		.Lctrloop3x
+	vst1.8		{q2-q3}, [r0]!
+	vmov		s31, ip
+	b		.Lctrloop4x
 .Lctr1x:
-	adds		r4, r4, #3
+	adds		r4, r4, #4
 	beq		.Lctrout
 .Lctrloop:
-	vmov		q0, q6
+	vmov		q0, q7
 	bl		aes_encrypt
 
 	adds		r6, r6, #1		@ increment BE ctr
 	rev		ip, r6
-	vmov		s27, ip
+	vmov		s31, ip
 	bcs		.Lctrcarry
 
 .Lctrcarrydone:
@@ -329,7 +433,7 @@
 	bne		.Lctrloop
 
 .Lctrout:
-	vst1.8		{q6}, [r5]		@ return next CTR value
+	vst1.8		{q7}, [r5]		@ return next CTR value
 	pop		{r4-r6, pc}
 
 .Lctrtailblock:
@@ -337,7 +441,7 @@
 	b		.Lctrout
 
 .Lctrcarry:
-	.irp		sreg, s26, s25, s24
+	.irp		sreg, s30, s29, s28
 	vmov		ip, \sreg		@ load next word of ctr
 	rev		ip, ip			@ ... to handle the carry
 	adds		ip, ip, #1
@@ -349,10 +453,10 @@
 ENDPROC(ce_aes_ctr_encrypt)
 
 	/*
-	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
-	 *		   int blocks, u8 iv[], u8 const rk2[], int first)
-	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
-	 *		   int blocks, u8 iv[], u8 const rk2[], int first)
+	 * aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
+	 *		   int bytes, u8 iv[], u32 const rk2[], int first)
+	 * aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds,
+	 *		   int bytes, u8 iv[], u32 const rk2[], int first)
 	 */
 
 	.macro		next_tweak, out, in, const, tmp
@@ -363,13 +467,10 @@
 	veor		\out, \out, \tmp
 	.endm
 
-	.align		3
-.Lxts_mul_x:
-	.quad		1, 0x87
-
 ce_aes_xts_init:
-	vldr		d14, .Lxts_mul_x
-	vldr		d15, .Lxts_mul_x + 8
+	vmov.i32	d30, #0x87		@ compose tweak mask vector
+	vmovl.u32	q15, d30
+	vshr.u64	d30, d31, #7
 
 	ldrd		r4, r5, [sp, #16]	@ load args
 	ldr		r6, [sp, #28]
@@ -390,49 +491,86 @@
 
 	bl		ce_aes_xts_init		@ run shared prologue
 	prepare_key	r2, r3
-	vmov		q3, q0
+	vmov		q4, q0
 
 	teq		r6, #0			@ start of a block?
-	bne		.Lxtsenc3x
+	bne		.Lxtsenc4x
 
-.Lxtsencloop3x:
-	next_tweak	q3, q3, q7, q6
-.Lxtsenc3x:
-	subs		r4, r4, #3
+.Lxtsencloop4x:
+	next_tweak	q4, q4, q15, q10
+.Lxtsenc4x:
+	subs		r4, r4, #64
 	bmi		.Lxtsenc1x
-	vld1.8		{q0-q1}, [r1]!		@ get 3 pt blocks
-	vld1.8		{q2}, [r1]!
-	next_tweak	q4, q3, q7, q6
-	veor		q0, q0, q3
-	next_tweak	q5, q4, q7, q6
-	veor		q1, q1, q4
-	veor		q2, q2, q5
-	bl		aes_encrypt_3x
-	veor		q0, q0, q3
-	veor		q1, q1, q4
-	veor		q2, q2, q5
-	vst1.8		{q0-q1}, [r0]!		@ write 3 ct blocks
-	vst1.8		{q2}, [r0]!
-	vmov		q3, q5
+	vld1.8		{q0-q1}, [r1]!		@ get 4 pt blocks
+	vld1.8		{q2-q3}, [r1]!
+	next_tweak	q5, q4, q15, q10
+	veor		q0, q0, q4
+	next_tweak	q6, q5, q15, q10
+	veor		q1, q1, q5
+	next_tweak	q7, q6, q15, q10
+	veor		q2, q2, q6
+	veor		q3, q3, q7
+	bl		aes_encrypt_4x
+	veor		q0, q0, q4
+	veor		q1, q1, q5
+	veor		q2, q2, q6
+	veor		q3, q3, q7
+	vst1.8		{q0-q1}, [r0]!		@ write 4 ct blocks
+	vst1.8		{q2-q3}, [r0]!
+	vmov		q4, q7
 	teq		r4, #0
-	beq		.Lxtsencout
-	b		.Lxtsencloop3x
+	beq		.Lxtsencret
+	b		.Lxtsencloop4x
 .Lxtsenc1x:
-	adds		r4, r4, #3
+	adds		r4, r4, #64
 	beq		.Lxtsencout
+	subs		r4, r4, #16
+	bmi		.LxtsencctsNx
 .Lxtsencloop:
 	vld1.8		{q0}, [r1]!
-	veor		q0, q0, q3
+.Lxtsencctsout:
+	veor		q0, q0, q4
 	bl		aes_encrypt
-	veor		q0, q0, q3
-	vst1.8		{q0}, [r0]!
-	subs		r4, r4, #1
+	veor		q0, q0, q4
+	teq		r4, #0
 	beq		.Lxtsencout
-	next_tweak	q3, q3, q7, q6
+	subs		r4, r4, #16
+	next_tweak	q4, q4, q15, q6
+	bmi		.Lxtsenccts
+	vst1.8		{q0}, [r0]!
 	b		.Lxtsencloop
 .Lxtsencout:
-	vst1.8		{q3}, [r5]
+	vst1.8		{q0}, [r0]
+.Lxtsencret:
+	vst1.8		{q4}, [r5]
 	pop		{r4-r6, pc}
+
+.LxtsencctsNx:
+	vmov		q0, q3
+	sub		r0, r0, #16
+.Lxtsenccts:
+	movw		ip, :lower16:.Lcts_permute_table
+	movt		ip, :upper16:.Lcts_permute_table
+
+	add		r1, r1, r4		@ rewind input pointer
+	add		r4, r4, #16		@ # bytes in final block
+	add		lr, ip, #32
+	add		ip, ip, r4
+	sub		lr, lr, r4
+	add		r4, r0, r4		@ output address of final block
+
+	vld1.8		{q1}, [r1]		@ load final partial block
+	vld1.8		{q2}, [ip]
+	vld1.8		{q3}, [lr]
+
+	vtbl.8		d4, {d0-d1}, d4
+	vtbl.8		d5, {d0-d1}, d5
+	vtbx.8		d0, {d2-d3}, d6
+	vtbx.8		d1, {d2-d3}, d7
+
+	vst1.8		{q2}, [r4]		@ overlapping stores
+	mov		r4, #0
+	b		.Lxtsencctsout
 ENDPROC(ce_aes_xts_encrypt)
 
 
@@ -441,50 +579,90 @@
 
 	bl		ce_aes_xts_init		@ run shared prologue
 	prepare_key	r2, r3
-	vmov		q3, q0
+	vmov		q4, q0
+
+	/* subtract 16 bytes if we are doing CTS */
+	tst		r4, #0xf
+	subne		r4, r4, #0x10
 
 	teq		r6, #0			@ start of a block?
-	bne		.Lxtsdec3x
+	bne		.Lxtsdec4x
 
-.Lxtsdecloop3x:
-	next_tweak	q3, q3, q7, q6
-.Lxtsdec3x:
-	subs		r4, r4, #3
+.Lxtsdecloop4x:
+	next_tweak	q4, q4, q15, q10
+.Lxtsdec4x:
+	subs		r4, r4, #64
 	bmi		.Lxtsdec1x
-	vld1.8		{q0-q1}, [r1]!		@ get 3 ct blocks
-	vld1.8		{q2}, [r1]!
-	next_tweak	q4, q3, q7, q6
-	veor		q0, q0, q3
-	next_tweak	q5, q4, q7, q6
-	veor		q1, q1, q4
-	veor		q2, q2, q5
-	bl		aes_decrypt_3x
-	veor		q0, q0, q3
-	veor		q1, q1, q4
-	veor		q2, q2, q5
-	vst1.8		{q0-q1}, [r0]!		@ write 3 pt blocks
-	vst1.8		{q2}, [r0]!
-	vmov		q3, q5
+	vld1.8		{q0-q1}, [r1]!		@ get 4 ct blocks
+	vld1.8		{q2-q3}, [r1]!
+	next_tweak	q5, q4, q15, q10
+	veor		q0, q0, q4
+	next_tweak	q6, q5, q15, q10
+	veor		q1, q1, q5
+	next_tweak	q7, q6, q15, q10
+	veor		q2, q2, q6
+	veor		q3, q3, q7
+	bl		aes_decrypt_4x
+	veor		q0, q0, q4
+	veor		q1, q1, q5
+	veor		q2, q2, q6
+	veor		q3, q3, q7
+	vst1.8		{q0-q1}, [r0]!		@ write 4 pt blocks
+	vst1.8		{q2-q3}, [r0]!
+	vmov		q4, q7
 	teq		r4, #0
 	beq		.Lxtsdecout
-	b		.Lxtsdecloop3x
+	b		.Lxtsdecloop4x
 .Lxtsdec1x:
-	adds		r4, r4, #3
+	adds		r4, r4, #64
 	beq		.Lxtsdecout
+	subs		r4, r4, #16
 .Lxtsdecloop:
 	vld1.8		{q0}, [r1]!
-	veor		q0, q0, q3
-	add		ip, r2, #32		@ 3rd round key
+	bmi		.Lxtsdeccts
+.Lxtsdecctsout:
+	veor		q0, q0, q4
 	bl		aes_decrypt
-	veor		q0, q0, q3
+	veor		q0, q0, q4
 	vst1.8		{q0}, [r0]!
-	subs		r4, r4, #1
+	teq		r4, #0
 	beq		.Lxtsdecout
-	next_tweak	q3, q3, q7, q6
+	subs		r4, r4, #16
+	next_tweak	q4, q4, q15, q6
 	b		.Lxtsdecloop
 .Lxtsdecout:
-	vst1.8		{q3}, [r5]
+	vst1.8		{q4}, [r5]
 	pop		{r4-r6, pc}
+
+.Lxtsdeccts:
+	movw		ip, :lower16:.Lcts_permute_table
+	movt		ip, :upper16:.Lcts_permute_table
+
+	add		r1, r1, r4		@ rewind input pointer
+	add		r4, r4, #16		@ # bytes in final block
+	add		lr, ip, #32
+	add		ip, ip, r4
+	sub		lr, lr, r4
+	add		r4, r0, r4		@ output address of final block
+
+	next_tweak	q5, q4, q15, q6
+
+	vld1.8		{q1}, [r1]		@ load final partial block
+	vld1.8		{q2}, [ip]
+	vld1.8		{q3}, [lr]
+
+	veor		q0, q0, q5
+	bl		aes_decrypt
+	veor		q0, q0, q5
+
+	vtbl.8		d4, {d0-d1}, d4
+	vtbl.8		d5, {d0-d1}, d5
+	vtbx.8		d0, {d2-d3}, d6
+	vtbx.8		d1, {d2-d3}, d7
+
+	vst1.8		{q2}, [r4]		@ overlapping stores
+	mov		r4, #0
+	b		.Lxtsdecctsout
 ENDPROC(ce_aes_xts_decrypt)
 
 	/*
@@ -505,8 +683,18 @@
 	 *                                        operation on round key *src
 	 */
 ENTRY(ce_aes_invert)
-	vld1.8		{q0}, [r1]
+	vld1.32		{q0}, [r1]
 	aesimc.8	q0, q0
-	vst1.8		{q0}, [r0]
+	vst1.32		{q0}, [r0]
 	bx		lr
 ENDPROC(ce_aes_invert)
+
+	.section	".rodata", "a"
+	.align		6
+.Lcts_permute_table:
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
+	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe,  0xf
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
+	.byte		0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
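
The ciphertext stealing paths above (ce_aes_cbc_cts_encrypt/decrypt and the
XTS tail handling) use .Lcts_permute_table together with vtbl/vtbx and
overlapping loads/stores to shuffle the final two blocks entirely in NEON
registers. As a rough model only (not the kernel code path, and with a
caller-supplied stand-in for the block cipher), the C sketch below shows what
the CBC-CTS encrypt case computes for a final 16 + d bytes of plaintext:

    #include <stdint.h>
    #include <string.h>

    #define BLK 16

    /* stand-in for one AES block encryption with an already expanded key */
    typedef void (*block_fn)(uint8_t out[BLK], const uint8_t in[BLK],
                             const void *key);

    /*
     * in:  the last 16 + d plaintext bytes (1 <= d <= 16)
     * iv:  CBC chaining value entering these two blocks
     * out: receives 16 + d ciphertext bytes, full block first,
     *      short "stolen" block last
     */
    static void cbc_cts_encrypt_tail(uint8_t *out, const uint8_t *in, int d,
                                     const uint8_t iv[BLK], block_fn enc,
                                     const void *key)
    {
            uint8_t x[BLK], pad[BLK] = { 0 };
            int i;

            /* X = E(P_{n-1} ^ IV): the block whose head gets "stolen" */
            for (i = 0; i < BLK; i++)
                    x[i] = in[i] ^ iv[i];
            enc(x, x, key);

            /* final short ciphertext block = first d bytes of X */
            memcpy(out + BLK, x, d);

            /* second-to-last block = E((P_n || zero padding) ^ X) */
            memcpy(pad, in + BLK, d);
            for (i = 0; i < BLK; i++)
                    pad[i] ^= x[i];
            enc(out, pad, key);
    }

The vtbl/vtbx lookups with 0xff indices and the overlapping 16-byte stores in
the assembly achieve the same zero padding and byte reordering without ever
touching memory outside the 16 + d bytes of input and output.
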
diff --git a/arch/arm/crypto/aes-ce-glue.c b/arch/arm/crypto/aes-ce-glue.c
index 620aacf..cdb1a07 100644
--- a/arch/arm/crypto/aes-ce-glue.c
+++ b/arch/arm/crypto/aes-ce-glue.c
@@ -7,9 +7,13 @@
 
 #include <asm/hwcap.h>
 #include <asm/neon.h>
+#include <asm/simd.h>
+#include <asm/unaligned.h>
 #include <crypto/aes.h>
+#include <crypto/ctr.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <linux/cpufeature.h>
 #include <linux/module.h>
 #include <crypto/xts.h>
@@ -22,25 +26,29 @@
 asmlinkage u32 ce_aes_sub(u32 input);
 asmlinkage void ce_aes_invert(void *dst, void *src);
 
-asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void ce_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				   int rounds, int blocks);
-asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void ce_aes_ecb_decrypt(u8 out[], u8 const in[], u32 const rk[],
 				   int rounds, int blocks);
 
-asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void ce_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				   int rounds, int blocks, u8 iv[]);
-asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void ce_aes_cbc_decrypt(u8 out[], u8 const in[], u32 const rk[],
 				   int rounds, int blocks, u8 iv[]);
+asmlinkage void ce_aes_cbc_cts_encrypt(u8 out[], u8 const in[], u32 const rk[],
+				   int rounds, int bytes, u8 const iv[]);
+asmlinkage void ce_aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[],
+				   int rounds, int bytes, u8 const iv[]);
 
-asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
+asmlinkage void ce_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				   int rounds, int blocks, u8 ctr[]);
 
-asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[],
-				   int rounds, int blocks, u8 iv[],
-				   u8 const rk2[], int first);
-asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[],
-				   int rounds, int blocks, u8 iv[],
-				   u8 const rk2[], int first);
+asmlinkage void ce_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
+				   int rounds, int bytes, u8 iv[],
+				   u32 const rk2[], int first);
+asmlinkage void ce_aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
+				   int rounds, int bytes, u8 iv[],
+				   u32 const rk2[], int first);
 
 struct aes_block {
 	u8 b[AES_BLOCK_SIZE];
@@ -77,21 +85,17 @@
 	    key_len != AES_KEYSIZE_256)
 		return -EINVAL;
 
-	memcpy(ctx->key_enc, in_key, key_len);
 	ctx->key_length = key_len;
+	for (i = 0; i < kwords; i++)
+		ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
 
 	kernel_neon_begin();
 	for (i = 0; i < sizeof(rcon); i++) {
 		u32 *rki = ctx->key_enc + (i * kwords);
 		u32 *rko = rki + kwords;
 
-#ifndef CONFIG_CPU_BIG_ENDIAN
 		rko[0] = ror32(ce_aes_sub(rki[kwords - 1]), 8);
 		rko[0] = rko[0] ^ rki[0] ^ rcon[i];
-#else
-		rko[0] = rol32(ce_aes_sub(rki[kwords - 1]), 8);
-		rko[0] = rko[0] ^ rki[0] ^ (rcon[i] << 24);
-#endif
 		rko[1] = rko[0] ^ rki[1];
 		rko[2] = rko[1] ^ rki[2];
 		rko[3] = rko[2] ^ rki[3];
@@ -178,15 +182,15 @@
 	unsigned int blocks;
 	int err;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		ce_aes_ecb_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				   (u8 *)ctx->key_enc, num_rounds(ctx), blocks);
+				   ctx->key_enc, num_rounds(ctx), blocks);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 	return err;
 }
 
@@ -198,58 +202,192 @@
 	unsigned int blocks;
 	int err;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		ce_aes_ecb_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				   (u8 *)ctx->key_dec, num_rounds(ctx), blocks);
+				   ctx->key_dec, num_rounds(ctx), blocks);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
+	return err;
+}
+
+static int cbc_encrypt_walk(struct skcipher_request *req,
+			    struct skcipher_walk *walk)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	unsigned int blocks;
+	int err = 0;
+
+	while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
+		ce_aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr,
+				   ctx->key_enc, num_rounds(ctx), blocks,
+				   walk->iv);
+		kernel_neon_end();
+		err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
+	}
 	return err;
 }
 
 static int cbc_encrypt(struct skcipher_request *req)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_walk walk;
-	unsigned int blocks;
 	int err;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
+	if (err)
+		return err;
+	return cbc_encrypt_walk(req, &walk);
+}
 
-	kernel_neon_begin();
-	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
-		ce_aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				   (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
-				   walk.iv);
-		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+static int cbc_decrypt_walk(struct skcipher_request *req,
+			    struct skcipher_walk *walk)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	unsigned int blocks;
+	int err = 0;
+
+	while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
+		ce_aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr,
+				   ctx->key_dec, num_rounds(ctx), blocks,
+				   walk->iv);
+		kernel_neon_end();
+		err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 	return err;
 }
 
 static int cbc_decrypt(struct skcipher_request *req)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct skcipher_walk walk;
-	unsigned int blocks;
 	int err;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
+	if (err)
+		return err;
+	return cbc_decrypt_walk(req, &walk);
+}
+
+static int cts_cbc_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+	struct scatterlist *src = req->src, *dst = req->dst;
+	struct scatterlist sg_src[2], sg_dst[2];
+	struct skcipher_request subreq;
+	struct skcipher_walk walk;
+	int err;
+
+	skcipher_request_set_tfm(&subreq, tfm);
+	skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
+				      NULL, NULL);
+
+	if (req->cryptlen <= AES_BLOCK_SIZE) {
+		if (req->cryptlen < AES_BLOCK_SIZE)
+			return -EINVAL;
+		cbc_blocks = 1;
+	}
+
+	if (cbc_blocks > 0) {
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   cbc_blocks * AES_BLOCK_SIZE,
+					   req->iv);
+
+		err = skcipher_walk_virt(&walk, &subreq, false) ?:
+		      cbc_encrypt_walk(&subreq, &walk);
+		if (err)
+			return err;
+
+		if (req->cryptlen == AES_BLOCK_SIZE)
+			return 0;
+
+		dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
+		if (req->dst != req->src)
+			dst = scatterwalk_ffwd(sg_dst, req->dst,
+					       subreq.cryptlen);
+	}
+
+	/* handle ciphertext stealing */
+	skcipher_request_set_crypt(&subreq, src, dst,
+				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, &subreq, false);
+	if (err)
+		return err;
 
 	kernel_neon_begin();
-	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
-		ce_aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				   (u8 *)ctx->key_dec, num_rounds(ctx), blocks,
-				   walk.iv);
-		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
-	}
+	ce_aes_cbc_cts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+			       ctx->key_enc, num_rounds(ctx), walk.nbytes,
+			       walk.iv);
 	kernel_neon_end();
-	return err;
+
+	return skcipher_walk_done(&walk, 0);
+}
+
+static int cts_cbc_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
+	struct scatterlist *src = req->src, *dst = req->dst;
+	struct scatterlist sg_src[2], sg_dst[2];
+	struct skcipher_request subreq;
+	struct skcipher_walk walk;
+	int err;
+
+	skcipher_request_set_tfm(&subreq, tfm);
+	skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
+				      NULL, NULL);
+
+	if (req->cryptlen <= AES_BLOCK_SIZE) {
+		if (req->cryptlen < AES_BLOCK_SIZE)
+			return -EINVAL;
+		cbc_blocks = 1;
+	}
+
+	if (cbc_blocks > 0) {
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   cbc_blocks * AES_BLOCK_SIZE,
+					   req->iv);
+
+		err = skcipher_walk_virt(&walk, &subreq, false) ?:
+		      cbc_decrypt_walk(&subreq, &walk);
+		if (err)
+			return err;
+
+		if (req->cryptlen == AES_BLOCK_SIZE)
+			return 0;
+
+		dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
+		if (req->dst != req->src)
+			dst = scatterwalk_ffwd(sg_dst, req->dst,
+					       subreq.cryptlen);
+	}
+
+	/* handle ciphertext stealing */
+	skcipher_request_set_crypt(&subreq, src, dst,
+				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, &subreq, false);
+	if (err)
+		return err;
+
+	kernel_neon_begin();
+	ce_aes_cbc_cts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+			       ctx->key_dec, num_rounds(ctx), walk.nbytes,
+			       walk.iv);
+	kernel_neon_end();
+
+	return skcipher_walk_done(&walk, 0);
 }
 
 static int ctr_encrypt(struct skcipher_request *req)
@@ -259,13 +397,14 @@
 	struct skcipher_walk walk;
 	int err, blocks;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+		kernel_neon_begin();
 		ce_aes_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				   (u8 *)ctx->key_enc, num_rounds(ctx), blocks,
+				   ctx->key_enc, num_rounds(ctx), blocks,
 				   walk.iv);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
 	}
 	if (walk.nbytes) {
@@ -279,36 +418,109 @@
 		 */
 		blocks = -1;
 
-		ce_aes_ctr_encrypt(tail, NULL, (u8 *)ctx->key_enc,
-				   num_rounds(ctx), blocks, walk.iv);
+		kernel_neon_begin();
+		ce_aes_ctr_encrypt(tail, NULL, ctx->key_enc, num_rounds(ctx),
+				   blocks, walk.iv);
+		kernel_neon_end();
 		crypto_xor_cpy(tdst, tsrc, tail, nbytes);
 		err = skcipher_walk_done(&walk, 0);
 	}
-	kernel_neon_end();
-
 	return err;
 }
 
+static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
+{
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	unsigned long flags;
+
+	/*
+	 * Temporarily disable interrupts to avoid races where
+	 * cachelines are evicted when the CPU is interrupted
+	 * to do something else.
+	 */
+	local_irq_save(flags);
+	aes_encrypt(ctx, dst, src);
+	local_irq_restore(flags);
+}
+
+static int ctr_encrypt_sync(struct skcipher_request *req)
+{
+	if (!crypto_simd_usable())
+		return crypto_ctr_encrypt_walk(req, ctr_encrypt_one);
+
+	return ctr_encrypt(req);
+}
+
 static int xts_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = num_rounds(&ctx->key1);
+	int tail = req->cryptlen % AES_BLOCK_SIZE;
+	struct scatterlist sg_src[2], sg_dst[2];
+	struct skcipher_request subreq;
+	struct scatterlist *src, *dst;
 	struct skcipher_walk walk;
-	unsigned int blocks;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
+		int xts_blocks = DIV_ROUND_UP(req->cryptlen,
+					      AES_BLOCK_SIZE) - 2;
+
+		skcipher_walk_abort(&walk);
+
+		skcipher_request_set_tfm(&subreq, tfm);
+		skcipher_request_set_callback(&subreq,
+					      skcipher_request_flags(req),
+					      NULL, NULL);
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   xts_blocks * AES_BLOCK_SIZE,
+					   req->iv);
+		req = &subreq;
+		err = skcipher_walk_virt(&walk, req, false);
+	} else {
+		tail = 0;
+	}
+
+	for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
+		int nbytes = walk.nbytes;
+
+		if (walk.nbytes < walk.total)
+			nbytes &= ~(AES_BLOCK_SIZE - 1);
+
+		kernel_neon_begin();
+		ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				   ctx->key1.key_enc, rounds, nbytes, walk.iv,
+				   ctx->key2.key_enc, first);
+		kernel_neon_end();
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+	}
+
+	if (err || likely(!tail))
+		return err;
+
+	dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+	if (req->dst != req->src)
+		dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+
+	skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, req, false);
+	if (err)
+		return err;
 
 	kernel_neon_begin();
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
-		ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				   (u8 *)ctx->key1.key_enc, rounds, blocks,
-				   walk.iv, (u8 *)ctx->key2.key_enc, first);
-		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
-	}
+	ce_aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+			   ctx->key1.key_enc, rounds, walk.nbytes, walk.iv,
+			   ctx->key2.key_enc, first);
 	kernel_neon_end();
 
-	return err;
+	return skcipher_walk_done(&walk, 0);
 }
 
 static int xts_decrypt(struct skcipher_request *req)
@@ -316,87 +528,165 @@
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = num_rounds(&ctx->key1);
+	int tail = req->cryptlen % AES_BLOCK_SIZE;
+	struct scatterlist sg_src[2], sg_dst[2];
+	struct skcipher_request subreq;
+	struct scatterlist *src, *dst;
 	struct skcipher_walk walk;
-	unsigned int blocks;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
+		int xts_blocks = DIV_ROUND_UP(req->cryptlen,
+					      AES_BLOCK_SIZE) - 2;
+
+		skcipher_walk_abort(&walk);
+
+		skcipher_request_set_tfm(&subreq, tfm);
+		skcipher_request_set_callback(&subreq,
+					      skcipher_request_flags(req),
+					      NULL, NULL);
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   xts_blocks * AES_BLOCK_SIZE,
+					   req->iv);
+		req = &subreq;
+		err = skcipher_walk_virt(&walk, req, false);
+	} else {
+		tail = 0;
+	}
+
+	for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
+		int nbytes = walk.nbytes;
+
+		if (walk.nbytes < walk.total)
+			nbytes &= ~(AES_BLOCK_SIZE - 1);
+
+		kernel_neon_begin();
+		ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				   ctx->key1.key_dec, rounds, nbytes, walk.iv,
+				   ctx->key2.key_enc, first);
+		kernel_neon_end();
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+	}
+
+	if (err || likely(!tail))
+		return err;
+
+	dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+	if (req->dst != req->src)
+		dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+
+	skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, req, false);
+	if (err)
+		return err;
 
 	kernel_neon_begin();
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
-		ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				   (u8 *)ctx->key1.key_dec, rounds, blocks,
-				   walk.iv, (u8 *)ctx->key2.key_enc, first);
-		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
-	}
+	ce_aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+			   ctx->key1.key_dec, rounds, walk.nbytes, walk.iv,
+			   ctx->key2.key_enc, first);
 	kernel_neon_end();
 
-	return err;
+	return skcipher_walk_done(&walk, 0);
 }
 
 static struct skcipher_alg aes_algs[] = { {
-	.base = {
-		.cra_name		= "__ecb(aes)",
-		.cra_driver_name	= "__ecb-aes-ce",
-		.cra_priority		= 300,
-		.cra_flags		= CRYPTO_ALG_INTERNAL,
-		.cra_blocksize		= AES_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-		.cra_module		= THIS_MODULE,
-	},
-	.min_keysize	= AES_MIN_KEY_SIZE,
-	.max_keysize	= AES_MAX_KEY_SIZE,
-	.setkey		= ce_aes_setkey,
-	.encrypt	= ecb_encrypt,
-	.decrypt	= ecb_decrypt,
+	.base.cra_name		= "__ecb(aes)",
+	.base.cra_driver_name	= "__ecb-aes-ce",
+	.base.cra_priority	= 300,
+	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct crypto_aes_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.setkey			= ce_aes_setkey,
+	.encrypt		= ecb_encrypt,
+	.decrypt		= ecb_decrypt,
 }, {
-	.base = {
-		.cra_name		= "__cbc(aes)",
-		.cra_driver_name	= "__cbc-aes-ce",
-		.cra_priority		= 300,
-		.cra_flags		= CRYPTO_ALG_INTERNAL,
-		.cra_blocksize		= AES_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-		.cra_module		= THIS_MODULE,
-	},
-	.min_keysize	= AES_MIN_KEY_SIZE,
-	.max_keysize	= AES_MAX_KEY_SIZE,
-	.ivsize		= AES_BLOCK_SIZE,
-	.setkey		= ce_aes_setkey,
-	.encrypt	= cbc_encrypt,
-	.decrypt	= cbc_decrypt,
+	.base.cra_name		= "__cbc(aes)",
+	.base.cra_driver_name	= "__cbc-aes-ce",
+	.base.cra_priority	= 300,
+	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct crypto_aes_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= ce_aes_setkey,
+	.encrypt		= cbc_encrypt,
+	.decrypt		= cbc_decrypt,
 }, {
-	.base = {
-		.cra_name		= "__ctr(aes)",
-		.cra_driver_name	= "__ctr-aes-ce",
-		.cra_priority		= 300,
-		.cra_flags		= CRYPTO_ALG_INTERNAL,
-		.cra_blocksize		= 1,
-		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-		.cra_module		= THIS_MODULE,
-	},
-	.min_keysize	= AES_MIN_KEY_SIZE,
-	.max_keysize	= AES_MAX_KEY_SIZE,
-	.ivsize		= AES_BLOCK_SIZE,
-	.chunksize	= AES_BLOCK_SIZE,
-	.setkey		= ce_aes_setkey,
-	.encrypt	= ctr_encrypt,
-	.decrypt	= ctr_encrypt,
+	.base.cra_name		= "__cts(cbc(aes))",
+	.base.cra_driver_name	= "__cts-cbc-aes-ce",
+	.base.cra_priority	= 300,
+	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct crypto_aes_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.walksize		= 2 * AES_BLOCK_SIZE,
+	.setkey			= ce_aes_setkey,
+	.encrypt		= cts_cbc_encrypt,
+	.decrypt		= cts_cbc_decrypt,
 }, {
-	.base = {
-		.cra_name		= "__xts(aes)",
-		.cra_driver_name	= "__xts-aes-ce",
-		.cra_priority		= 300,
-		.cra_flags		= CRYPTO_ALG_INTERNAL,
-		.cra_blocksize		= AES_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct crypto_aes_xts_ctx),
-		.cra_module		= THIS_MODULE,
-	},
-	.min_keysize	= 2 * AES_MIN_KEY_SIZE,
-	.max_keysize	= 2 * AES_MAX_KEY_SIZE,
-	.ivsize		= AES_BLOCK_SIZE,
-	.setkey		= xts_set_key,
-	.encrypt	= xts_encrypt,
-	.decrypt	= xts_decrypt,
+	.base.cra_name		= "__ctr(aes)",
+	.base.cra_driver_name	= "__ctr-aes-ce",
+	.base.cra_priority	= 300,
+	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct crypto_aes_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.chunksize		= AES_BLOCK_SIZE,
+	.setkey			= ce_aes_setkey,
+	.encrypt		= ctr_encrypt,
+	.decrypt		= ctr_encrypt,
+}, {
+	.base.cra_name		= "ctr(aes)",
+	.base.cra_driver_name	= "ctr-aes-ce-sync",
+	.base.cra_priority	= 300 - 1,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct crypto_aes_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.chunksize		= AES_BLOCK_SIZE,
+	.setkey			= ce_aes_setkey,
+	.encrypt		= ctr_encrypt_sync,
+	.decrypt		= ctr_encrypt_sync,
+}, {
+	.base.cra_name		= "__xts(aes)",
+	.base.cra_driver_name	= "__xts-aes-ce",
+	.base.cra_priority	= 300,
+	.base.cra_flags		= CRYPTO_ALG_INTERNAL,
+	.base.cra_blocksize	= AES_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct crypto_aes_xts_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= 2 * AES_MIN_KEY_SIZE,
+	.max_keysize		= 2 * AES_MAX_KEY_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.walksize		= 2 * AES_BLOCK_SIZE,
+	.setkey			= xts_set_key,
+	.encrypt		= xts_encrypt,
+	.decrypt		= xts_decrypt,
 } };
 
 static struct simd_skcipher_alg *aes_simd_algs[ARRAY_SIZE(aes_algs)];
@@ -425,6 +715,9 @@
 		return err;
 
 	for (i = 0; i < ARRAY_SIZE(aes_algs); i++) {
+		if (!(aes_algs[i].base.cra_flags & CRYPTO_ALG_INTERNAL))
+			continue;
+
 		algname = aes_algs[i].base.cra_name + 2;
 		drvname = aes_algs[i].base.cra_driver_name + 2;
 		basename = aes_algs[i].base.cra_driver_name;
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
index 4460ed0..472e56d 100644
--- a/arch/arm/crypto/aes-cipher-core.S
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -219,43 +219,5 @@
 
 	.align		5
 ENTRY(__aes_arm_decrypt)
-	do_crypt	iround, crypto_it_tab, __aes_arm_inverse_sbox, 0
+	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
 ENDPROC(__aes_arm_decrypt)
-
-	.section	".rodata", "a"
-	.align		L1_CACHE_SHIFT
-	.type		__aes_arm_inverse_sbox, %object
-__aes_arm_inverse_sbox:
-	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
-	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
-	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
-	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
-	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
-	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
-	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
-	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
-	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
-	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
-	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
-	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
-	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
-	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
-	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
-	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
-	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
-	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
-	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
-	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
-	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
-	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
-	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
-	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
-	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
-	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
-	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
-	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
-	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
-	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
-	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
-	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
-	.size		__aes_arm_inverse_sbox, . - __aes_arm_inverse_sbox
diff --git a/arch/arm/crypto/aes-cipher-glue.c b/arch/arm/crypto/aes-cipher-glue.c
index 128d0a1..8cd00f5 100644
--- a/arch/arm/crypto/aes-cipher-glue.c
+++ b/arch/arm/crypto/aes-cipher-glue.c
@@ -11,12 +11,9 @@
 #include <linux/module.h>
 
 asmlinkage void __aes_arm_encrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
-EXPORT_SYMBOL(__aes_arm_encrypt);
-
 asmlinkage void __aes_arm_decrypt(u32 *rk, int rounds, const u8 *in, u8 *out);
-EXPORT_SYMBOL(__aes_arm_decrypt);
 
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void aes_arm_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	int rounds = 6 + ctx->key_length / 4;
@@ -24,7 +21,7 @@
 	__aes_arm_encrypt(ctx->key_enc, rounds, in, out);
 }
 
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void aes_arm_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	int rounds = 6 + ctx->key_length / 4;
@@ -44,8 +41,8 @@
 	.cra_cipher.cia_min_keysize	= AES_MIN_KEY_SIZE,
 	.cra_cipher.cia_max_keysize	= AES_MAX_KEY_SIZE,
 	.cra_cipher.cia_setkey		= crypto_aes_set_key,
-	.cra_cipher.cia_encrypt		= aes_encrypt,
-	.cra_cipher.cia_decrypt		= aes_decrypt,
+	.cra_cipher.cia_encrypt		= aes_arm_encrypt,
+	.cra_cipher.cia_decrypt		= aes_arm_decrypt,
 
 #ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
 	.cra_alignmask			= 3,
diff --git a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S
index d3eab76..cfaed4e 100644
--- a/arch/arm/crypto/aes-neonbs-core.S
+++ b/arch/arm/crypto/aes-neonbs-core.S
@@ -887,19 +887,17 @@
 	veor		\out, \out, \tmp
 	.endm
 
-	.align		4
-.Lxts_mul_x:
-	.quad		1, 0x87
-
 	/*
 	 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		     int blocks, u8 iv[])
+	 *		     int blocks, u8 iv[], int reorder_last_tweak)
 	 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
-	 *		     int blocks, u8 iv[])
+	 *		     int blocks, u8 iv[], int reorder_last_tweak)
 	 */
 __xts_prepare8:
 	vld1.8		{q14}, [r7]		// load iv
-	__ldr		q15, .Lxts_mul_x	// load tweak mask
+	vmov.i32	d30, #0x87		// compose tweak mask vector
+	vmovl.u32	q15, d30
+	vshr.u64	d30, d31, #7
 	vmov		q12, q14
 
 	__adr		ip, 0f
@@ -946,17 +944,25 @@
 
 	vld1.8		{q7}, [r1]!
 	next_tweak	q14, q12, q15, q13
-	veor		q7, q7, q12
+THUMB(	itt		le		)
+	W(cmple)	r8, #0
+	ble		1f
+0:	veor		q7, q7, q12
 	vst1.8		{q12}, [r4, :128]
 
-0:	vst1.8		{q14}, [r7]		// store next iv
+	vst1.8		{q14}, [r7]		// store next iv
 	bx		lr
+
+1:	vswp		q12, q14
+	b		0b
 ENDPROC(__xts_prepare8)
 
 	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
 	push		{r4-r8, lr}
 	mov		r5, sp			// preserve sp
 	ldrd		r6, r7, [sp, #24]	// get blocks and iv args
+	ldr		r8, [sp, #32]		// reorder final tweak?
+	rsb		r8, r8, #1
 	sub		ip, sp, #128		// make room for 8x tweak
 	bic		ip, ip, #0xf		// align sp to 16 bytes
 	mov		sp, ip
diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c
index bd0bee9..e85839a 100644
--- a/arch/arm/crypto/aes-neonbs-glue.c
+++ b/arch/arm/crypto/aes-neonbs-glue.c
@@ -6,10 +6,13 @@
  */
 
 #include <asm/neon.h>
+#include <asm/simd.h>
 #include <crypto/aes.h>
 #include <crypto/cbc.h>
+#include <crypto/ctr.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <crypto/xts.h>
 #include <linux/module.h>
 
@@ -35,9 +38,9 @@
 				  int rounds, int blocks, u8 ctr[], u8 final[]);
 
 asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[],
-				  int rounds, int blocks, u8 iv[]);
+				  int rounds, int blocks, u8 iv[], int);
 asmlinkage void aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[],
-				  int rounds, int blocks, u8 iv[]);
+				  int rounds, int blocks, u8 iv[], int);
 
 struct aesbs_ctx {
 	int	rounds;
@@ -51,9 +54,15 @@
 
 struct aesbs_xts_ctx {
 	struct aesbs_ctx	key;
+	struct crypto_cipher	*cts_tfm;
 	struct crypto_cipher	*tweak_tfm;
 };
 
+struct aesbs_ctr_ctx {
+	struct aesbs_ctx	key;		/* must be first member */
+	struct crypto_aes_ctx	fallback;
+};
+
 static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 			unsigned int key_len)
 {
@@ -61,7 +70,7 @@
 	struct crypto_aes_ctx rk;
 	int err;
 
-	err = crypto_aes_expand_key(&rk, in_key, key_len);
+	err = aes_expandkey(&rk, in_key, key_len);
 	if (err)
 		return err;
 
@@ -83,9 +92,8 @@
 	struct skcipher_walk walk;
 	int err;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	while (walk.nbytes >= AES_BLOCK_SIZE) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
 
@@ -93,12 +101,13 @@
 			blocks = round_down(blocks,
 					    walk.stride / AES_BLOCK_SIZE);
 
+		kernel_neon_begin();
 		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->rk,
 		   ctx->rounds, blocks);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk,
 					 walk.nbytes - blocks * AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 
 	return err;
 }
@@ -120,7 +129,7 @@
 	struct crypto_aes_ctx rk;
 	int err;
 
-	err = crypto_aes_expand_key(&rk, in_key, key_len);
+	err = aes_expandkey(&rk, in_key, key_len);
 	if (err)
 		return err;
 
@@ -152,9 +161,8 @@
 	struct skcipher_walk walk;
 	int err;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	while (walk.nbytes >= AES_BLOCK_SIZE) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
 
@@ -162,13 +170,14 @@
 			blocks = round_down(blocks,
 					    walk.stride / AES_BLOCK_SIZE);
 
+		kernel_neon_begin();
 		aesbs_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				  ctx->key.rk, ctx->key.rounds, blocks,
 				  walk.iv);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk,
 					 walk.nbytes - blocks * AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 
 	return err;
 }
@@ -189,6 +198,25 @@
 	crypto_free_cipher(ctx->enc_tfm);
 }
 
+static int aesbs_ctr_setkey_sync(struct crypto_skcipher *tfm, const u8 *in_key,
+				 unsigned int key_len)
+{
+	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err;
+
+	err = aes_expandkey(&ctx->fallback, in_key, key_len);
+	if (err)
+		return err;
+
+	ctx->key.rounds = 6 + key_len / 4;
+
+	kernel_neon_begin();
+	aesbs_convert_key(ctx->key.rk, ctx->fallback.key_enc, ctx->key.rounds);
+	kernel_neon_end();
+
+	return 0;
+}
+
 static int ctr_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -197,9 +225,8 @@
 	u8 buf[AES_BLOCK_SIZE];
 	int err;
 
-	err = skcipher_walk_virt(&walk, req, true);
+	err = skcipher_walk_virt(&walk, req, false);
 
-	kernel_neon_begin();
 	while (walk.nbytes > 0) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
 		u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL;
@@ -210,8 +237,10 @@
 			final = NULL;
 		}
 
+		kernel_neon_begin();
 		aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
 				  ctx->rk, ctx->rounds, blocks, walk.iv, final);
+		kernel_neon_end();
 
 		if (final) {
 			u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE;
@@ -226,11 +255,33 @@
 		err = skcipher_walk_done(&walk,
 					 walk.nbytes - blocks * AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 
 	return err;
 }
 
+static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
+{
+	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+	unsigned long flags;
+
+	/*
+	 * Temporarily disable interrupts to avoid races where
+	 * cachelines are evicted when the CPU is interrupted
+	 * to do something else.
+	 */
+	local_irq_save(flags);
+	aes_encrypt(&ctx->fallback, dst, src);
+	local_irq_restore(flags);
+}
+
+static int ctr_encrypt_sync(struct skcipher_request *req)
+{
+	if (!crypto_simd_usable())
+		return crypto_ctr_encrypt_walk(req, ctr_encrypt_one);
+
+	return ctr_encrypt(req);
+}
+
 static int aesbs_xts_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 			    unsigned int key_len)
 {
@@ -242,6 +293,9 @@
 		return err;
 
 	key_len /= 2;
+	err = crypto_cipher_setkey(ctx->cts_tfm, in_key, key_len);
+	if (err)
+		return err;
 	err = crypto_cipher_setkey(ctx->tweak_tfm, in_key + key_len, key_len);
 	if (err)
 		return err;
@@ -253,7 +307,13 @@
 {
 	struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);
 
+	ctx->cts_tfm = crypto_alloc_cipher("aes", 0, 0);
+	if (IS_ERR(ctx->cts_tfm))
+		return PTR_ERR(ctx->cts_tfm);
+
 	ctx->tweak_tfm = crypto_alloc_cipher("aes", 0, 0);
+	if (IS_ERR(ctx->tweak_tfm))
+		crypto_free_cipher(ctx->cts_tfm);
 
 	return PTR_ERR_OR_ZERO(ctx->tweak_tfm);
 }
@@ -263,49 +323,89 @@
 	struct aesbs_xts_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	crypto_free_cipher(ctx->tweak_tfm);
+	crypto_free_cipher(ctx->cts_tfm);
 }
 
-static int __xts_crypt(struct skcipher_request *req,
+static int __xts_crypt(struct skcipher_request *req, bool encrypt,
 		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
-				  int rounds, int blocks, u8 iv[]))
+				  int rounds, int blocks, u8 iv[], int))
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int tail = req->cryptlen % AES_BLOCK_SIZE;
+	struct skcipher_request subreq;
+	u8 buf[2 * AES_BLOCK_SIZE];
 	struct skcipher_walk walk;
 	int err;
 
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	if (unlikely(tail)) {
+		skcipher_request_set_tfm(&subreq, tfm);
+		skcipher_request_set_callback(&subreq,
+					      skcipher_request_flags(req),
+					      NULL, NULL);
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   req->cryptlen - tail, req->iv);
+		req = &subreq;
+	}
+
 	err = skcipher_walk_virt(&walk, req, true);
 	if (err)
 		return err;
 
 	crypto_cipher_encrypt_one(ctx->tweak_tfm, walk.iv, walk.iv);
 
-	kernel_neon_begin();
 	while (walk.nbytes >= AES_BLOCK_SIZE) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
+		int reorder_last_tweak = !encrypt && tail > 0;
 
-		if (walk.nbytes < walk.total)
+		if (walk.nbytes < walk.total) {
 			blocks = round_down(blocks,
 					    walk.stride / AES_BLOCK_SIZE);
+			reorder_last_tweak = 0;
+		}
 
+		kernel_neon_begin();
 		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
-		   ctx->key.rounds, blocks, walk.iv);
+		   ctx->key.rounds, blocks, walk.iv, reorder_last_tweak);
+		kernel_neon_end();
 		err = skcipher_walk_done(&walk,
 					 walk.nbytes - blocks * AES_BLOCK_SIZE);
 	}
-	kernel_neon_end();
 
-	return err;
+	if (err || likely(!tail))
+		return err;
+
+	/* handle ciphertext stealing */
+	scatterwalk_map_and_copy(buf, req->dst, req->cryptlen - AES_BLOCK_SIZE,
+				 AES_BLOCK_SIZE, 0);
+	memcpy(buf + AES_BLOCK_SIZE, buf, tail);
+	scatterwalk_map_and_copy(buf, req->src, req->cryptlen, tail, 0);
+
+	crypto_xor(buf, req->iv, AES_BLOCK_SIZE);
+
+	if (encrypt)
+		crypto_cipher_encrypt_one(ctx->cts_tfm, buf, buf);
+	else
+		crypto_cipher_decrypt_one(ctx->cts_tfm, buf, buf);
+
+	crypto_xor(buf, req->iv, AES_BLOCK_SIZE);
+
+	scatterwalk_map_and_copy(buf, req->dst, req->cryptlen - AES_BLOCK_SIZE,
+				 AES_BLOCK_SIZE + tail, 1);
+	return 0;
 }
 
 static int xts_encrypt(struct skcipher_request *req)
 {
-	return __xts_crypt(req, aesbs_xts_encrypt);
+	return __xts_crypt(req, true, aesbs_xts_encrypt);
 }
 
 static int xts_decrypt(struct skcipher_request *req)
 {
-	return __xts_crypt(req, aesbs_xts_decrypt);
+	return __xts_crypt(req, false, aesbs_xts_decrypt);
 }
 
 static struct skcipher_alg aes_algs[] = { {
@@ -359,6 +459,22 @@
 	.encrypt		= ctr_encrypt,
 	.decrypt		= ctr_encrypt,
 }, {
+	.base.cra_name		= "ctr(aes)",
+	.base.cra_driver_name	= "ctr-aes-neonbs-sync",
+	.base.cra_priority	= 250 - 1,
+	.base.cra_blocksize	= 1,
+	.base.cra_ctxsize	= sizeof(struct aesbs_ctr_ctx),
+	.base.cra_module	= THIS_MODULE,
+
+	.min_keysize		= AES_MIN_KEY_SIZE,
+	.max_keysize		= AES_MAX_KEY_SIZE,
+	.chunksize		= AES_BLOCK_SIZE,
+	.walksize		= 8 * AES_BLOCK_SIZE,
+	.ivsize			= AES_BLOCK_SIZE,
+	.setkey			= aesbs_ctr_setkey_sync,
+	.encrypt		= ctr_encrypt_sync,
+	.decrypt		= ctr_encrypt_sync,
+}, {
 	.base.cra_name		= "__xts(aes)",
 	.base.cra_driver_name	= "__xts-aes-neonbs",
 	.base.cra_priority	= 250,
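
The tail handling added to __xts_crypt() above is XTS ciphertext stealing: the last full ciphertext block donates its leading bytes to the short final block, and the remainder is re-encrypted together with the trailing plaintext under the final tweak. The sketch below shows that rearrangement for the encrypt path only; the helper name and its arguments are made up for illustration (the in-kernel code does the same shuffling via scatterwalk_map_and_copy() into a single stack buffer), and the AES library helpers from this series are assumed.

#include <crypto/aes.h>
#include <crypto/algapi.h>
#include <linux/string.h>

static void xts_steal_encrypt(const struct crypto_aes_ctx *ctx, const u8 *cc,
			      const u8 *ptail, unsigned int tail,
			      const u8 *tweak, u8 *out, u8 *out_tail)
{
	u8 buf[AES_BLOCK_SIZE];

	/* the head of the last full ciphertext block becomes the short tail */
	memcpy(out_tail, cc, tail);

	/* pad the trailing plaintext with the stolen ciphertext bytes */
	memcpy(buf, ptail, tail);
	memcpy(buf + tail, cc + tail, AES_BLOCK_SIZE - tail);

	/* standard XTS: C = E_k(P ^ T) ^ T, with the tweak applied by hand */
	crypto_xor(buf, tweak, AES_BLOCK_SIZE);
	aes_encrypt(ctx, buf, buf);
	crypto_xor(buf, tweak, AES_BLOCK_SIZE);

	memcpy(out, buf, AES_BLOCK_SIZE);	/* new last full block */
}

For decryption the tweaks of the last two blocks have to be processed in swapped order, which is what the new reorder_last_tweak argument to the assembler routines takes care of.
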
diff --git a/arch/arm/crypto/ghash-ce-glue.c b/arch/arm/crypto/ghash-ce-glue.c
index 52d472a..c691077 100644
--- a/arch/arm/crypto/ghash-ce-glue.c
+++ b/arch/arm/crypto/ghash-ce-glue.c
@@ -9,6 +9,7 @@
 #include <asm/neon.h>
 #include <asm/simd.h>
 #include <asm/unaligned.h>
+#include <crypto/b128ops.h>
 #include <crypto/cryptd.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
@@ -17,7 +18,7 @@
 #include <linux/crypto.h>
 #include <linux/module.h>
 
-MODULE_DESCRIPTION("GHASH secure hash using ARMv8 Crypto Extensions");
+MODULE_DESCRIPTION("GHASH hash function using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS_CRYPTO("ghash");
@@ -30,6 +31,8 @@
 	u64	h2[2];
 	u64	h3[2];
 	u64	h4[2];
+
+	be128	k;
 };
 
 struct ghash_desc_ctx {
@@ -62,6 +65,36 @@
 	return 0;
 }
 
+static void ghash_do_update(int blocks, u64 dg[], const char *src,
+			    struct ghash_key *key, const char *head)
+{
+	if (likely(crypto_simd_usable())) {
+		kernel_neon_begin();
+		pmull_ghash_update(blocks, dg, src, key, head);
+		kernel_neon_end();
+	} else {
+		be128 dst = { cpu_to_be64(dg[1]), cpu_to_be64(dg[0]) };
+
+		do {
+			const u8 *in = src;
+
+			if (head) {
+				in = head;
+				blocks++;
+				head = NULL;
+			} else {
+				src += GHASH_BLOCK_SIZE;
+			}
+
+			crypto_xor((u8 *)&dst, in, GHASH_BLOCK_SIZE);
+			gf128mul_lle(&dst, &key->k);
+		} while (--blocks);
+
+		dg[0] = be64_to_cpu(dst.b);
+		dg[1] = be64_to_cpu(dst.a);
+	}
+}
+
 static int ghash_update(struct shash_desc *desc, const u8 *src,
 			unsigned int len)
 {
@@ -85,10 +118,8 @@
 		blocks = len / GHASH_BLOCK_SIZE;
 		len %= GHASH_BLOCK_SIZE;
 
-		kernel_neon_begin();
-		pmull_ghash_update(blocks, ctx->digest, src, key,
-				   partial ? ctx->buf : NULL);
-		kernel_neon_end();
+		ghash_do_update(blocks, ctx->digest, src, key,
+				partial ? ctx->buf : NULL);
 		src += blocks * GHASH_BLOCK_SIZE;
 		partial = 0;
 	}
@@ -106,9 +137,7 @@
 		struct ghash_key *key = crypto_shash_ctx(desc->tfm);
 
 		memset(ctx->buf + partial, 0, GHASH_BLOCK_SIZE - partial);
-		kernel_neon_begin();
-		pmull_ghash_update(1, ctx->digest, ctx->buf, key, NULL);
-		kernel_neon_end();
+		ghash_do_update(1, ctx->digest, ctx->buf, key, NULL);
 	}
 	put_unaligned_be64(ctx->digest[1], dst);
 	put_unaligned_be64(ctx->digest[0], dst + 8);
@@ -132,24 +161,25 @@
 			const u8 *inkey, unsigned int keylen)
 {
 	struct ghash_key *key = crypto_shash_ctx(tfm);
-	be128 h, k;
+	be128 h;
 
 	if (keylen != GHASH_BLOCK_SIZE) {
 		crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return -EINVAL;
 	}
 
-	memcpy(&k, inkey, GHASH_BLOCK_SIZE);
-	ghash_reflect(key->h, &k);
+	/* needed for the fallback */
+	memcpy(&key->k, inkey, GHASH_BLOCK_SIZE);
+	ghash_reflect(key->h, &key->k);
 
-	h = k;
-	gf128mul_lle(&h, &k);
+	h = key->k;
+	gf128mul_lle(&h, &key->k);
 	ghash_reflect(key->h2, &h);
 
-	gf128mul_lle(&h, &k);
+	gf128mul_lle(&h, &key->k);
 	ghash_reflect(key->h3, &h);
 
-	gf128mul_lle(&h, &k);
+	gf128mul_lle(&h, &key->k);
 	ghash_reflect(key->h4, &h);
 
 	return 0;
@@ -162,15 +192,13 @@
 	.final			= ghash_final,
 	.setkey			= ghash_setkey,
 	.descsize		= sizeof(struct ghash_desc_ctx),
-	.base			= {
-		.cra_name	= "__ghash",
-		.cra_driver_name = "__driver-ghash-ce",
-		.cra_priority	= 0,
-		.cra_flags	= CRYPTO_ALG_INTERNAL,
-		.cra_blocksize	= GHASH_BLOCK_SIZE,
-		.cra_ctxsize	= sizeof(struct ghash_key),
-		.cra_module	= THIS_MODULE,
-	},
+
+	.base.cra_name		= "ghash",
+	.base.cra_driver_name	= "ghash-ce-sync",
+	.base.cra_priority	= 300 - 1,
+	.base.cra_blocksize	= GHASH_BLOCK_SIZE,
+	.base.cra_ctxsize	= sizeof(struct ghash_key),
+	.base.cra_module	= THIS_MODULE,
 };
 
 static int ghash_async_init(struct ahash_request *req)
@@ -285,9 +313,7 @@
 	struct cryptd_ahash *cryptd_tfm;
 	struct ghash_async_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	cryptd_tfm = cryptd_alloc_ahash("__driver-ghash-ce",
-					CRYPTO_ALG_INTERNAL,
-					CRYPTO_ALG_INTERNAL);
+	cryptd_tfm = cryptd_alloc_ahash("ghash-ce-sync", 0, 0);
 	if (IS_ERR(cryptd_tfm))
 		return PTR_ERR(cryptd_tfm);
 	ctx->cryptd_tfm = cryptd_tfm;
diff --git a/arch/arm/crypto/sha256_glue.c b/arch/arm/crypto/sha256_glue.c
index 70efa96..215497f 100644
--- a/arch/arm/crypto/sha256_glue.c
+++ b/arch/arm/crypto/sha256_glue.c
@@ -39,7 +39,7 @@
 }
 EXPORT_SYMBOL(crypto_sha256_arm_update);
 
-static int sha256_final(struct shash_desc *desc, u8 *out)
+static int crypto_sha256_arm_final(struct shash_desc *desc, u8 *out)
 {
 	sha256_base_do_finalize(desc,
 				(sha256_block_fn *)sha256_block_data_order);
@@ -51,7 +51,7 @@
 {
 	sha256_base_do_update(desc, data, len,
 			      (sha256_block_fn *)sha256_block_data_order);
-	return sha256_final(desc, out);
+	return crypto_sha256_arm_final(desc, out);
 }
 EXPORT_SYMBOL(crypto_sha256_arm_finup);
 
@@ -59,7 +59,7 @@
 	.digestsize	=	SHA256_DIGEST_SIZE,
 	.init		=	sha256_base_init,
 	.update		=	crypto_sha256_arm_update,
-	.final		=	sha256_final,
+	.final		=	crypto_sha256_arm_final,
 	.finup		=	crypto_sha256_arm_finup,
 	.descsize	=	sizeof(struct sha256_state),
 	.base		=	{
@@ -73,7 +73,7 @@
 	.digestsize	=	SHA224_DIGEST_SIZE,
 	.init		=	sha224_base_init,
 	.update		=	crypto_sha256_arm_update,
-	.final		=	sha256_final,
+	.final		=	crypto_sha256_arm_final,
 	.finup		=	crypto_sha256_arm_finup,
 	.descsize	=	sizeof(struct sha256_state),
 	.base		=	{
diff --git a/arch/arm/crypto/sha256_neon_glue.c b/arch/arm/crypto/sha256_neon_glue.c
index a7ce38a..38645e4 100644
--- a/arch/arm/crypto/sha256_neon_glue.c
+++ b/arch/arm/crypto/sha256_neon_glue.c
@@ -25,8 +25,8 @@
 asmlinkage void sha256_block_data_order_neon(u32 *digest, const void *data,
 					     unsigned int num_blks);
 
-static int sha256_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int len)
+static int crypto_sha256_neon_update(struct shash_desc *desc, const u8 *data,
+				     unsigned int len)
 {
 	struct sha256_state *sctx = shash_desc_ctx(desc);
 
@@ -42,8 +42,8 @@
 	return 0;
 }
 
-static int sha256_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
+static int crypto_sha256_neon_finup(struct shash_desc *desc, const u8 *data,
+				    unsigned int len, u8 *out)
 {
 	if (!crypto_simd_usable())
 		return crypto_sha256_arm_finup(desc, data, len, out);
@@ -59,17 +59,17 @@
 	return sha256_base_finish(desc, out);
 }
 
-static int sha256_final(struct shash_desc *desc, u8 *out)
+static int crypto_sha256_neon_final(struct shash_desc *desc, u8 *out)
 {
-	return sha256_finup(desc, NULL, 0, out);
+	return crypto_sha256_neon_finup(desc, NULL, 0, out);
 }
 
 struct shash_alg sha256_neon_algs[] = { {
 	.digestsize	=	SHA256_DIGEST_SIZE,
 	.init		=	sha256_base_init,
-	.update		=	sha256_update,
-	.final		=	sha256_final,
-	.finup		=	sha256_finup,
+	.update		=	crypto_sha256_neon_update,
+	.final		=	crypto_sha256_neon_final,
+	.finup		=	crypto_sha256_neon_finup,
 	.descsize	=	sizeof(struct sha256_state),
 	.base		=	{
 		.cra_name	=	"sha256",
@@ -81,9 +81,9 @@
 }, {
 	.digestsize	=	SHA224_DIGEST_SIZE,
 	.init		=	sha224_base_init,
-	.update		=	sha256_update,
-	.final		=	sha256_final,
-	.finup		=	sha256_finup,
+	.update		=	crypto_sha256_neon_update,
+	.final		=	crypto_sha256_neon_final,
+	.finup		=	crypto_sha256_neon_finup,
 	.descsize	=	sizeof(struct sha256_state),
 	.base		=	{
 		.cra_name	=	"sha224",
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 6b2dc15..68ca86f 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -17,7 +17,6 @@
 generic-y += preempt.h
 generic-y += seccomp.h
 generic-y += serial.h
-generic-y += simd.h
 generic-y += trace_clock.h
 
 generated-y += mach-types.h
diff --git a/arch/arm64/boot/dts/freescale/imx8mq.dtsi b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
index 3f3594d..04115ca 100644
--- a/arch/arm64/boot/dts/freescale/imx8mq.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mq.dtsi
@@ -751,6 +751,36 @@
 				status = "disabled";
 			};
 
+			crypto: crypto@30900000 {
+				compatible = "fsl,sec-v4.0";
+				#address-cells = <1>;
+				#size-cells = <1>;
+				reg = <0x30900000 0x40000>;
+				ranges = <0 0x30900000 0x40000>;
+				interrupts = <GIC_SPI 91 IRQ_TYPE_LEVEL_HIGH>;
+				clocks = <&clk IMX8MQ_CLK_AHB>,
+					 <&clk IMX8MQ_CLK_IPG_ROOT>;
+				clock-names = "aclk", "ipg";
+
+				sec_jr0: jr@1000 {
+					compatible = "fsl,sec-v4.0-job-ring";
+					reg = <0x1000 0x1000>;
+					interrupts = <GIC_SPI 105 IRQ_TYPE_LEVEL_HIGH>;
+				};
+
+				sec_jr1: jr@2000 {
+					compatible = "fsl,sec-v4.0-job-ring";
+					reg = <0x2000 0x1000>;
+					interrupts = <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>;
+				};
+
+				sec_jr2: jr@3000 {
+					compatible = "fsl,sec-v4.0-job-ring";
+					reg = <0x3000 0x1000>;
+					interrupts = <GIC_SPI 114 IRQ_TYPE_LEVEL_HIGH>;
+				};
+			};
+
 			dphy: dphy@30a00300 {
 				compatible = "fsl,imx8mq-mipi-dphy";
 				reg = <0x30a00300 0x100>;
diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig
index d9a523e..4922c44 100644
--- a/arch/arm64/crypto/Kconfig
+++ b/arch/arm64/crypto/Kconfig
@@ -58,8 +58,7 @@
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_HASH
 	select CRYPTO_GF128MUL
-	select CRYPTO_AES
-	select CRYPTO_AES_ARM64
+	select CRYPTO_LIB_AES
 
 config CRYPTO_CRCT10DIF_ARM64_CE
 	tristate "CRCT10DIF digest algorithm using PMULL instructions"
@@ -74,15 +73,15 @@
 	tristate "AES core cipher using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_ALGAPI
-	select CRYPTO_AES_ARM64
+	select CRYPTO_LIB_AES
 
 config CRYPTO_AES_ARM64_CE_CCM
 	tristate "AES in CCM mode using ARMv8 Crypto Extensions"
 	depends on ARM64 && KERNEL_MODE_NEON
 	select CRYPTO_ALGAPI
 	select CRYPTO_AES_ARM64_CE
-	select CRYPTO_AES_ARM64
 	select CRYPTO_AEAD
+	select CRYPTO_LIB_AES
 
 config CRYPTO_AES_ARM64_CE_BLK
 	tristate "AES in ECB/CBC/CTR/XTS modes using ARMv8 Crypto Extensions"
@@ -97,7 +96,7 @@
 	depends on KERNEL_MODE_NEON
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AES_ARM64
-	select CRYPTO_AES
+	select CRYPTO_LIB_AES
 	select CRYPTO_SIMD
 
 config CRYPTO_CHACHA20_NEON
@@ -117,6 +116,7 @@
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AES_ARM64_NEON_BLK
 	select CRYPTO_AES_ARM64
+	select CRYPTO_LIB_AES
 	select CRYPTO_SIMD
 
 endif
diff --git a/arch/arm64/crypto/aes-ce-ccm-glue.c b/arch/arm64/crypto/aes-ce-ccm-glue.c
index 827e547..541cf91 100644
--- a/arch/arm64/crypto/aes-ce-ccm-glue.c
+++ b/arch/arm64/crypto/aes-ce-ccm-glue.c
@@ -43,8 +43,6 @@
 asmlinkage void ce_aes_ccm_final(u8 mac[], u8 const ctr[], u32 const rk[],
 				 u32 rounds);
 
-asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
-
 static int ccm_setkey(struct crypto_aead *tfm, const u8 *in_key,
 		      unsigned int key_len)
 {
@@ -124,8 +122,7 @@
 		}
 
 		while (abytes >= AES_BLOCK_SIZE) {
-			__aes_arm64_encrypt(key->key_enc, mac, mac,
-					    num_rounds(key));
+			aes_encrypt(key, mac, mac);
 			crypto_xor(mac, in, AES_BLOCK_SIZE);
 
 			in += AES_BLOCK_SIZE;
@@ -133,8 +130,7 @@
 		}
 
 		if (abytes > 0) {
-			__aes_arm64_encrypt(key->key_enc, mac, mac,
-					    num_rounds(key));
+			aes_encrypt(key, mac, mac);
 			crypto_xor(mac, in, abytes);
 			*macp = abytes;
 		}
@@ -206,10 +202,8 @@
 				bsize = nbytes;
 
 			crypto_inc(walk->iv, AES_BLOCK_SIZE);
-			__aes_arm64_encrypt(ctx->key_enc, buf, walk->iv,
-					    num_rounds(ctx));
-			__aes_arm64_encrypt(ctx->key_enc, mac, mac,
-					    num_rounds(ctx));
+			aes_encrypt(ctx, buf, walk->iv);
+			aes_encrypt(ctx, mac, mac);
 			if (enc)
 				crypto_xor(mac, src, bsize);
 			crypto_xor_cpy(dst, src, buf, bsize);
@@ -224,8 +218,8 @@
 	}
 
 	if (!err) {
-		__aes_arm64_encrypt(ctx->key_enc, buf, iv0, num_rounds(ctx));
-		__aes_arm64_encrypt(ctx->key_enc, mac, mac, num_rounds(ctx));
+		aes_encrypt(ctx, buf, iv0);
+		aes_encrypt(ctx, mac, mac);
 		crypto_xor(mac, buf, AES_BLOCK_SIZE);
 	}
 	return err;
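
The CCM glue above (and the CE glue that follows) drops the private __aes_arm64_encrypt() entry point in favour of the generic AES library (CRYPTO_LIB_AES) introduced in this series. For single-block users the library API is just an expanded key plus one call per block; a minimal sketch in kernel context, with a made-up function name:

#include <crypto/aes.h>
#include <linux/string.h>

static int one_block_demo(const u8 *key, unsigned int key_len,
			  const u8 in[AES_BLOCK_SIZE], u8 out[AES_BLOCK_SIZE])
{
	struct crypto_aes_ctx ctx;
	int err;

	err = aes_expandkey(&ctx, key, key_len);	/* 16, 24 or 32 byte keys */
	if (err)
		return err;

	aes_encrypt(&ctx, out, in);	/* destination first, as in the calls above */
	/* aes_decrypt(&ctx, out, in) is the matching inverse */

	memzero_explicit(&ctx, sizeof(ctx));
	return 0;
}
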
diff --git a/arch/arm64/crypto/aes-ce-glue.c b/arch/arm64/crypto/aes-ce-glue.c
index d3bc97a..6d085dc 100644
--- a/arch/arm64/crypto/aes-ce-glue.c
+++ b/arch/arm64/crypto/aes-ce-glue.c
@@ -20,9 +20,6 @@
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
-asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
-asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
-
 struct aes_block {
 	u8 b[AES_BLOCK_SIZE];
 };
@@ -51,7 +48,7 @@
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	if (!crypto_simd_usable()) {
-		__aes_arm64_encrypt(ctx->key_enc, dst, src, num_rounds(ctx));
+		aes_encrypt(ctx, dst, src);
 		return;
 	}
 
@@ -65,7 +62,7 @@
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	if (!crypto_simd_usable()) {
-		__aes_arm64_decrypt(ctx->key_dec, dst, src, num_rounds(ctx));
+		aes_decrypt(ctx, dst, src);
 		return;
 	}
 
diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S
index 00bd288..c132c49 100644
--- a/arch/arm64/crypto/aes-ce.S
+++ b/arch/arm64/crypto/aes-ce.S
@@ -21,6 +21,9 @@
 	.macro		xts_reload_mask, tmp
 	.endm
 
+	.macro		xts_cts_skip_tw, reg, lbl
+	.endm
+
 	/* preload all round keys */
 	.macro		load_round_keys, rounds, rk
 	cmp		\rounds, #12
diff --git a/arch/arm64/crypto/aes-cipher-core.S b/arch/arm64/crypto/aes-cipher-core.S
index f06df0d..423d0ae 100644
--- a/arch/arm64/crypto/aes-cipher-core.S
+++ b/arch/arm64/crypto/aes-cipher-core.S
@@ -128,43 +128,5 @@
 
 	.align		5
 ENTRY(__aes_arm64_decrypt)
-	do_crypt	iround, crypto_it_tab, __aes_arm64_inverse_sbox, 0
+	do_crypt	iround, crypto_it_tab, crypto_aes_inv_sbox, 0
 ENDPROC(__aes_arm64_decrypt)
-
-	.section	".rodata", "a"
-	.align		L1_CACHE_SHIFT
-	.type		__aes_arm64_inverse_sbox, %object
-__aes_arm64_inverse_sbox:
-	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
-	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
-	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
-	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
-	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
-	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
-	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
-	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
-	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
-	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
-	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
-	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
-	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
-	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
-	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
-	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
-	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
-	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
-	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
-	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
-	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
-	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
-	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
-	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
-	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
-	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
-	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
-	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
-	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
-	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
-	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
-	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
-	.size		__aes_arm64_inverse_sbox, . - __aes_arm64_inverse_sbox
diff --git a/arch/arm64/crypto/aes-cipher-glue.c b/arch/arm64/crypto/aes-cipher-glue.c
index 0913966..8caf6df 100644
--- a/arch/arm64/crypto/aes-cipher-glue.c
+++ b/arch/arm64/crypto/aes-cipher-glue.c
@@ -10,12 +10,9 @@
 #include <linux/module.h>
 
 asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
-EXPORT_SYMBOL(__aes_arm64_encrypt);
-
 asmlinkage void __aes_arm64_decrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
-EXPORT_SYMBOL(__aes_arm64_decrypt);
 
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void aes_arm64_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	int rounds = 6 + ctx->key_length / 4;
@@ -23,7 +20,7 @@
 	__aes_arm64_encrypt(ctx->key_enc, out, in, rounds);
 }
 
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void aes_arm64_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	int rounds = 6 + ctx->key_length / 4;
@@ -43,8 +40,8 @@
 	.cra_cipher.cia_min_keysize	= AES_MIN_KEY_SIZE,
 	.cra_cipher.cia_max_keysize	= AES_MAX_KEY_SIZE,
 	.cra_cipher.cia_setkey		= crypto_aes_set_key,
-	.cra_cipher.cia_encrypt		= aes_encrypt,
-	.cra_cipher.cia_decrypt		= aes_decrypt
+	.cra_cipher.cia_encrypt		= aes_arm64_encrypt,
+	.cra_cipher.cia_decrypt		= aes_arm64_decrypt
 };
 
 static int __init aes_init(void)
diff --git a/arch/arm64/crypto/aes-ctr-fallback.h b/arch/arm64/crypto/aes-ctr-fallback.h
deleted file mode 100644
index 3ac9119..0000000
--- a/arch/arm64/crypto/aes-ctr-fallback.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Fallback for sync aes(ctr) in contexts where kernel mode NEON
- * is not allowed
- *
- * Copyright (C) 2017 Linaro Ltd <ard.biesheuvel@linaro.org>
- */
-
-#include <crypto/aes.h>
-#include <crypto/internal/skcipher.h>
-
-asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
-
-static inline int aes_ctr_encrypt_fallback(struct crypto_aes_ctx *ctx,
-					   struct skcipher_request *req)
-{
-	struct skcipher_walk walk;
-	u8 buf[AES_BLOCK_SIZE];
-	int err;
-
-	err = skcipher_walk_virt(&walk, req, true);
-
-	while (walk.nbytes > 0) {
-		u8 *dst = walk.dst.virt.addr;
-		u8 *src = walk.src.virt.addr;
-		int nbytes = walk.nbytes;
-		int tail = 0;
-
-		if (nbytes < walk.total) {
-			nbytes = round_down(nbytes, AES_BLOCK_SIZE);
-			tail = walk.nbytes % AES_BLOCK_SIZE;
-		}
-
-		do {
-			int bsize = min(nbytes, AES_BLOCK_SIZE);
-
-			__aes_arm64_encrypt(ctx->key_enc, buf, walk.iv,
-					    6 + ctx->key_length / 4);
-			crypto_xor_cpy(dst, src, buf, bsize);
-			crypto_inc(walk.iv, AES_BLOCK_SIZE);
-
-			dst += AES_BLOCK_SIZE;
-			src += AES_BLOCK_SIZE;
-			nbytes -= AES_BLOCK_SIZE;
-		} while (nbytes > 0);
-
-		err = skcipher_walk_done(&walk, tail);
-	}
-	return err;
-}
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c
index 8d6c893..aa57dc6 100644
--- a/arch/arm64/crypto/aes-glue.c
+++ b/arch/arm64/crypto/aes-glue.c
@@ -9,6 +9,8 @@
 #include <asm/hwcap.h>
 #include <asm/simd.h>
 #include <crypto/aes.h>
+#include <crypto/ctr.h>
+#include <crypto/sha.h>
 #include <crypto/internal/hash.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
@@ -18,12 +20,10 @@
 #include <crypto/xts.h>
 
 #include "aes-ce-setkey.h"
-#include "aes-ctr-fallback.h"
 
 #ifdef USE_V8_CRYPTO_EXTENSIONS
 #define MODE			"ce"
 #define PRIO			300
-#define aes_setkey		ce_aes_setkey
 #define aes_expandkey		ce_aes_expandkey
 #define aes_ecb_encrypt		ce_aes_ecb_encrypt
 #define aes_ecb_decrypt		ce_aes_ecb_decrypt
@@ -31,6 +31,8 @@
 #define aes_cbc_decrypt		ce_aes_cbc_decrypt
 #define aes_cbc_cts_encrypt	ce_aes_cbc_cts_encrypt
 #define aes_cbc_cts_decrypt	ce_aes_cbc_cts_decrypt
+#define aes_essiv_cbc_encrypt	ce_aes_essiv_cbc_encrypt
+#define aes_essiv_cbc_decrypt	ce_aes_essiv_cbc_decrypt
 #define aes_ctr_encrypt		ce_aes_ctr_encrypt
 #define aes_xts_encrypt		ce_aes_xts_encrypt
 #define aes_xts_decrypt		ce_aes_xts_decrypt
@@ -39,27 +41,31 @@
 #else
 #define MODE			"neon"
 #define PRIO			200
-#define aes_setkey		crypto_aes_set_key
-#define aes_expandkey		crypto_aes_expand_key
 #define aes_ecb_encrypt		neon_aes_ecb_encrypt
 #define aes_ecb_decrypt		neon_aes_ecb_decrypt
 #define aes_cbc_encrypt		neon_aes_cbc_encrypt
 #define aes_cbc_decrypt		neon_aes_cbc_decrypt
 #define aes_cbc_cts_encrypt	neon_aes_cbc_cts_encrypt
 #define aes_cbc_cts_decrypt	neon_aes_cbc_cts_decrypt
+#define aes_essiv_cbc_encrypt	neon_aes_essiv_cbc_encrypt
+#define aes_essiv_cbc_decrypt	neon_aes_essiv_cbc_decrypt
 #define aes_ctr_encrypt		neon_aes_ctr_encrypt
 #define aes_xts_encrypt		neon_aes_xts_encrypt
 #define aes_xts_decrypt		neon_aes_xts_decrypt
 #define aes_mac_update		neon_aes_mac_update
 MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 NEON");
+#endif
+#if defined(USE_V8_CRYPTO_EXTENSIONS) || !defined(CONFIG_CRYPTO_AES_ARM64_BS)
 MODULE_ALIAS_CRYPTO("ecb(aes)");
 MODULE_ALIAS_CRYPTO("cbc(aes)");
 MODULE_ALIAS_CRYPTO("ctr(aes)");
 MODULE_ALIAS_CRYPTO("xts(aes)");
+#endif
+MODULE_ALIAS_CRYPTO("cts(cbc(aes))");
+MODULE_ALIAS_CRYPTO("essiv(cbc(aes),sha256)");
 MODULE_ALIAS_CRYPTO("cmac(aes)");
 MODULE_ALIAS_CRYPTO("xcbc(aes)");
 MODULE_ALIAS_CRYPTO("cbcmac(aes)");
-#endif
 
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
@@ -84,27 +90,34 @@
 				int rounds, int blocks, u8 ctr[]);
 
 asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[],
-				int rounds, int blocks, u32 const rk2[], u8 iv[],
+				int rounds, int bytes, u32 const rk2[], u8 iv[],
 				int first);
 asmlinkage void aes_xts_decrypt(u8 out[], u8 const in[], u32 const rk1[],
-				int rounds, int blocks, u32 const rk2[], u8 iv[],
+				int rounds, int bytes, u32 const rk2[], u8 iv[],
 				int first);
 
+asmlinkage void aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
+				      int rounds, int blocks, u8 iv[],
+				      u32 const rk2[]);
+asmlinkage void aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
+				      int rounds, int blocks, u8 iv[],
+				      u32 const rk2[]);
+
 asmlinkage void aes_mac_update(u8 const in[], u32 const rk[], int rounds,
 			       int blocks, u8 dg[], int enc_before,
 			       int enc_after);
 
-struct cts_cbc_req_ctx {
-	struct scatterlist sg_src[2];
-	struct scatterlist sg_dst[2];
-	struct skcipher_request subreq;
-};
-
 struct crypto_aes_xts_ctx {
 	struct crypto_aes_ctx key1;
 	struct crypto_aes_ctx __aligned(8) key2;
 };
 
+struct crypto_aes_essiv_cbc_ctx {
+	struct crypto_aes_ctx key1;
+	struct crypto_aes_ctx __aligned(8) key2;
+	struct crypto_shash *hash;
+};
+
 struct mac_tfm_ctx {
 	struct crypto_aes_ctx key;
 	u8 __aligned(8) consts[];
@@ -118,11 +131,18 @@
 static int skcipher_aes_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
 			       unsigned int key_len)
 {
-	return aes_setkey(crypto_skcipher_tfm(tfm), in_key, key_len);
+	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int ret;
+
+	ret = aes_expandkey(ctx, in_key, key_len);
+	if (ret)
+		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+
+	return ret;
 }
 
-static int xts_set_key(struct crypto_skcipher *tfm, const u8 *in_key,
-		       unsigned int key_len)
+static int __maybe_unused xts_set_key(struct crypto_skcipher *tfm,
+				      const u8 *in_key, unsigned int key_len)
 {
 	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int ret;
@@ -142,7 +162,33 @@
 	return -EINVAL;
 }
 
-static int ecb_encrypt(struct skcipher_request *req)
+static int __maybe_unused essiv_cbc_set_key(struct crypto_skcipher *tfm,
+					    const u8 *in_key,
+					    unsigned int key_len)
+{
+	struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+	SHASH_DESC_ON_STACK(desc, ctx->hash);
+	u8 digest[SHA256_DIGEST_SIZE];
+	int ret;
+
+	ret = aes_expandkey(&ctx->key1, in_key, key_len);
+	if (ret)
+		goto out;
+
+	desc->tfm = ctx->hash;
+	crypto_shash_digest(desc, in_key, key_len, digest);
+
+	ret = aes_expandkey(&ctx->key2, digest, sizeof(digest));
+	if (ret)
+		goto out;
+
+	return 0;
+out:
+	crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+	return -EINVAL;
+}
+
+static int __maybe_unused ecb_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -162,7 +208,7 @@
 	return err;
 }
 
-static int ecb_decrypt(struct skcipher_request *req)
+static int __maybe_unused ecb_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -182,63 +228,78 @@
 	return err;
 }
 
-static int cbc_encrypt(struct skcipher_request *req)
+static int cbc_encrypt_walk(struct skcipher_request *req,
+			    struct skcipher_walk *walk)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err, rounds = 6 + ctx->key_length / 4;
-	struct skcipher_walk walk;
+	int err = 0, rounds = 6 + ctx->key_length / 4;
 	unsigned int blocks;
 
-	err = skcipher_walk_virt(&walk, req, false);
-
-	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+	while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
 		kernel_neon_begin();
-		aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				ctx->key_enc, rounds, blocks, walk.iv);
+		aes_cbc_encrypt(walk->dst.virt.addr, walk->src.virt.addr,
+				ctx->key_enc, rounds, blocks, walk->iv);
 		kernel_neon_end();
-		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
 	}
 	return err;
 }
 
-static int cbc_decrypt(struct skcipher_request *req)
+static int __maybe_unused cbc_encrypt(struct skcipher_request *req)
+{
+	struct skcipher_walk walk;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+	if (err)
+		return err;
+	return cbc_encrypt_walk(req, &walk);
+}
+
+static int cbc_decrypt_walk(struct skcipher_request *req,
+			    struct skcipher_walk *walk)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	int err, rounds = 6 + ctx->key_length / 4;
-	struct skcipher_walk walk;
+	int err = 0, rounds = 6 + ctx->key_length / 4;
 	unsigned int blocks;
 
-	err = skcipher_walk_virt(&walk, req, false);
-
-	while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
+	while ((blocks = (walk->nbytes / AES_BLOCK_SIZE))) {
 		kernel_neon_begin();
-		aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				ctx->key_dec, rounds, blocks, walk.iv);
+		aes_cbc_decrypt(walk->dst.virt.addr, walk->src.virt.addr,
+				ctx->key_dec, rounds, blocks, walk->iv);
 		kernel_neon_end();
-		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(walk, walk->nbytes % AES_BLOCK_SIZE);
 	}
 	return err;
 }
 
-static int cts_cbc_init_tfm(struct crypto_skcipher *tfm)
+static int __maybe_unused cbc_decrypt(struct skcipher_request *req)
 {
-	crypto_skcipher_set_reqsize(tfm, sizeof(struct cts_cbc_req_ctx));
-	return 0;
+	struct skcipher_walk walk;
+	int err;
+
+	err = skcipher_walk_virt(&walk, req, false);
+	if (err)
+		return err;
+	return cbc_decrypt_walk(req, &walk);
 }
 
 static int cts_cbc_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
 	int err, rounds = 6 + ctx->key_length / 4;
 	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
 	struct scatterlist *src = req->src, *dst = req->dst;
+	struct scatterlist sg_src[2], sg_dst[2];
+	struct skcipher_request subreq;
 	struct skcipher_walk walk;
 
-	skcipher_request_set_tfm(&rctx->subreq, tfm);
+	skcipher_request_set_tfm(&subreq, tfm);
+	skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
+				      NULL, NULL);
 
 	if (req->cryptlen <= AES_BLOCK_SIZE) {
 		if (req->cryptlen < AES_BLOCK_SIZE)
@@ -247,41 +308,30 @@
 	}
 
 	if (cbc_blocks > 0) {
-		unsigned int blocks;
-
-		skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
 					   cbc_blocks * AES_BLOCK_SIZE,
 					   req->iv);
 
-		err = skcipher_walk_virt(&walk, &rctx->subreq, false);
-
-		while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
-			kernel_neon_begin();
-			aes_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-					ctx->key_enc, rounds, blocks, walk.iv);
-			kernel_neon_end();
-			err = skcipher_walk_done(&walk,
-						 walk.nbytes % AES_BLOCK_SIZE);
-		}
+		err = skcipher_walk_virt(&walk, &subreq, false) ?:
+		      cbc_encrypt_walk(&subreq, &walk);
 		if (err)
 			return err;
 
 		if (req->cryptlen == AES_BLOCK_SIZE)
 			return 0;
 
-		dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
-					     rctx->subreq.cryptlen);
+		dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
 		if (req->dst != req->src)
-			dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
-					       rctx->subreq.cryptlen);
+			dst = scatterwalk_ffwd(sg_dst, req->dst,
+					       subreq.cryptlen);
 	}
 
 	/* handle ciphertext stealing */
-	skcipher_request_set_crypt(&rctx->subreq, src, dst,
+	skcipher_request_set_crypt(&subreq, src, dst,
 				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
 				   req->iv);
 
-	err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+	err = skcipher_walk_virt(&walk, &subreq, false);
 	if (err)
 		return err;
 
@@ -297,13 +347,16 @@
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
-	struct cts_cbc_req_ctx *rctx = skcipher_request_ctx(req);
 	int err, rounds = 6 + ctx->key_length / 4;
 	int cbc_blocks = DIV_ROUND_UP(req->cryptlen, AES_BLOCK_SIZE) - 2;
 	struct scatterlist *src = req->src, *dst = req->dst;
+	struct scatterlist sg_src[2], sg_dst[2];
+	struct skcipher_request subreq;
 	struct skcipher_walk walk;
 
-	skcipher_request_set_tfm(&rctx->subreq, tfm);
+	skcipher_request_set_tfm(&subreq, tfm);
+	skcipher_request_set_callback(&subreq, skcipher_request_flags(req),
+				      NULL, NULL);
 
 	if (req->cryptlen <= AES_BLOCK_SIZE) {
 		if (req->cryptlen < AES_BLOCK_SIZE)
@@ -312,41 +365,30 @@
 	}
 
 	if (cbc_blocks > 0) {
-		unsigned int blocks;
-
-		skcipher_request_set_crypt(&rctx->subreq, req->src, req->dst,
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
 					   cbc_blocks * AES_BLOCK_SIZE,
 					   req->iv);
 
-		err = skcipher_walk_virt(&walk, &rctx->subreq, false);
-
-		while ((blocks = (walk.nbytes / AES_BLOCK_SIZE))) {
-			kernel_neon_begin();
-			aes_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-					ctx->key_dec, rounds, blocks, walk.iv);
-			kernel_neon_end();
-			err = skcipher_walk_done(&walk,
-						 walk.nbytes % AES_BLOCK_SIZE);
-		}
+		err = skcipher_walk_virt(&walk, &subreq, false) ?:
+		      cbc_decrypt_walk(&subreq, &walk);
 		if (err)
 			return err;
 
 		if (req->cryptlen == AES_BLOCK_SIZE)
 			return 0;
 
-		dst = src = scatterwalk_ffwd(rctx->sg_src, req->src,
-					     rctx->subreq.cryptlen);
+		dst = src = scatterwalk_ffwd(sg_src, req->src, subreq.cryptlen);
 		if (req->dst != req->src)
-			dst = scatterwalk_ffwd(rctx->sg_dst, req->dst,
-					       rctx->subreq.cryptlen);
+			dst = scatterwalk_ffwd(sg_dst, req->dst,
+					       subreq.cryptlen);
 	}
 
 	/* handle ciphertext stealing */
-	skcipher_request_set_crypt(&rctx->subreq, src, dst,
+	skcipher_request_set_crypt(&subreq, src, dst,
 				   req->cryptlen - cbc_blocks * AES_BLOCK_SIZE,
 				   req->iv);
 
-	err = skcipher_walk_virt(&walk, &rctx->subreq, false);
+	err = skcipher_walk_virt(&walk, &subreq, false);
 	if (err)
 		return err;
 
@@ -358,6 +400,66 @@
 	return skcipher_walk_done(&walk, 0);
 }
 
+static int __maybe_unused essiv_cbc_init_tfm(struct crypto_skcipher *tfm)
+{
+	struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	ctx->hash = crypto_alloc_shash("sha256", 0, 0);
+
+	return PTR_ERR_OR_ZERO(ctx->hash);
+}
+
+static void __maybe_unused essiv_cbc_exit_tfm(struct crypto_skcipher *tfm)
+{
+	struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+
+	crypto_free_shash(ctx->hash);
+}
+
+static int __maybe_unused essiv_cbc_encrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err, rounds = 6 + ctx->key1.key_length / 4;
+	struct skcipher_walk walk;
+	unsigned int blocks;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	blocks = walk.nbytes / AES_BLOCK_SIZE;
+	if (blocks) {
+		kernel_neon_begin();
+		aes_essiv_cbc_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				      ctx->key1.key_enc, rounds, blocks,
+				      req->iv, ctx->key2.key_enc);
+		kernel_neon_end();
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+	}
+	return err ?: cbc_encrypt_walk(req, &walk);
+}
+
+static int __maybe_unused essiv_cbc_decrypt(struct skcipher_request *req)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	struct crypto_aes_essiv_cbc_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int err, rounds = 6 + ctx->key1.key_length / 4;
+	struct skcipher_walk walk;
+	unsigned int blocks;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	blocks = walk.nbytes / AES_BLOCK_SIZE;
+	if (blocks) {
+		kernel_neon_begin();
+		aes_essiv_cbc_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				      ctx->key1.key_dec, rounds, blocks,
+				      req->iv, ctx->key2.key_enc);
+		kernel_neon_end();
+		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+	}
+	return err ?: cbc_decrypt_walk(req, &walk);
+}
+
 static int ctr_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
@@ -397,62 +499,176 @@
 	return err;
 }
 
-static int ctr_encrypt_sync(struct skcipher_request *req)
+static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	const struct crypto_aes_ctx *ctx = crypto_skcipher_ctx(tfm);
+	unsigned long flags;
 
+	/*
+	 * Temporarily disable interrupts to avoid races where
+	 * cachelines are evicted when the CPU is interrupted
+	 * to do something else.
+	 */
+	local_irq_save(flags);
+	aes_encrypt(ctx, dst, src);
+	local_irq_restore(flags);
+}
+
+static int __maybe_unused ctr_encrypt_sync(struct skcipher_request *req)
+{
 	if (!crypto_simd_usable())
-		return aes_ctr_encrypt_fallback(ctx, req);
+		return crypto_ctr_encrypt_walk(req, ctr_encrypt_one);
 
 	return ctr_encrypt(req);
 }
 
-static int xts_encrypt(struct skcipher_request *req)
+static int __maybe_unused xts_encrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key1.key_length / 4;
+	int tail = req->cryptlen % AES_BLOCK_SIZE;
+	struct scatterlist sg_src[2], sg_dst[2];
+	struct skcipher_request subreq;
+	struct scatterlist *src, *dst;
 	struct skcipher_walk walk;
-	unsigned int blocks;
+
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
 
 	err = skcipher_walk_virt(&walk, req, false);
 
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
+	if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
+		int xts_blocks = DIV_ROUND_UP(req->cryptlen,
+					      AES_BLOCK_SIZE) - 2;
+
+		skcipher_walk_abort(&walk);
+
+		skcipher_request_set_tfm(&subreq, tfm);
+		skcipher_request_set_callback(&subreq,
+					      skcipher_request_flags(req),
+					      NULL, NULL);
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   xts_blocks * AES_BLOCK_SIZE,
+					   req->iv);
+		req = &subreq;
+		err = skcipher_walk_virt(&walk, req, false);
+	} else {
+		tail = 0;
+	}
+
+	for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
+		int nbytes = walk.nbytes;
+
+		if (walk.nbytes < walk.total)
+			nbytes &= ~(AES_BLOCK_SIZE - 1);
+
 		kernel_neon_begin();
 		aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				ctx->key1.key_enc, rounds, blocks,
+				ctx->key1.key_enc, rounds, nbytes,
 				ctx->key2.key_enc, walk.iv, first);
 		kernel_neon_end();
-		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
 	}
 
-	return err;
+	if (err || likely(!tail))
+		return err;
+
+	dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+	if (req->dst != req->src)
+		dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+
+	skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, &subreq, false);
+	if (err)
+		return err;
+
+	kernel_neon_begin();
+	aes_xts_encrypt(walk.dst.virt.addr, walk.src.virt.addr,
+			ctx->key1.key_enc, rounds, walk.nbytes,
+			ctx->key2.key_enc, walk.iv, first);
+	kernel_neon_end();
+
+	return skcipher_walk_done(&walk, 0);
 }
 
-static int xts_decrypt(struct skcipher_request *req)
+static int __maybe_unused xts_decrypt(struct skcipher_request *req)
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct crypto_aes_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err, first, rounds = 6 + ctx->key1.key_length / 4;
+	int tail = req->cryptlen % AES_BLOCK_SIZE;
+	struct scatterlist sg_src[2], sg_dst[2];
+	struct skcipher_request subreq;
+	struct scatterlist *src, *dst;
 	struct skcipher_walk walk;
-	unsigned int blocks;
+
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
 
 	err = skcipher_walk_virt(&walk, req, false);
 
-	for (first = 1; (blocks = (walk.nbytes / AES_BLOCK_SIZE)); first = 0) {
-		kernel_neon_begin();
-		aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
-				ctx->key1.key_dec, rounds, blocks,
-				ctx->key2.key_enc, walk.iv, first);
-		kernel_neon_end();
-		err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE);
+	if (unlikely(tail > 0 && walk.nbytes < walk.total)) {
+		int xts_blocks = DIV_ROUND_UP(req->cryptlen,
+					      AES_BLOCK_SIZE) - 2;
+
+		skcipher_walk_abort(&walk);
+
+		skcipher_request_set_tfm(&subreq, tfm);
+		skcipher_request_set_callback(&subreq,
+					      skcipher_request_flags(req),
+					      NULL, NULL);
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   xts_blocks * AES_BLOCK_SIZE,
+					   req->iv);
+		req = &subreq;
+		err = skcipher_walk_virt(&walk, req, false);
+	} else {
+		tail = 0;
 	}
 
-	return err;
+	for (first = 1; walk.nbytes >= AES_BLOCK_SIZE; first = 0) {
+		int nbytes = walk.nbytes;
+
+		if (walk.nbytes < walk.total)
+			nbytes &= ~(AES_BLOCK_SIZE - 1);
+
+		kernel_neon_begin();
+		aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+				ctx->key1.key_dec, rounds, nbytes,
+				ctx->key2.key_enc, walk.iv, first);
+		kernel_neon_end();
+		err = skcipher_walk_done(&walk, walk.nbytes - nbytes);
+	}
+
+	if (err || likely(!tail))
+		return err;
+
+	dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+	if (req->dst != req->src)
+		dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+
+	skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, &subreq, false);
+	if (err)
+		return err;
+
+
+	kernel_neon_begin();
+	aes_xts_decrypt(walk.dst.virt.addr, walk.src.virt.addr,
+			ctx->key1.key_dec, rounds, walk.nbytes,
+			ctx->key2.key_enc, walk.iv, first);
+	kernel_neon_end();
+
+	return skcipher_walk_done(&walk, 0);
 }
 
 static struct skcipher_alg aes_algs[] = { {
+#if defined(USE_V8_CRYPTO_EXTENSIONS) || !defined(CONFIG_CRYPTO_AES_ARM64_BS)
 	.base = {
 		.cra_name		= "__ecb(aes)",
 		.cra_driver_name	= "__ecb-aes-" MODE,
@@ -485,24 +701,6 @@
 	.decrypt	= cbc_decrypt,
 }, {
 	.base = {
-		.cra_name		= "__cts(cbc(aes))",
-		.cra_driver_name	= "__cts-cbc-aes-" MODE,
-		.cra_priority		= PRIO,
-		.cra_flags		= CRYPTO_ALG_INTERNAL,
-		.cra_blocksize		= AES_BLOCK_SIZE,
-		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-		.cra_module		= THIS_MODULE,
-	},
-	.min_keysize	= AES_MIN_KEY_SIZE,
-	.max_keysize	= AES_MAX_KEY_SIZE,
-	.ivsize		= AES_BLOCK_SIZE,
-	.walksize	= 2 * AES_BLOCK_SIZE,
-	.setkey		= skcipher_aes_setkey,
-	.encrypt	= cts_cbc_encrypt,
-	.decrypt	= cts_cbc_decrypt,
-	.init		= cts_cbc_init_tfm,
-}, {
-	.base = {
 		.cra_name		= "__ctr(aes)",
 		.cra_driver_name	= "__ctr-aes-" MODE,
 		.cra_priority		= PRIO,
@@ -547,9 +745,46 @@
 	.min_keysize	= 2 * AES_MIN_KEY_SIZE,
 	.max_keysize	= 2 * AES_MAX_KEY_SIZE,
 	.ivsize		= AES_BLOCK_SIZE,
+	.walksize	= 2 * AES_BLOCK_SIZE,
 	.setkey		= xts_set_key,
 	.encrypt	= xts_encrypt,
 	.decrypt	= xts_decrypt,
+}, {
+#endif
+	.base = {
+		.cra_name		= "__cts(cbc(aes))",
+		.cra_driver_name	= "__cts-cbc-aes-" MODE,
+		.cra_priority		= PRIO,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
+		.cra_module		= THIS_MODULE,
+	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.walksize	= 2 * AES_BLOCK_SIZE,
+	.setkey		= skcipher_aes_setkey,
+	.encrypt	= cts_cbc_encrypt,
+	.decrypt	= cts_cbc_decrypt,
+}, {
+	.base = {
+		.cra_name		= "__essiv(cbc(aes),sha256)",
+		.cra_driver_name	= "__essiv-cbc-aes-sha256-" MODE,
+		.cra_priority		= PRIO + 1,
+		.cra_flags		= CRYPTO_ALG_INTERNAL,
+		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_ctxsize		= sizeof(struct crypto_aes_essiv_cbc_ctx),
+		.cra_module		= THIS_MODULE,
+	},
+	.min_keysize	= AES_MIN_KEY_SIZE,
+	.max_keysize	= AES_MAX_KEY_SIZE,
+	.ivsize		= AES_BLOCK_SIZE,
+	.setkey		= essiv_cbc_set_key,
+	.encrypt	= essiv_cbc_encrypt,
+	.decrypt	= essiv_cbc_decrypt,
+	.init		= essiv_cbc_init_tfm,
+	.exit		= essiv_cbc_exit_tfm,
 } };
 
 static int cbcmac_setkey(struct crypto_shash *tfm, const u8 *in_key,
@@ -646,15 +881,14 @@
 		kernel_neon_end();
 	} else {
 		if (enc_before)
-			__aes_arm64_encrypt(ctx->key_enc, dg, dg, rounds);
+			aes_encrypt(ctx, dg, dg);
 
 		while (blocks--) {
 			crypto_xor(dg, in, AES_BLOCK_SIZE);
 			in += AES_BLOCK_SIZE;
 
 			if (blocks || enc_after)
-				__aes_arm64_encrypt(ctx->key_enc, dg, dg,
-						    rounds);
+				aes_encrypt(ctx, dg, dg);
 		}
 	}
 }
@@ -837,5 +1071,7 @@
 module_init(aes_init);
 EXPORT_SYMBOL(neon_aes_ecb_encrypt);
 EXPORT_SYMBOL(neon_aes_cbc_encrypt);
+EXPORT_SYMBOL(neon_aes_xts_encrypt);
+EXPORT_SYMBOL(neon_aes_xts_decrypt);
 #endif
 module_exit(aes_exit);
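
essiv_cbc_set_key() above derives the second key by hashing the user key with SHA-256 and expanding the digest as an AES-256 key; the new assembler entry points then encrypt the caller-supplied IV (for dm-crypt, a sector number) with that derived key before running plain CBC. A rough sketch of the IV derivation step, assuming the 32-byte digest has already been computed; the function name is illustrative only:

#include <crypto/aes.h>
#include <crypto/sha.h>
#include <linux/string.h>

static void essiv_iv(const u8 digest[SHA256_DIGEST_SIZE],
		     const u8 raw_iv[AES_BLOCK_SIZE], u8 iv[AES_BLOCK_SIZE])
{
	struct crypto_aes_ctx essiv;

	/* a 32-byte key selects AES-256, hence the fixed 14 rounds in aes-modes.S */
	aes_expandkey(&essiv, digest, SHA256_DIGEST_SIZE);
	aes_encrypt(&essiv, iv, raw_iv);

	memzero_explicit(&essiv, sizeof(essiv));
}
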
diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S
index 324039b..1316183 100644
--- a/arch/arm64/crypto/aes-modes.S
+++ b/arch/arm64/crypto/aes-modes.S
@@ -118,8 +118,23 @@
 	 *		   int blocks, u8 iv[])
 	 * aes_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 	 *		   int blocks, u8 iv[])
+	 * aes_essiv_cbc_encrypt(u8 out[], u8 const in[], u32 const rk1[],
+	 *			 int rounds, int blocks, u8 iv[],
+	 *			 u32 const rk2[]);
+	 * aes_essiv_cbc_decrypt(u8 out[], u8 const in[], u32 const rk1[],
+	 *			 int rounds, int blocks, u8 iv[],
+	 *			 u32 const rk2[]);
 	 */
 
+AES_ENTRY(aes_essiv_cbc_encrypt)
+	ld1		{v4.16b}, [x5]			/* get iv */
+
+	mov		w8, #14				/* AES-256: 14 rounds */
+	enc_prepare	w8, x6, x7
+	encrypt_block	v4, w8, x6, x7, w9
+	enc_switch_key	w3, x2, x6
+	b		.Lcbcencloop4x
+
 AES_ENTRY(aes_cbc_encrypt)
 	ld1		{v4.16b}, [x5]			/* get iv */
 	enc_prepare	w3, x2, x6
@@ -153,13 +168,25 @@
 	st1		{v4.16b}, [x5]			/* return iv */
 	ret
 AES_ENDPROC(aes_cbc_encrypt)
+AES_ENDPROC(aes_essiv_cbc_encrypt)
 
+AES_ENTRY(aes_essiv_cbc_decrypt)
+	stp		x29, x30, [sp, #-16]!
+	mov		x29, sp
+
+	ld1		{cbciv.16b}, [x5]		/* get iv */
+
+	mov		w8, #14				/* AES-256: 14 rounds */
+	enc_prepare	w8, x6, x7
+	encrypt_block	cbciv, w8, x6, x7, w9
+	b		.Lessivcbcdecstart
 
 AES_ENTRY(aes_cbc_decrypt)
 	stp		x29, x30, [sp, #-16]!
 	mov		x29, sp
 
 	ld1		{cbciv.16b}, [x5]		/* get iv */
+.Lessivcbcdecstart:
 	dec_prepare	w3, x2, x6
 
 .LcbcdecloopNx:
@@ -212,6 +239,7 @@
 	ldp		x29, x30, [sp], #16
 	ret
 AES_ENDPROC(aes_cbc_decrypt)
+AES_ENDPROC(aes_essiv_cbc_decrypt)
 
 
 	/*
@@ -265,12 +293,11 @@
 	ld1		{v5.16b}, [x5]			/* get iv */
 	dec_prepare	w3, x2, x6
 
-	tbl		v2.16b, {v1.16b}, v4.16b
 	decrypt_block	v0, w3, x2, x6, w7
-	eor		v2.16b, v2.16b, v0.16b
+	tbl		v2.16b, {v0.16b}, v3.16b
+	eor		v2.16b, v2.16b, v1.16b
 
 	tbx		v0.16b, {v1.16b}, v4.16b
-	tbl		v2.16b, {v2.16b}, v3.16b
 	decrypt_block	v0, w3, x2, x6, w7
 	eor		v0.16b, v0.16b, v5.16b		/* xor with iv */
 
@@ -386,10 +413,10 @@
 
 
 	/*
+	 * aes_xts_encrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
+	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
 	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
-	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
-	 * aes_xts_decrypt(u8 out[], u8 const in[], u8 const rk1[], int rounds,
-	 *		   int blocks, u8 const rk2[], u8 iv[], int first)
+	 *		   int bytes, u8 const rk2[], u8 iv[], int first)
 	 */
 
 	.macro		next_tweak, out, in, tmp
@@ -415,6 +442,7 @@
 	cbz		w7, .Lxtsencnotfirst
 
 	enc_prepare	w3, x5, x8
+	xts_cts_skip_tw	w7, .LxtsencNx
 	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
 	enc_switch_key	w3, x2, x8
 	b		.LxtsencNx
@@ -424,7 +452,7 @@
 .LxtsencloopNx:
 	next_tweak	v4, v4, v8
 .LxtsencNx:
-	subs		w4, w4, #4
+	subs		w4, w4, #64
 	bmi		.Lxtsenc1x
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 pt blocks */
 	next_tweak	v5, v4, v8
@@ -441,39 +469,74 @@
 	eor		v2.16b, v2.16b, v6.16b
 	st1		{v0.16b-v3.16b}, [x0], #64
 	mov		v4.16b, v7.16b
-	cbz		w4, .Lxtsencout
+	cbz		w4, .Lxtsencret
 	xts_reload_mask	v8
 	b		.LxtsencloopNx
 .Lxtsenc1x:
-	adds		w4, w4, #4
+	adds		w4, w4, #64
 	beq		.Lxtsencout
+	subs		w4, w4, #16
+	bmi		.LxtsencctsNx
 .Lxtsencloop:
-	ld1		{v1.16b}, [x1], #16
-	eor		v0.16b, v1.16b, v4.16b
+	ld1		{v0.16b}, [x1], #16
+.Lxtsencctsout:
+	eor		v0.16b, v0.16b, v4.16b
 	encrypt_block	v0, w3, x2, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
-	st1		{v0.16b}, [x0], #16
-	subs		w4, w4, #1
-	beq		.Lxtsencout
+	cbz		w4, .Lxtsencout
+	subs		w4, w4, #16
 	next_tweak	v4, v4, v8
+	bmi		.Lxtsenccts
+	st1		{v0.16b}, [x0], #16
 	b		.Lxtsencloop
 .Lxtsencout:
+	st1		{v0.16b}, [x0]
+.Lxtsencret:
 	st1		{v4.16b}, [x6]
 	ldp		x29, x30, [sp], #16
 	ret
-AES_ENDPROC(aes_xts_encrypt)
 
+.LxtsencctsNx:
+	mov		v0.16b, v3.16b
+	sub		x0, x0, #16
+.Lxtsenccts:
+	adr_l		x8, .Lcts_permute_table
+
+	add		x1, x1, w4, sxtw	/* rewind input pointer */
+	add		w4, w4, #16		/* # bytes in final block */
+	add		x9, x8, #32
+	add		x8, x8, x4
+	sub		x9, x9, x4
+	add		x4, x0, x4		/* output address of final block */
+
+	ld1		{v1.16b}, [x1]		/* load final block */
+	ld1		{v2.16b}, [x8]
+	ld1		{v3.16b}, [x9]
+
+	tbl		v2.16b, {v0.16b}, v2.16b
+	tbx		v0.16b, {v1.16b}, v3.16b
+	st1		{v2.16b}, [x4]			/* overlapping stores */
+	mov		w4, wzr
+	b		.Lxtsencctsout
+AES_ENDPROC(aes_xts_encrypt)
 
 AES_ENTRY(aes_xts_decrypt)
 	stp		x29, x30, [sp, #-16]!
 	mov		x29, sp
 
+	/* subtract 16 bytes if we are doing CTS */
+	sub		w8, w4, #0x10
+	tst		w4, #0xf
+	csel		w4, w4, w8, eq
+
 	ld1		{v4.16b}, [x6]
 	xts_load_mask	v8
+	xts_cts_skip_tw	w7, .Lxtsdecskiptw
 	cbz		w7, .Lxtsdecnotfirst
 
 	enc_prepare	w3, x5, x8
 	encrypt_block	v4, w3, x5, x8, w7		/* first tweak */
+.Lxtsdecskiptw:
 	dec_prepare	w3, x2, x8
 	b		.LxtsdecNx
 
@@ -482,7 +545,7 @@
 .LxtsdecloopNx:
 	next_tweak	v4, v4, v8
 .LxtsdecNx:
-	subs		w4, w4, #4
+	subs		w4, w4, #64
 	bmi		.Lxtsdec1x
 	ld1		{v0.16b-v3.16b}, [x1], #64	/* get 4 ct blocks */
 	next_tweak	v5, v4, v8
@@ -503,22 +566,52 @@
 	xts_reload_mask	v8
 	b		.LxtsdecloopNx
 .Lxtsdec1x:
-	adds		w4, w4, #4
+	adds		w4, w4, #64
 	beq		.Lxtsdecout
+	subs		w4, w4, #16
 .Lxtsdecloop:
-	ld1		{v1.16b}, [x1], #16
-	eor		v0.16b, v1.16b, v4.16b
+	ld1		{v0.16b}, [x1], #16
+	bmi		.Lxtsdeccts
+.Lxtsdecctsout:
+	eor		v0.16b, v0.16b, v4.16b
 	decrypt_block	v0, w3, x2, x8, w7
 	eor		v0.16b, v0.16b, v4.16b
 	st1		{v0.16b}, [x0], #16
-	subs		w4, w4, #1
-	beq		.Lxtsdecout
+	cbz		w4, .Lxtsdecout
+	subs		w4, w4, #16
 	next_tweak	v4, v4, v8
 	b		.Lxtsdecloop
 .Lxtsdecout:
 	st1		{v4.16b}, [x6]
 	ldp		x29, x30, [sp], #16
 	ret
+
+.Lxtsdeccts:
+	adr_l		x8, .Lcts_permute_table
+
+	add		x1, x1, w4, sxtw	/* rewind input pointer */
+	add		w4, w4, #16		/* # bytes in final block */
+	add		x9, x8, #32
+	add		x8, x8, x4
+	sub		x9, x9, x4
+	add		x4, x0, x4		/* output address of final block */
+
+	next_tweak	v5, v4, v8
+
+	ld1		{v1.16b}, [x1]		/* load final block */
+	ld1		{v2.16b}, [x8]
+	ld1		{v3.16b}, [x9]
+
+	eor		v0.16b, v0.16b, v5.16b
+	decrypt_block	v0, w3, x2, x8, w7
+	eor		v0.16b, v0.16b, v5.16b
+
+	tbl		v2.16b, {v0.16b}, v2.16b
+	tbx		v0.16b, {v1.16b}, v3.16b
+
+	st1		{v2.16b}, [x4]			/* overlapping stores */
+	mov		w4, wzr
+	b		.Lxtsdecctsout
 AES_ENDPROC(aes_xts_decrypt)
 
 	/*
diff --git a/arch/arm64/crypto/aes-neon.S b/arch/arm64/crypto/aes-neon.S
index 2bebccc..22d9b11 100644
--- a/arch/arm64/crypto/aes-neon.S
+++ b/arch/arm64/crypto/aes-neon.S
@@ -19,6 +19,11 @@
 	xts_load_mask	\tmp
 	.endm
 
+	/* special case for the neon-bs driver calling into this one for CTS */
+	.macro		xts_cts_skip_tw, reg, lbl
+	tbnz		\reg, #1, \lbl
+	.endm
+
 	/* multiply by polynomial 'x' in GF(2^8) */
 	.macro		mul_by_x, out, in, temp, const
 	sshr		\temp, \in, #7
@@ -49,7 +54,7 @@
 
 	/* do preload for encryption */
 	.macro		enc_prepare, ignore0, ignore1, temp
-	prepare		.LForward_Sbox, .LForward_ShiftRows, \temp
+	prepare		crypto_aes_sbox, .LForward_ShiftRows, \temp
 	.endm
 
 	.macro		enc_switch_key, ignore0, ignore1, temp
@@ -58,7 +63,7 @@
 
 	/* do preload for decryption */
 	.macro		dec_prepare, ignore0, ignore1, temp
-	prepare		.LReverse_Sbox, .LReverse_ShiftRows, \temp
+	prepare		crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
 	.endm
 
 	/* apply SubBytes transformation using the the preloaded Sbox */
@@ -234,75 +239,7 @@
 #include "aes-modes.S"
 
 	.section	".rodata", "a"
-	.align		6
-.LForward_Sbox:
-	.byte		0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5
-	.byte		0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76
-	.byte		0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0
-	.byte		0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0
-	.byte		0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc
-	.byte		0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15
-	.byte		0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a
-	.byte		0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75
-	.byte		0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0
-	.byte		0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84
-	.byte		0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b
-	.byte		0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf
-	.byte		0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85
-	.byte		0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8
-	.byte		0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5
-	.byte		0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2
-	.byte		0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17
-	.byte		0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73
-	.byte		0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88
-	.byte		0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb
-	.byte		0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c
-	.byte		0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79
-	.byte		0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9
-	.byte		0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08
-	.byte		0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6
-	.byte		0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a
-	.byte		0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e
-	.byte		0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e
-	.byte		0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94
-	.byte		0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf
-	.byte		0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68
-	.byte		0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
-
-.LReverse_Sbox:
-	.byte		0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38
-	.byte		0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb
-	.byte		0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87
-	.byte		0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb
-	.byte		0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d
-	.byte		0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e
-	.byte		0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2
-	.byte		0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25
-	.byte		0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16
-	.byte		0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92
-	.byte		0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda
-	.byte		0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84
-	.byte		0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a
-	.byte		0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06
-	.byte		0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02
-	.byte		0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b
-	.byte		0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea
-	.byte		0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73
-	.byte		0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85
-	.byte		0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e
-	.byte		0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89
-	.byte		0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b
-	.byte		0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20
-	.byte		0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4
-	.byte		0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31
-	.byte		0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f
-	.byte		0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d
-	.byte		0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef
-	.byte		0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0
-	.byte		0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61
-	.byte		0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26
-	.byte		0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
-
+	.align		4
 .LForward_ShiftRows:
 	.octa		0x0b06010c07020d08030e09040f0a0500
 
diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S
index cf10ff8..6598203 100644
--- a/arch/arm64/crypto/aes-neonbs-core.S
+++ b/arch/arm64/crypto/aes-neonbs-core.S
@@ -730,11 +730,6 @@
 	eor		\out\().16b, \out\().16b, \tmp\().16b
 	.endm
 
-	.align		4
-.Lxts_mul_x:
-CPU_LE(	.quad		1, 0x87		)
-CPU_BE(	.quad		0x87, 1		)
-
 	/*
 	 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 	 *		     int blocks, u8 iv[])
@@ -806,7 +801,9 @@
 	mov		x23, x4
 	mov		x24, x5
 
-0:	ldr		q30, .Lxts_mul_x
+0:	movi		v30.2s, #0x1
+	movi		v25.2s, #0x87
+	uzp1		v30.4s, v30.4s, v25.4s
 	ld1		{v25.16b}, [x24]
 
 99:	adr		x7, \do8
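
The movi/uzp1 sequence above rebuilds, in registers, the { 1, 0x87 } value that the dropped .Lxts_mul_x literal used to supply; 0x87 is the GF(2^128) feedback term applied whenever the XTS tweak is advanced to the next block. A minimal scalar sketch of that tweak doubling, assuming the usual little-endian XTS block layout (an illustration only, not the kernel's NEON code):

	#include <stdint.h>

	/* Multiply the XTS tweak by x in GF(2^128), little-endian block layout. */
	void xts_mul_x(uint8_t tweak[16])
	{
		uint8_t carry = 0;

		for (int i = 0; i < 16; i++) {
			uint8_t next = tweak[i] >> 7;

			tweak[i] = (uint8_t)((tweak[i] << 1) | carry);
			carry = next;
		}
		if (carry)
			tweak[0] ^= 0x87;	/* x^128 reduced by x^7 + x^2 + x + 1 */
	}
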
diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c
index 281d2308..ea873b8 100644
--- a/arch/arm64/crypto/aes-neonbs-glue.c
+++ b/arch/arm64/crypto/aes-neonbs-glue.c
@@ -8,13 +8,13 @@
 #include <asm/neon.h>
 #include <asm/simd.h>
 #include <crypto/aes.h>
+#include <crypto/ctr.h>
 #include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <crypto/xts.h>
 #include <linux/module.h>
 
-#include "aes-ctr-fallback.h"
-
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
 
@@ -46,6 +46,12 @@
 				     int rounds, int blocks);
 asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[],
 				     int rounds, int blocks, u8 iv[]);
+asmlinkage void neon_aes_xts_encrypt(u8 out[], u8 const in[],
+				     u32 const rk1[], int rounds, int bytes,
+				     u32 const rk2[], u8 iv[], int first);
+asmlinkage void neon_aes_xts_decrypt(u8 out[], u8 const in[],
+				     u32 const rk1[], int rounds, int bytes,
+				     u32 const rk2[], u8 iv[], int first);
 
 struct aesbs_ctx {
 	u8	rk[13 * (8 * AES_BLOCK_SIZE) + 32];
@@ -65,6 +71,7 @@
 struct aesbs_xts_ctx {
 	struct aesbs_ctx	key;
 	u32			twkey[AES_MAX_KEYLENGTH_U32];
+	struct crypto_aes_ctx	cts;
 };
 
 static int aesbs_setkey(struct crypto_skcipher *tfm, const u8 *in_key,
@@ -74,7 +81,7 @@
 	struct crypto_aes_ctx rk;
 	int err;
 
-	err = crypto_aes_expand_key(&rk, in_key, key_len);
+	err = aes_expandkey(&rk, in_key, key_len);
 	if (err)
 		return err;
 
@@ -133,7 +140,7 @@
 	struct crypto_aes_ctx rk;
 	int err;
 
-	err = crypto_aes_expand_key(&rk, in_key, key_len);
+	err = aes_expandkey(&rk, in_key, key_len);
 	if (err)
 		return err;
 
@@ -205,7 +212,7 @@
 	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
 	int err;
 
-	err = crypto_aes_expand_key(&ctx->fallback, in_key, key_len);
+	err = aes_expandkey(&ctx->fallback, in_key, key_len);
 	if (err)
 		return err;
 
@@ -271,7 +278,11 @@
 		return err;
 
 	key_len /= 2;
-	err = crypto_aes_expand_key(&rk, in_key + key_len, key_len);
+	err = aes_expandkey(&ctx->cts, in_key, key_len);
+	if (err)
+		return err;
+
+	err = aes_expandkey(&rk, in_key + key_len, key_len);
 	if (err)
 		return err;
 
@@ -280,59 +291,142 @@
 	return aesbs_setkey(tfm, in_key, key_len);
 }
 
+static void ctr_encrypt_one(struct crypto_skcipher *tfm, const u8 *src, u8 *dst)
+{
+	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
+	unsigned long flags;
+
+	/*
+	 * Temporarily disable interrupts to avoid races where
+	 * cachelines are evicted when the CPU is interrupted
+	 * to do something else.
+	 */
+	local_irq_save(flags);
+	aes_encrypt(&ctx->fallback, dst, src);
+	local_irq_restore(flags);
+}
+
 static int ctr_encrypt_sync(struct skcipher_request *req)
 {
-	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct aesbs_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);
-
 	if (!crypto_simd_usable())
-		return aes_ctr_encrypt_fallback(&ctx->fallback, req);
+		return crypto_ctr_encrypt_walk(req, ctr_encrypt_one);
 
 	return ctr_encrypt(req);
 }
 
-static int __xts_crypt(struct skcipher_request *req,
+static int __xts_crypt(struct skcipher_request *req, bool encrypt,
 		       void (*fn)(u8 out[], u8 const in[], u8 const rk[],
 				  int rounds, int blocks, u8 iv[]))
 {
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct aesbs_xts_ctx *ctx = crypto_skcipher_ctx(tfm);
+	int tail = req->cryptlen % (8 * AES_BLOCK_SIZE);
+	struct scatterlist sg_src[2], sg_dst[2];
+	struct skcipher_request subreq;
+	struct scatterlist *src, *dst;
 	struct skcipher_walk walk;
-	int err;
+	int nbytes, err;
+	int first = 1;
+	u8 *out, *in;
+
+	if (req->cryptlen < AES_BLOCK_SIZE)
+		return -EINVAL;
+
+	/* ensure that the cts tail is covered by a single step */
+	if (unlikely(tail > 0 && tail < AES_BLOCK_SIZE)) {
+		int xts_blocks = DIV_ROUND_UP(req->cryptlen,
+					      AES_BLOCK_SIZE) - 2;
+
+		skcipher_request_set_tfm(&subreq, tfm);
+		skcipher_request_set_callback(&subreq,
+					      skcipher_request_flags(req),
+					      NULL, NULL);
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   xts_blocks * AES_BLOCK_SIZE,
+					   req->iv);
+		req = &subreq;
+	} else {
+		tail = 0;
+	}
 
 	err = skcipher_walk_virt(&walk, req, false);
 	if (err)
 		return err;
 
-	kernel_neon_begin();
-	neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, ctx->key.rounds, 1);
-	kernel_neon_end();
-
 	while (walk.nbytes >= AES_BLOCK_SIZE) {
 		unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE;
 
-		if (walk.nbytes < walk.total)
+		if (walk.nbytes < walk.total || walk.nbytes % AES_BLOCK_SIZE)
 			blocks = round_down(blocks,
 					    walk.stride / AES_BLOCK_SIZE);
 
+		out = walk.dst.virt.addr;
+		in = walk.src.virt.addr;
+		nbytes = walk.nbytes;
+
 		kernel_neon_begin();
-		fn(walk.dst.virt.addr, walk.src.virt.addr, ctx->key.rk,
-		   ctx->key.rounds, blocks, walk.iv);
+		if (likely(blocks > 6)) { /* plain NEON is faster otherwise */
+			if (first)
+				neon_aes_ecb_encrypt(walk.iv, walk.iv,
+						     ctx->twkey,
+						     ctx->key.rounds, 1);
+			first = 0;
+
+			fn(out, in, ctx->key.rk, ctx->key.rounds, blocks,
+			   walk.iv);
+
+			out += blocks * AES_BLOCK_SIZE;
+			in += blocks * AES_BLOCK_SIZE;
+			nbytes -= blocks * AES_BLOCK_SIZE;
+		}
+
+		if (walk.nbytes == walk.total && nbytes > 0)
+			goto xts_tail;
+
 		kernel_neon_end();
-		err = skcipher_walk_done(&walk,
-					 walk.nbytes - blocks * AES_BLOCK_SIZE);
+		skcipher_walk_done(&walk, nbytes);
 	}
-	return err;
+
+	if (err || likely(!tail))
+		return err;
+
+	/* handle ciphertext stealing */
+	dst = src = scatterwalk_ffwd(sg_src, req->src, req->cryptlen);
+	if (req->dst != req->src)
+		dst = scatterwalk_ffwd(sg_dst, req->dst, req->cryptlen);
+
+	skcipher_request_set_crypt(req, src, dst, AES_BLOCK_SIZE + tail,
+				   req->iv);
+
+	err = skcipher_walk_virt(&walk, req, false);
+	if (err)
+		return err;
+
+	out = walk.dst.virt.addr;
+	in = walk.src.virt.addr;
+	nbytes = walk.nbytes;
+
+	kernel_neon_begin();
+xts_tail:
+	if (encrypt)
+		neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds,
+				     nbytes, ctx->twkey, walk.iv, first ?: 2);
+	else
+		neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds,
+				     nbytes, ctx->twkey, walk.iv, first ?: 2);
+	kernel_neon_end();
+
+	return skcipher_walk_done(&walk, 0);
 }
 
 static int xts_encrypt(struct skcipher_request *req)
 {
-	return __xts_crypt(req, aesbs_xts_encrypt);
+	return __xts_crypt(req, true, aesbs_xts_encrypt);
 }
 
 static int xts_decrypt(struct skcipher_request *req)
 {
-	return __xts_crypt(req, aesbs_xts_decrypt);
+	return __xts_crypt(req, false, aesbs_xts_decrypt);
 }
 
 static struct skcipher_alg aes_algs[] = { {
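
The reworked __xts_crypt() above splits off the ciphertext-stealing tail with a subrequest so the bulk NEON loop only ever sees whole blocks; the final AES_BLOCK_SIZE + tail bytes are then handed to the new neon_aes_xts_encrypt()/neon_aes_xts_decrypt() tail routines. As a rough illustration of the reordering those routines perform on encryption, here is a sketch with a toy single-block transform standing in for the tweaked AES operation (not the kernel code; real XTS also uses consecutive tweaks for the two block operations):

	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	/* Placeholder for one tweaked AES block operation; NOT a real cipher. */
	static void toy_block(uint8_t out[16], const uint8_t in[16])
	{
		for (int i = 0; i < 16; i++)
			out[i] = (uint8_t)(in[i] ^ 0xaa);
	}

	/* src/dst cover the last full block plus 'tail' trailing bytes (0 < tail < 16). */
	void cts_encrypt_tail(uint8_t *dst, const uint8_t *src, size_t tail)
	{
		uint8_t cc[16], pp[16];

		toy_block(cc, src);			/* encrypt the last full block      */
		memcpy(dst + 16, cc, tail);		/* its head becomes the short block */

		memcpy(pp, src + 16, tail);		/* partial plaintext ...            */
		memcpy(pp + tail, cc + tail, 16 - tail); /* ... padded with stolen bytes    */
		toy_block(dst, pp);			/* last full ciphertext block       */
	}
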
diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c
index 16c5da9..70b1469 100644
--- a/arch/arm64/crypto/ghash-ce-glue.c
+++ b/arch/arm64/crypto/ghash-ce-glue.c
@@ -70,8 +70,6 @@
 asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[],
 					u32 const rk[], int rounds);
 
-asmlinkage void __aes_arm64_encrypt(u32 *rk, u8 *out, const u8 *in, int rounds);
-
 static int ghash_init(struct shash_desc *desc)
 {
 	struct ghash_desc_ctx *ctx = shash_desc_ctx(desc);
@@ -309,14 +307,13 @@
 	u8 key[GHASH_BLOCK_SIZE];
 	int ret;
 
-	ret = crypto_aes_expand_key(&ctx->aes_key, inkey, keylen);
+	ret = aes_expandkey(&ctx->aes_key, inkey, keylen);
 	if (ret) {
 		tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}
 
-	__aes_arm64_encrypt(ctx->aes_key.key_enc, key, (u8[AES_BLOCK_SIZE]){},
-			    num_rounds(&ctx->aes_key));
+	aes_encrypt(&ctx->aes_key, key, (u8[AES_BLOCK_SIZE]){});
 
 	return __ghash_setkey(&ctx->ghash_key, key, sizeof(be128));
 }
@@ -467,7 +464,7 @@
 			rk = ctx->aes_key.key_enc;
 		} while (walk.nbytes >= 2 * AES_BLOCK_SIZE);
 	} else {
-		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds);
+		aes_encrypt(&ctx->aes_key, tag, iv);
 		put_unaligned_be32(2, iv + GCM_IV_SIZE);
 
 		while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) {
@@ -478,8 +475,7 @@
 			int remaining = blocks;
 
 			do {
-				__aes_arm64_encrypt(ctx->aes_key.key_enc,
-						    ks, iv, nrounds);
+				aes_encrypt(&ctx->aes_key, ks, iv);
 				crypto_xor_cpy(dst, src, ks, AES_BLOCK_SIZE);
 				crypto_inc(iv, AES_BLOCK_SIZE);
 
@@ -495,13 +491,10 @@
 						 walk.nbytes % (2 * AES_BLOCK_SIZE));
 		}
 		if (walk.nbytes) {
-			__aes_arm64_encrypt(ctx->aes_key.key_enc, ks, iv,
-					    nrounds);
+			aes_encrypt(&ctx->aes_key, ks, iv);
 			if (walk.nbytes > AES_BLOCK_SIZE) {
 				crypto_inc(iv, AES_BLOCK_SIZE);
-				__aes_arm64_encrypt(ctx->aes_key.key_enc,
-					            ks + AES_BLOCK_SIZE, iv,
-						    nrounds);
+				aes_encrypt(&ctx->aes_key, ks + AES_BLOCK_SIZE, iv);
 			}
 		}
 	}
@@ -605,7 +598,7 @@
 			rk = ctx->aes_key.key_enc;
 		} while (walk.nbytes >= 2 * AES_BLOCK_SIZE);
 	} else {
-		__aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, nrounds);
+		aes_encrypt(&ctx->aes_key, tag, iv);
 		put_unaligned_be32(2, iv + GCM_IV_SIZE);
 
 		while (walk.nbytes >= (2 * AES_BLOCK_SIZE)) {
@@ -618,8 +611,7 @@
 					pmull_ghash_update_p64);
 
 			do {
-				__aes_arm64_encrypt(ctx->aes_key.key_enc,
-						    buf, iv, nrounds);
+				aes_encrypt(&ctx->aes_key, buf, iv);
 				crypto_xor_cpy(dst, src, buf, AES_BLOCK_SIZE);
 				crypto_inc(iv, AES_BLOCK_SIZE);
 
@@ -637,11 +629,9 @@
 				memcpy(iv2, iv, AES_BLOCK_SIZE);
 				crypto_inc(iv2, AES_BLOCK_SIZE);
 
-				__aes_arm64_encrypt(ctx->aes_key.key_enc, iv2,
-						    iv2, nrounds);
+				aes_encrypt(&ctx->aes_key, iv2, iv2);
 			}
-			__aes_arm64_encrypt(ctx->aes_key.key_enc, iv, iv,
-					    nrounds);
+			aes_encrypt(&ctx->aes_key, iv, iv);
 		}
 	}
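
In the non-SIMD GCM fallback above, the private __aes_arm64_encrypt() helper is replaced by the generic aes_encrypt() library call, but the shape of the loop is unchanged: each keystream block is produced by encrypting the counter block, xored into the data, and the counter is then incremented. A self-contained sketch of that pattern (toy_block() stands in for the block cipher, and the sketch assumes a GCM-style 32-bit big-endian counter, whereas the kernel uses crypto_inc() over the whole block):

	#include <stddef.h>
	#include <stdint.h>

	/* Placeholder for the block cipher; NOT a real cipher. */
	static void toy_block(uint8_t out[16], const uint8_t in[16])
	{
		for (int i = 0; i < 16; i++)
			out[i] = (uint8_t)(in[i] ^ 0x5c);
	}

	/* Bump the 32-bit big-endian counter in the last four bytes of the block. */
	static void be32_inc(uint8_t ctr[16])
	{
		for (int i = 15; i >= 12; i--)
			if (++ctr[i] != 0)
				break;
	}

	/* Full blocks only; the partial tail handling is omitted for brevity. */
	void ctr_xor(uint8_t *dst, const uint8_t *src, size_t len, uint8_t ctr[16])
	{
		uint8_t ks[16];

		while (len >= 16) {
			toy_block(ks, ctr);		/* keystream block */
			for (int i = 0; i < 16; i++)
				dst[i] = src[i] ^ ks[i];
			be32_inc(ctr);			/* next counter    */
			dst += 16;
			src += 16;
			len -= 16;
		}
	}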
 
diff --git a/arch/arm64/crypto/sha256-glue.c b/arch/arm64/crypto/sha256-glue.c
index 04b9d17..e273fac 100644
--- a/arch/arm64/crypto/sha256-glue.c
+++ b/arch/arm64/crypto/sha256-glue.c
@@ -30,15 +30,15 @@
 asmlinkage void sha256_block_neon(u32 *digest, const void *data,
 				  unsigned int num_blks);
 
-static int sha256_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int len)
+static int crypto_sha256_arm64_update(struct shash_desc *desc, const u8 *data,
+				      unsigned int len)
 {
 	return sha256_base_do_update(desc, data, len,
 				(sha256_block_fn *)sha256_block_data_order);
 }
 
-static int sha256_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
+static int crypto_sha256_arm64_finup(struct shash_desc *desc, const u8 *data,
+				     unsigned int len, u8 *out)
 {
 	if (len)
 		sha256_base_do_update(desc, data, len,
@@ -49,17 +49,17 @@
 	return sha256_base_finish(desc, out);
 }
 
-static int sha256_final(struct shash_desc *desc, u8 *out)
+static int crypto_sha256_arm64_final(struct shash_desc *desc, u8 *out)
 {
-	return sha256_finup(desc, NULL, 0, out);
+	return crypto_sha256_arm64_finup(desc, NULL, 0, out);
 }
 
 static struct shash_alg algs[] = { {
 	.digestsize		= SHA256_DIGEST_SIZE,
 	.init			= sha256_base_init,
-	.update			= sha256_update,
-	.final			= sha256_final,
-	.finup			= sha256_finup,
+	.update			= crypto_sha256_arm64_update,
+	.final			= crypto_sha256_arm64_final,
+	.finup			= crypto_sha256_arm64_finup,
 	.descsize		= sizeof(struct sha256_state),
 	.base.cra_name		= "sha256",
 	.base.cra_driver_name	= "sha256-arm64",
@@ -69,9 +69,9 @@
 }, {
 	.digestsize		= SHA224_DIGEST_SIZE,
 	.init			= sha224_base_init,
-	.update			= sha256_update,
-	.final			= sha256_final,
-	.finup			= sha256_finup,
+	.update			= crypto_sha256_arm64_update,
+	.final			= crypto_sha256_arm64_final,
+	.finup			= crypto_sha256_arm64_finup,
 	.descsize		= sizeof(struct sha256_state),
 	.base.cra_name		= "sha224",
 	.base.cra_driver_name	= "sha224-arm64",
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index 9a1d2fc..64870c7 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -11,4 +11,3 @@
 generic-y += preempt.h
 generic-y += vtime.h
 generic-y += msi.h
-generic-y += simd.h
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 6d2dbb5..9803e96 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -108,7 +108,7 @@
 	return 0;
 }
 
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void crypto_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 
@@ -119,7 +119,7 @@
 	cpacf_km(sctx->fc, &sctx->key, out, in, AES_BLOCK_SIZE);
 }
 
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void crypto_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 
@@ -172,8 +172,8 @@
 			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
 			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
 			.cia_setkey		=	aes_set_key,
-			.cia_encrypt		=	aes_encrypt,
-			.cia_decrypt		=	aes_decrypt,
+			.cia_encrypt		=	crypto_aes_encrypt,
+			.cia_decrypt		=	crypto_aes_decrypt,
 		}
 	}
 };
@@ -512,7 +512,7 @@
 	unsigned long fc;
 	int err;
 
-	err = xts_check_key(tfm, in_key, key_len);
+	err = xts_fallback_setkey(tfm, in_key, key_len);
 	if (err)
 		return err;
 
@@ -529,7 +529,7 @@
 	/* Check if the function code is available */
 	xts_ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
 	if (!xts_ctx->fc)
-		return xts_fallback_setkey(tfm, in_key, key_len);
+		return 0;
 
 	/* Split the XTS key into the two subkeys */
 	key_len = key_len / 2;
@@ -589,7 +589,7 @@
 	if (!nbytes)
 		return -EINVAL;
 
-	if (unlikely(!xts_ctx->fc))
+	if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0))
 		return xts_fallback_encrypt(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
@@ -606,7 +606,7 @@
 	if (!nbytes)
 		return -EINVAL;
 
-	if (unlikely(!xts_ctx->fc))
+	if (unlikely(!xts_ctx->fc || (nbytes % XTS_BLOCK_SIZE) != 0))
 		return xts_fallback_decrypt(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 374b42f..439b100 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -16,7 +16,7 @@
 #include <linux/fips.h>
 #include <linux/mutex.h>
 #include <crypto/algapi.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <asm/cpacf.h>
 
 #define DES3_KEY_SIZE	(3 * DES_KEY_SIZE)
@@ -35,27 +35,24 @@
 		      unsigned int key_len)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 tmp[DES_EXPKEY_WORDS];
+	int err;
 
-	/* check for weak keys */
-	if (!des_ekey(tmp, key) &&
-	    (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
-		return -EINVAL;
-	}
+	err = crypto_des_verify_key(tfm, key);
+	if (err)
+		return err;
 
 	memcpy(ctx->key, key, key_len);
 	return 0;
 }
 
-static void des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void s390_des_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	cpacf_km(CPACF_KM_DEA, ctx->key, out, in, DES_BLOCK_SIZE);
 }
 
-static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void s390_des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
@@ -76,8 +73,8 @@
 			.cia_min_keysize	=	DES_KEY_SIZE,
 			.cia_max_keysize	=	DES_KEY_SIZE,
 			.cia_setkey		=	des_setkey,
-			.cia_encrypt		=	des_encrypt,
-			.cia_decrypt		=	des_decrypt,
+			.cia_encrypt		=	s390_des_encrypt,
+			.cia_decrypt		=	s390_des_decrypt,
 		}
 	}
 };
@@ -227,8 +224,8 @@
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 	int err;
 
-	err = __des3_verify_key(&tfm->crt_flags, key);
-	if (unlikely(err))
+	err = crypto_des3_ede_verify_key(tfm, key);
+	if (err)
 		return err;
 
 	memcpy(ctx->key, key, key_len);
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index eeeb6a7..a3e7400e0 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -153,4 +153,4 @@
 MODULE_ALIAS_CRYPTO("ghash");
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("GHASH Message Digest Algorithm, s390 implementation");
+MODULE_DESCRIPTION("GHASH hash function, s390 implementation");
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index af75051..b52c87e4 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -17,7 +17,7 @@
 
 #include "sha.h"
 
-static int sha256_init(struct shash_desc *desc)
+static int s390_sha256_init(struct shash_desc *desc)
 {
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
 
@@ -60,7 +60,7 @@
 
 static struct shash_alg sha256_alg = {
 	.digestsize	=	SHA256_DIGEST_SIZE,
-	.init		=	sha256_init,
+	.init		=	s390_sha256_init,
 	.update		=	s390_sha_update,
 	.final		=	s390_sha_final,
 	.export		=	sha256_export,
@@ -76,7 +76,7 @@
 	}
 };
 
-static int sha224_init(struct shash_desc *desc)
+static int s390_sha224_init(struct shash_desc *desc)
 {
 	struct s390_sha_ctx *sctx = shash_desc_ctx(desc);
 
@@ -96,7 +96,7 @@
 
 static struct shash_alg sha224_alg = {
 	.digestsize	=	SHA224_DIGEST_SIZE,
-	.init		=	sha224_init,
+	.init		=	s390_sha224_init,
 	.update		=	s390_sha_update,
 	.final		=	s390_sha_final,
 	.export		=	sha256_export,
diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile
index dc1ae4f..bc0d7a0 100644
--- a/arch/s390/purgatory/Makefile
+++ b/arch/s390/purgatory/Makefile
@@ -7,9 +7,11 @@
 targets += $(purgatory-y) purgatory.lds purgatory purgatory.ro
 PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
 
-$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE
+$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
+CFLAGS_sha256.o := -D__DISABLE_EXPORTS
+
 $(obj)/mem.o: $(srctree)/arch/s390/lib/mem.S FORCE
 	$(call if_changed_rule,as_o_S)
 
diff --git a/arch/s390/purgatory/purgatory.c b/arch/s390/purgatory/purgatory.c
index 3528e6d..0a423bc 100644
--- a/arch/s390/purgatory/purgatory.c
+++ b/arch/s390/purgatory/purgatory.c
@@ -8,8 +8,8 @@
  */
 
 #include <linux/kexec.h>
-#include <linux/sha256.h>
 #include <linux/string.h>
+#include <crypto/sha.h>
 #include <asm/purgatory.h>
 
 int verify_sha256_digest(void)
diff --git a/arch/sparc/crypto/aes_glue.c b/arch/sparc/crypto/aes_glue.c
index 57b4741..7b946b3 100644
--- a/arch/sparc/crypto/aes_glue.c
+++ b/arch/sparc/crypto/aes_glue.c
@@ -197,14 +197,14 @@
 	return 0;
 }
 
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void crypto_aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 
 	ctx->ops->encrypt(&ctx->key[0], (const u32 *) src, (u32 *) dst);
 }
 
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void crypto_aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct crypto_sparc64_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 
@@ -396,8 +396,8 @@
 			.cia_min_keysize	= AES_MIN_KEY_SIZE,
 			.cia_max_keysize	= AES_MAX_KEY_SIZE,
 			.cia_setkey		= aes_set_key,
-			.cia_encrypt		= aes_encrypt,
-			.cia_decrypt		= aes_decrypt
+			.cia_encrypt		= crypto_aes_encrypt,
+			.cia_decrypt		= crypto_aes_decrypt
 		}
 	}
 }, {
diff --git a/arch/sparc/crypto/des_glue.c b/arch/sparc/crypto/des_glue.c
index 281448f..db6010b 100644
--- a/arch/sparc/crypto/des_glue.c
+++ b/arch/sparc/crypto/des_glue.c
@@ -12,7 +12,7 @@
 #include <linux/mm.h>
 #include <linux/types.h>
 #include <crypto/algapi.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 
 #include <asm/fpumacro.h>
 #include <asm/pstate.h>
@@ -45,19 +45,15 @@
 		       unsigned int keylen)
 {
 	struct des_sparc64_ctx *dctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-	u32 tmp[DES_EXPKEY_WORDS];
-	int ret;
+	int err;
 
 	/* Even though we have special instructions for key expansion,
-	 * we call des_ekey() so that we don't have to write our own
+	 * we call des_verify_key() so that we don't have to write our own
 	 * weak key detection code.
 	 */
-	ret = des_ekey(tmp, key);
-	if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
-		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
-		return -EINVAL;
-	}
+	err = crypto_des_verify_key(tfm, key);
+	if (err)
+		return err;
 
 	des_sparc64_key_expand((const u32 *) key, &dctx->encrypt_expkey[0]);
 	encrypt_to_decrypt(&dctx->decrypt_expkey[0], &dctx->encrypt_expkey[0]);
@@ -68,7 +64,7 @@
 extern void des_sparc64_crypt(const u64 *key, const u64 *input,
 			      u64 *output);
 
-static void des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void sparc_des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
 	const u64 *K = ctx->encrypt_expkey;
@@ -76,7 +72,7 @@
 	des_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
 }
 
-static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void sparc_des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct des_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
 	const u64 *K = ctx->decrypt_expkey;
@@ -202,14 +198,13 @@
 			    unsigned int keylen)
 {
 	struct des3_ede_sparc64_ctx *dctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
 	u64 k1[DES_EXPKEY_WORDS / 2];
 	u64 k2[DES_EXPKEY_WORDS / 2];
 	u64 k3[DES_EXPKEY_WORDS / 2];
 	int err;
 
-	err = __des3_verify_key(flags, key);
-	if (unlikely(err))
+	err = crypto_des3_ede_verify_key(tfm, key);
+	if (err)
 		return err;
 
 	des_sparc64_key_expand((const u32 *)key, k1);
@@ -235,7 +230,7 @@
 extern void des3_ede_sparc64_crypt(const u64 *key, const u64 *input,
 				   u64 *output);
 
-static void des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void sparc_des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
 	const u64 *K = ctx->encrypt_expkey;
@@ -243,7 +238,7 @@
 	des3_ede_sparc64_crypt(K, (const u64 *) src, (u64 *) dst);
 }
 
-static void des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void sparc_des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct des3_ede_sparc64_ctx *ctx = crypto_tfm_ctx(tfm);
 	const u64 *K = ctx->decrypt_expkey;
@@ -390,8 +385,8 @@
 			.cia_min_keysize	= DES_KEY_SIZE,
 			.cia_max_keysize	= DES_KEY_SIZE,
 			.cia_setkey		= des_set_key,
-			.cia_encrypt		= des_encrypt,
-			.cia_decrypt		= des_decrypt
+			.cia_encrypt		= sparc_des_encrypt,
+			.cia_decrypt		= sparc_des_decrypt
 		}
 	}
 }, {
@@ -447,8 +442,8 @@
 			.cia_min_keysize	= DES3_EDE_KEY_SIZE,
 			.cia_max_keysize	= DES3_EDE_KEY_SIZE,
 			.cia_setkey		= des3_ede_set_key,
-			.cia_encrypt		= des3_ede_encrypt,
-			.cia_decrypt		= des3_ede_decrypt
+			.cia_encrypt		= sparc_des3_ede_encrypt,
+			.cia_decrypt		= sparc_des3_ede_decrypt
 		}
 	}
 }, {
diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
index 45734e1..759b1a9 100644
--- a/arch/x86/crypto/Makefile
+++ b/arch/x86/crypto/Makefile
@@ -14,11 +14,9 @@
 
 obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
 
-obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o
 obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o
 obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o
 
-obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o
 obj-$(CONFIG_CRYPTO_DES3_EDE_X86_64) += des3_ede-x86_64.o
 obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o
 obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o
@@ -38,14 +36,6 @@
 obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o
 
 obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o
-obj-$(CONFIG_CRYPTO_AEGIS128L_AESNI_SSE2) += aegis128l-aesni.o
-obj-$(CONFIG_CRYPTO_AEGIS256_AESNI_SSE2) += aegis256-aesni.o
-
-obj-$(CONFIG_CRYPTO_MORUS640_GLUE) += morus640_glue.o
-obj-$(CONFIG_CRYPTO_MORUS1280_GLUE) += morus1280_glue.o
-
-obj-$(CONFIG_CRYPTO_MORUS640_SSE2) += morus640-sse2.o
-obj-$(CONFIG_CRYPTO_MORUS1280_SSE2) += morus1280-sse2.o
 
 obj-$(CONFIG_CRYPTO_NHPOLY1305_SSE2) += nhpoly1305-sse2.o
 obj-$(CONFIG_CRYPTO_NHPOLY1305_AVX2) += nhpoly1305-avx2.o
@@ -64,15 +54,11 @@
 ifeq ($(avx2_supported),yes)
 	obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
 	obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
-
-	obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o
 endif
 
-aes-i586-y := aes-i586-asm_32.o aes_glue.o
 twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o
 serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o
 
-aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o
 des3_ede-x86_64-y := des3_ede-asm_64.o des3_ede_glue.o
 camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o
 blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o
@@ -82,11 +68,6 @@
 serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o
 
 aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o
-aegis128l-aesni-y := aegis128l-aesni-asm.o aegis128l-aesni-glue.o
-aegis256-aesni-y := aegis256-aesni-asm.o aegis256-aesni-glue.o
-
-morus640-sse2-y := morus640-sse2-asm.o morus640-sse2-glue.o
-morus1280-sse2-y := morus1280-sse2-asm.o morus1280-sse2-glue.o
 
 nhpoly1305-sse2-y := nh-sse2-x86_64.o nhpoly1305-sse2-glue.o
 
@@ -106,8 +87,6 @@
 	chacha-x86_64-y += chacha-avx2-x86_64.o
 	serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
 
-	morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o
-
 	nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o
 endif
 
diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S
deleted file mode 100644
index 1461ef0..0000000
--- a/arch/x86/crypto/aegis128l-aesni-asm.S
+++ /dev/null
@@ -1,823 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * AES-NI + SSE2 implementation of AEGIS-128L
- *
- * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-
-#define STATE0	%xmm0
-#define STATE1	%xmm1
-#define STATE2	%xmm2
-#define STATE3	%xmm3
-#define STATE4	%xmm4
-#define STATE5	%xmm5
-#define STATE6	%xmm6
-#define STATE7	%xmm7
-#define MSG0	%xmm8
-#define MSG1	%xmm9
-#define T0	%xmm10
-#define T1	%xmm11
-#define T2	%xmm12
-#define T3	%xmm13
-
-#define STATEP	%rdi
-#define LEN	%rsi
-#define SRC	%rdx
-#define DST	%rcx
-
-.section .rodata.cst16.aegis128l_const, "aM", @progbits, 32
-.align 16
-.Laegis128l_const_0:
-	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
-	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
-.Laegis128l_const_1:
-	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
-	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
-
-.section .rodata.cst16.aegis128l_counter, "aM", @progbits, 16
-.align 16
-.Laegis128l_counter0:
-	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
-	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
-.Laegis128l_counter1:
-	.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
-	.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
-
-.text
-
-/*
- * __load_partial: internal ABI
- * input:
- *   LEN - bytes
- *   SRC - src
- * output:
- *   MSG0 - first message block
- *   MSG1 - second message block
- * changed:
- *   T0
- *   %r8
- *   %r9
- */
-__load_partial:
-	xor %r9d, %r9d
-	pxor MSG0, MSG0
-	pxor MSG1, MSG1
-
-	mov LEN, %r8
-	and $0x1, %r8
-	jz .Lld_partial_1
-
-	mov LEN, %r8
-	and $0x1E, %r8
-	add SRC, %r8
-	mov (%r8), %r9b
-
-.Lld_partial_1:
-	mov LEN, %r8
-	and $0x2, %r8
-	jz .Lld_partial_2
-
-	mov LEN, %r8
-	and $0x1C, %r8
-	add SRC, %r8
-	shl $0x10, %r9
-	mov (%r8), %r9w
-
-.Lld_partial_2:
-	mov LEN, %r8
-	and $0x4, %r8
-	jz .Lld_partial_4
-
-	mov LEN, %r8
-	and $0x18, %r8
-	add SRC, %r8
-	shl $32, %r9
-	mov (%r8), %r8d
-	xor %r8, %r9
-
-.Lld_partial_4:
-	movq %r9, MSG0
-
-	mov LEN, %r8
-	and $0x8, %r8
-	jz .Lld_partial_8
-
-	mov LEN, %r8
-	and $0x10, %r8
-	add SRC, %r8
-	pslldq $8, MSG0
-	movq (%r8), T0
-	pxor T0, MSG0
-
-.Lld_partial_8:
-	mov LEN, %r8
-	and $0x10, %r8
-	jz .Lld_partial_16
-
-	movdqa MSG0, MSG1
-	movdqu (SRC), MSG0
-
-.Lld_partial_16:
-	ret
-ENDPROC(__load_partial)
-
-/*
- * __store_partial: internal ABI
- * input:
- *   LEN - bytes
- *   DST - dst
- * output:
- *   T0   - first message block
- *   T1   - second message block
- * changed:
- *   %r8
- *   %r9
- *   %r10
- */
-__store_partial:
-	mov LEN, %r8
-	mov DST, %r9
-
-	cmp $16, %r8
-	jl .Lst_partial_16
-
-	movdqu T0, (%r9)
-	movdqa T1, T0
-
-	sub $16, %r8
-	add $16, %r9
-
-.Lst_partial_16:
-	movq T0, %r10
-
-	cmp $8, %r8
-	jl .Lst_partial_8
-
-	mov %r10, (%r9)
-	psrldq $8, T0
-	movq T0, %r10
-
-	sub $8, %r8
-	add $8, %r9
-
-.Lst_partial_8:
-	cmp $4, %r8
-	jl .Lst_partial_4
-
-	mov %r10d, (%r9)
-	shr $32, %r10
-
-	sub $4, %r8
-	add $4, %r9
-
-.Lst_partial_4:
-	cmp $2, %r8
-	jl .Lst_partial_2
-
-	mov %r10w, (%r9)
-	shr $0x10, %r10
-
-	sub $2, %r8
-	add $2, %r9
-
-.Lst_partial_2:
-	cmp $1, %r8
-	jl .Lst_partial_1
-
-	mov %r10b, (%r9)
-
-.Lst_partial_1:
-	ret
-ENDPROC(__store_partial)
-
-.macro update
-	movdqa STATE7, T0
-	aesenc STATE0, STATE7
-	aesenc STATE1, STATE0
-	aesenc STATE2, STATE1
-	aesenc STATE3, STATE2
-	aesenc STATE4, STATE3
-	aesenc STATE5, STATE4
-	aesenc STATE6, STATE5
-	aesenc T0,     STATE6
-.endm
-
-.macro update0
-	update
-	pxor MSG0, STATE7
-	pxor MSG1, STATE3
-.endm
-
-.macro update1
-	update
-	pxor MSG0, STATE6
-	pxor MSG1, STATE2
-.endm
-
-.macro update2
-	update
-	pxor MSG0, STATE5
-	pxor MSG1, STATE1
-.endm
-
-.macro update3
-	update
-	pxor MSG0, STATE4
-	pxor MSG1, STATE0
-.endm
-
-.macro update4
-	update
-	pxor MSG0, STATE3
-	pxor MSG1, STATE7
-.endm
-
-.macro update5
-	update
-	pxor MSG0, STATE2
-	pxor MSG1, STATE6
-.endm
-
-.macro update6
-	update
-	pxor MSG0, STATE1
-	pxor MSG1, STATE5
-.endm
-
-.macro update7
-	update
-	pxor MSG0, STATE0
-	pxor MSG1, STATE4
-.endm
-
-.macro state_load
-	movdqu 0x00(STATEP), STATE0
-	movdqu 0x10(STATEP), STATE1
-	movdqu 0x20(STATEP), STATE2
-	movdqu 0x30(STATEP), STATE3
-	movdqu 0x40(STATEP), STATE4
-	movdqu 0x50(STATEP), STATE5
-	movdqu 0x60(STATEP), STATE6
-	movdqu 0x70(STATEP), STATE7
-.endm
-
-.macro state_store s0 s1 s2 s3 s4 s5 s6 s7
-	movdqu \s7, 0x00(STATEP)
-	movdqu \s0, 0x10(STATEP)
-	movdqu \s1, 0x20(STATEP)
-	movdqu \s2, 0x30(STATEP)
-	movdqu \s3, 0x40(STATEP)
-	movdqu \s4, 0x50(STATEP)
-	movdqu \s5, 0x60(STATEP)
-	movdqu \s6, 0x70(STATEP)
-.endm
-
-.macro state_store0
-	state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
-.endm
-
-.macro state_store1
-	state_store STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
-.endm
-
-.macro state_store2
-	state_store STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
-.endm
-
-.macro state_store3
-	state_store STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
-.endm
-
-.macro state_store4
-	state_store STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
-.endm
-
-.macro state_store5
-	state_store STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
-.endm
-
-.macro state_store6
-	state_store STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
-.endm
-
-.macro state_store7
-	state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
-.endm
-
-/*
- * void crypto_aegis128l_aesni_init(void *state, const void *key, const void *iv);
- */
-ENTRY(crypto_aegis128l_aesni_init)
-	FRAME_BEGIN
-
-	/* load key: */
-	movdqa (%rsi), MSG1
-	movdqa MSG1, STATE0
-	movdqa MSG1, STATE4
-	movdqa MSG1, STATE5
-	movdqa MSG1, STATE6
-	movdqa MSG1, STATE7
-
-	/* load IV: */
-	movdqu (%rdx), MSG0
-	pxor MSG0, STATE0
-	pxor MSG0, STATE4
-
-	/* load the constants: */
-	movdqa .Laegis128l_const_0, STATE2
-	movdqa .Laegis128l_const_1, STATE1
-	movdqa STATE1, STATE3
-	pxor STATE2, STATE5
-	pxor STATE1, STATE6
-	pxor STATE2, STATE7
-
-	/* update 10 times with IV and KEY: */
-	update0
-	update1
-	update2
-	update3
-	update4
-	update5
-	update6
-	update7
-	update0
-	update1
-
-	state_store1
-
-	FRAME_END
-	ret
-ENDPROC(crypto_aegis128l_aesni_init)
-
-.macro ad_block a i
-	movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
-	movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
-	update\i
-	sub $0x20, LEN
-	cmp $0x20, LEN
-	jl .Lad_out_\i
-.endm
-
-/*
- * void crypto_aegis128l_aesni_ad(void *state, unsigned int length,
- *                                const void *data);
- */
-ENTRY(crypto_aegis128l_aesni_ad)
-	FRAME_BEGIN
-
-	cmp $0x20, LEN
-	jb .Lad_out
-
-	state_load
-
-	mov  SRC, %r8
-	and $0xf, %r8
-	jnz .Lad_u_loop
-
-.align 8
-.Lad_a_loop:
-	ad_block a 0
-	ad_block a 1
-	ad_block a 2
-	ad_block a 3
-	ad_block a 4
-	ad_block a 5
-	ad_block a 6
-	ad_block a 7
-
-	add $0x100, SRC
-	jmp .Lad_a_loop
-
-.align 8
-.Lad_u_loop:
-	ad_block u 0
-	ad_block u 1
-	ad_block u 2
-	ad_block u 3
-	ad_block u 4
-	ad_block u 5
-	ad_block u 6
-	ad_block u 7
-
-	add $0x100, SRC
-	jmp .Lad_u_loop
-
-.Lad_out_0:
-	state_store0
-	FRAME_END
-	ret
-
-.Lad_out_1:
-	state_store1
-	FRAME_END
-	ret
-
-.Lad_out_2:
-	state_store2
-	FRAME_END
-	ret
-
-.Lad_out_3:
-	state_store3
-	FRAME_END
-	ret
-
-.Lad_out_4:
-	state_store4
-	FRAME_END
-	ret
-
-.Lad_out_5:
-	state_store5
-	FRAME_END
-	ret
-
-.Lad_out_6:
-	state_store6
-	FRAME_END
-	ret
-
-.Lad_out_7:
-	state_store7
-	FRAME_END
-	ret
-
-.Lad_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_aegis128l_aesni_ad)
-
-.macro crypt m0 m1 s0 s1 s2 s3 s4 s5 s6 s7
-	pxor \s1, \m0
-	pxor \s6, \m0
-	movdqa \s2, T3
-	pand \s3, T3
-	pxor T3, \m0
-
-	pxor \s2, \m1
-	pxor \s5, \m1
-	movdqa \s6, T3
-	pand \s7, T3
-	pxor T3, \m1
-.endm
-
-.macro crypt0 m0 m1
-	crypt \m0 \m1 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7
-.endm
-
-.macro crypt1 m0 m1
-	crypt \m0 \m1 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6
-.endm
-
-.macro crypt2 m0 m1
-	crypt \m0 \m1 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
-.endm
-
-.macro crypt3 m0 m1
-	crypt \m0 \m1 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4
-.endm
-
-.macro crypt4 m0 m1
-	crypt \m0 \m1 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3
-.endm
-
-.macro crypt5 m0 m1
-	crypt \m0 \m1 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2
-.endm
-
-.macro crypt6 m0 m1
-	crypt \m0 \m1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1
-.endm
-
-.macro crypt7 m0 m1
-	crypt \m0 \m1 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0
-.endm
-
-.macro encrypt_block a i
-	movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
-	movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
-	movdqa MSG0, T0
-	movdqa MSG1, T1
-	crypt\i T0, T1
-	movdq\a T0, (\i * 0x20 + 0x00)(DST)
-	movdq\a T1, (\i * 0x20 + 0x10)(DST)
-
-	update\i
-
-	sub $0x20, LEN
-	cmp $0x20, LEN
-	jl .Lenc_out_\i
-.endm
-
-.macro decrypt_block a i
-	movdq\a (\i * 0x20 + 0x00)(SRC), MSG0
-	movdq\a (\i * 0x20 + 0x10)(SRC), MSG1
-	crypt\i MSG0, MSG1
-	movdq\a MSG0, (\i * 0x20 + 0x00)(DST)
-	movdq\a MSG1, (\i * 0x20 + 0x10)(DST)
-
-	update\i
-
-	sub $0x20, LEN
-	cmp $0x20, LEN
-	jl .Ldec_out_\i
-.endm
-
-/*
- * void crypto_aegis128l_aesni_enc(void *state, unsigned int length,
- *                                 const void *src, void *dst);
- */
-ENTRY(crypto_aegis128l_aesni_enc)
-	FRAME_BEGIN
-
-	cmp $0x20, LEN
-	jb .Lenc_out
-
-	state_load
-
-	mov  SRC, %r8
-	or   DST, %r8
-	and $0xf, %r8
-	jnz .Lenc_u_loop
-
-.align 8
-.Lenc_a_loop:
-	encrypt_block a 0
-	encrypt_block a 1
-	encrypt_block a 2
-	encrypt_block a 3
-	encrypt_block a 4
-	encrypt_block a 5
-	encrypt_block a 6
-	encrypt_block a 7
-
-	add $0x100, SRC
-	add $0x100, DST
-	jmp .Lenc_a_loop
-
-.align 8
-.Lenc_u_loop:
-	encrypt_block u 0
-	encrypt_block u 1
-	encrypt_block u 2
-	encrypt_block u 3
-	encrypt_block u 4
-	encrypt_block u 5
-	encrypt_block u 6
-	encrypt_block u 7
-
-	add $0x100, SRC
-	add $0x100, DST
-	jmp .Lenc_u_loop
-
-.Lenc_out_0:
-	state_store0
-	FRAME_END
-	ret
-
-.Lenc_out_1:
-	state_store1
-	FRAME_END
-	ret
-
-.Lenc_out_2:
-	state_store2
-	FRAME_END
-	ret
-
-.Lenc_out_3:
-	state_store3
-	FRAME_END
-	ret
-
-.Lenc_out_4:
-	state_store4
-	FRAME_END
-	ret
-
-.Lenc_out_5:
-	state_store5
-	FRAME_END
-	ret
-
-.Lenc_out_6:
-	state_store6
-	FRAME_END
-	ret
-
-.Lenc_out_7:
-	state_store7
-	FRAME_END
-	ret
-
-.Lenc_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_aegis128l_aesni_enc)
-
-/*
- * void crypto_aegis128l_aesni_enc_tail(void *state, unsigned int length,
- *                                      const void *src, void *dst);
- */
-ENTRY(crypto_aegis128l_aesni_enc_tail)
-	FRAME_BEGIN
-
-	state_load
-
-	/* encrypt message: */
-	call __load_partial
-
-	movdqa MSG0, T0
-	movdqa MSG1, T1
-	crypt0 T0, T1
-
-	call __store_partial
-
-	update0
-
-	state_store0
-
-	FRAME_END
-	ret
-ENDPROC(crypto_aegis128l_aesni_enc_tail)
-
-/*
- * void crypto_aegis128l_aesni_dec(void *state, unsigned int length,
- *                                 const void *src, void *dst);
- */
-ENTRY(crypto_aegis128l_aesni_dec)
-	FRAME_BEGIN
-
-	cmp $0x20, LEN
-	jb .Ldec_out
-
-	state_load
-
-	mov  SRC, %r8
-	or   DST, %r8
-	and $0xF, %r8
-	jnz .Ldec_u_loop
-
-.align 8
-.Ldec_a_loop:
-	decrypt_block a 0
-	decrypt_block a 1
-	decrypt_block a 2
-	decrypt_block a 3
-	decrypt_block a 4
-	decrypt_block a 5
-	decrypt_block a 6
-	decrypt_block a 7
-
-	add $0x100, SRC
-	add $0x100, DST
-	jmp .Ldec_a_loop
-
-.align 8
-.Ldec_u_loop:
-	decrypt_block u 0
-	decrypt_block u 1
-	decrypt_block u 2
-	decrypt_block u 3
-	decrypt_block u 4
-	decrypt_block u 5
-	decrypt_block u 6
-	decrypt_block u 7
-
-	add $0x100, SRC
-	add $0x100, DST
-	jmp .Ldec_u_loop
-
-.Ldec_out_0:
-	state_store0
-	FRAME_END
-	ret
-
-.Ldec_out_1:
-	state_store1
-	FRAME_END
-	ret
-
-.Ldec_out_2:
-	state_store2
-	FRAME_END
-	ret
-
-.Ldec_out_3:
-	state_store3
-	FRAME_END
-	ret
-
-.Ldec_out_4:
-	state_store4
-	FRAME_END
-	ret
-
-.Ldec_out_5:
-	state_store5
-	FRAME_END
-	ret
-
-.Ldec_out_6:
-	state_store6
-	FRAME_END
-	ret
-
-.Ldec_out_7:
-	state_store7
-	FRAME_END
-	ret
-
-.Ldec_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_aegis128l_aesni_dec)
-
-/*
- * void crypto_aegis128l_aesni_dec_tail(void *state, unsigned int length,
- *                                      const void *src, void *dst);
- */
-ENTRY(crypto_aegis128l_aesni_dec_tail)
-	FRAME_BEGIN
-
-	state_load
-
-	/* decrypt message: */
-	call __load_partial
-
-	crypt0 MSG0, MSG1
-
-	movdqa MSG0, T0
-	movdqa MSG1, T1
-	call __store_partial
-
-	/* mask with byte count: */
-	movq LEN, T0
-	punpcklbw T0, T0
-	punpcklbw T0, T0
-	punpcklbw T0, T0
-	punpcklbw T0, T0
-	movdqa T0, T1
-	movdqa .Laegis128l_counter0, T2
-	movdqa .Laegis128l_counter1, T3
-	pcmpgtb T2, T0
-	pcmpgtb T3, T1
-	pand T0, MSG0
-	pand T1, MSG1
-
-	update0
-
-	state_store0
-
-	FRAME_END
-	ret
-ENDPROC(crypto_aegis128l_aesni_dec_tail)
-
-/*
- * void crypto_aegis128l_aesni_final(void *state, void *tag_xor,
- *                                   u64 assoclen, u64 cryptlen);
- */
-ENTRY(crypto_aegis128l_aesni_final)
-	FRAME_BEGIN
-
-	state_load
-
-	/* prepare length block: */
-	movq %rdx, MSG0
-	movq %rcx, T0
-	pslldq $8, T0
-	pxor T0, MSG0
-	psllq $3, MSG0 /* multiply by 8 (to get bit count) */
-
-	pxor STATE2, MSG0
-	movdqa MSG0, MSG1
-
-	/* update state: */
-	update0
-	update1
-	update2
-	update3
-	update4
-	update5
-	update6
-
-	/* xor tag: */
-	movdqu (%rsi), T0
-
-	pxor STATE1, T0
-	pxor STATE2, T0
-	pxor STATE3, T0
-	pxor STATE4, T0
-	pxor STATE5, T0
-	pxor STATE6, T0
-	pxor STATE7, T0
-
-	movdqu T0, (%rsi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_aegis128l_aesni_final)
diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c
deleted file mode 100644
index 19eb28b..0000000
--- a/arch/x86/crypto/aegis128l-aesni-glue.c
+++ /dev/null
@@ -1,293 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The AEGIS-128L Authenticated-Encryption Algorithm
- *   Glue for AES-NI + SSE2 implementation
- *
- * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <crypto/internal/aead.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/scatterwalk.h>
-#include <linux/module.h>
-#include <asm/fpu/api.h>
-#include <asm/cpu_device_id.h>
-
-#define AEGIS128L_BLOCK_ALIGN 16
-#define AEGIS128L_BLOCK_SIZE 32
-#define AEGIS128L_NONCE_SIZE 16
-#define AEGIS128L_STATE_BLOCKS 8
-#define AEGIS128L_KEY_SIZE 16
-#define AEGIS128L_MIN_AUTH_SIZE 8
-#define AEGIS128L_MAX_AUTH_SIZE 16
-
-asmlinkage void crypto_aegis128l_aesni_init(void *state, void *key, void *iv);
-
-asmlinkage void crypto_aegis128l_aesni_ad(
-		void *state, unsigned int length, const void *data);
-
-asmlinkage void crypto_aegis128l_aesni_enc(
-		void *state, unsigned int length, const void *src, void *dst);
-
-asmlinkage void crypto_aegis128l_aesni_dec(
-		void *state, unsigned int length, const void *src, void *dst);
-
-asmlinkage void crypto_aegis128l_aesni_enc_tail(
-		void *state, unsigned int length, const void *src, void *dst);
-
-asmlinkage void crypto_aegis128l_aesni_dec_tail(
-		void *state, unsigned int length, const void *src, void *dst);
-
-asmlinkage void crypto_aegis128l_aesni_final(
-		void *state, void *tag_xor, unsigned int cryptlen,
-		unsigned int assoclen);
-
-struct aegis_block {
-	u8 bytes[AEGIS128L_BLOCK_SIZE] __aligned(AEGIS128L_BLOCK_ALIGN);
-};
-
-struct aegis_state {
-	struct aegis_block blocks[AEGIS128L_STATE_BLOCKS];
-};
-
-struct aegis_ctx {
-	struct aegis_block key;
-};
-
-struct aegis_crypt_ops {
-	int (*skcipher_walk_init)(struct skcipher_walk *walk,
-				  struct aead_request *req, bool atomic);
-
-	void (*crypt_blocks)(void *state, unsigned int length, const void *src,
-			     void *dst);
-	void (*crypt_tail)(void *state, unsigned int length, const void *src,
-			   void *dst);
-};
-
-static void crypto_aegis128l_aesni_process_ad(
-		struct aegis_state *state, struct scatterlist *sg_src,
-		unsigned int assoclen)
-{
-	struct scatter_walk walk;
-	struct aegis_block buf;
-	unsigned int pos = 0;
-
-	scatterwalk_start(&walk, sg_src);
-	while (assoclen != 0) {
-		unsigned int size = scatterwalk_clamp(&walk, assoclen);
-		unsigned int left = size;
-		void *mapped = scatterwalk_map(&walk);
-		const u8 *src = (const u8 *)mapped;
-
-		if (pos + size >= AEGIS128L_BLOCK_SIZE) {
-			if (pos > 0) {
-				unsigned int fill = AEGIS128L_BLOCK_SIZE - pos;
-				memcpy(buf.bytes + pos, src, fill);
-				crypto_aegis128l_aesni_ad(state,
-							  AEGIS128L_BLOCK_SIZE,
-							  buf.bytes);
-				pos = 0;
-				left -= fill;
-				src += fill;
-			}
-
-			crypto_aegis128l_aesni_ad(state, left, src);
-
-			src += left & ~(AEGIS128L_BLOCK_SIZE - 1);
-			left &= AEGIS128L_BLOCK_SIZE - 1;
-		}
-
-		memcpy(buf.bytes + pos, src, left);
-		pos += left;
-		assoclen -= size;
-
-		scatterwalk_unmap(mapped);
-		scatterwalk_advance(&walk, size);
-		scatterwalk_done(&walk, 0, assoclen);
-	}
-
-	if (pos > 0) {
-		memset(buf.bytes + pos, 0, AEGIS128L_BLOCK_SIZE - pos);
-		crypto_aegis128l_aesni_ad(state, AEGIS128L_BLOCK_SIZE, buf.bytes);
-	}
-}
-
-static void crypto_aegis128l_aesni_process_crypt(
-		struct aegis_state *state, struct skcipher_walk *walk,
-		const struct aegis_crypt_ops *ops)
-{
-	while (walk->nbytes >= AEGIS128L_BLOCK_SIZE) {
-		ops->crypt_blocks(state, round_down(walk->nbytes,
-						    AEGIS128L_BLOCK_SIZE),
-				  walk->src.virt.addr, walk->dst.virt.addr);
-		skcipher_walk_done(walk, walk->nbytes % AEGIS128L_BLOCK_SIZE);
-	}
-
-	if (walk->nbytes) {
-		ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
-				walk->dst.virt.addr);
-		skcipher_walk_done(walk, 0);
-	}
-}
-
-static struct aegis_ctx *crypto_aegis128l_aesni_ctx(struct crypto_aead *aead)
-{
-	u8 *ctx = crypto_aead_ctx(aead);
-	ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
-	return (void *)ctx;
-}
-
-static int crypto_aegis128l_aesni_setkey(struct crypto_aead *aead,
-					 const u8 *key, unsigned int keylen)
-{
-	struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(aead);
-
-	if (keylen != AEGIS128L_KEY_SIZE) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	memcpy(ctx->key.bytes, key, AEGIS128L_KEY_SIZE);
-
-	return 0;
-}
-
-static int crypto_aegis128l_aesni_setauthsize(struct crypto_aead *tfm,
-					      unsigned int authsize)
-{
-	if (authsize > AEGIS128L_MAX_AUTH_SIZE)
-		return -EINVAL;
-	if (authsize < AEGIS128L_MIN_AUTH_SIZE)
-		return -EINVAL;
-	return 0;
-}
-
-static void crypto_aegis128l_aesni_crypt(struct aead_request *req,
-					 struct aegis_block *tag_xor,
-					 unsigned int cryptlen,
-					 const struct aegis_crypt_ops *ops)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(tfm);
-	struct skcipher_walk walk;
-	struct aegis_state state;
-
-	ops->skcipher_walk_init(&walk, req, true);
-
-	kernel_fpu_begin();
-
-	crypto_aegis128l_aesni_init(&state, ctx->key.bytes, req->iv);
-	crypto_aegis128l_aesni_process_ad(&state, req->src, req->assoclen);
-	crypto_aegis128l_aesni_process_crypt(&state, &walk, ops);
-	crypto_aegis128l_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
-
-	kernel_fpu_end();
-}
-
-static int crypto_aegis128l_aesni_encrypt(struct aead_request *req)
-{
-	static const struct aegis_crypt_ops OPS = {
-		.skcipher_walk_init = skcipher_walk_aead_encrypt,
-		.crypt_blocks = crypto_aegis128l_aesni_enc,
-		.crypt_tail = crypto_aegis128l_aesni_enc_tail,
-	};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aegis_block tag = {};
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen;
-
-	crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS);
-
-	scatterwalk_map_and_copy(tag.bytes, req->dst,
-				 req->assoclen + cryptlen, authsize, 1);
-	return 0;
-}
-
-static int crypto_aegis128l_aesni_decrypt(struct aead_request *req)
-{
-	static const struct aegis_block zeros = {};
-
-	static const struct aegis_crypt_ops OPS = {
-		.skcipher_walk_init = skcipher_walk_aead_decrypt,
-		.crypt_blocks = crypto_aegis128l_aesni_dec,
-		.crypt_tail = crypto_aegis128l_aesni_dec_tail,
-	};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aegis_block tag;
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen - authsize;
-
-	scatterwalk_map_and_copy(tag.bytes, req->src,
-				 req->assoclen + cryptlen, authsize, 0);
-
-	crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS);
-
-	return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
-}
-
-static int crypto_aegis128l_aesni_init_tfm(struct crypto_aead *aead)
-{
-	return 0;
-}
-
-static void crypto_aegis128l_aesni_exit_tfm(struct crypto_aead *aead)
-{
-}
-
-static struct aead_alg crypto_aegis128l_aesni_alg = {
-	.setkey = crypto_aegis128l_aesni_setkey,
-	.setauthsize = crypto_aegis128l_aesni_setauthsize,
-	.encrypt = crypto_aegis128l_aesni_encrypt,
-	.decrypt = crypto_aegis128l_aesni_decrypt,
-	.init = crypto_aegis128l_aesni_init_tfm,
-	.exit = crypto_aegis128l_aesni_exit_tfm,
-
-	.ivsize = AEGIS128L_NONCE_SIZE,
-	.maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
-	.chunksize = AEGIS128L_BLOCK_SIZE,
-
-	.base = {
-		.cra_flags = CRYPTO_ALG_INTERNAL,
-		.cra_blocksize = 1,
-		.cra_ctxsize = sizeof(struct aegis_ctx) +
-			       __alignof__(struct aegis_ctx),
-		.cra_alignmask = 0,
-		.cra_priority = 400,
-
-		.cra_name = "__aegis128l",
-		.cra_driver_name = "__aegis128l-aesni",
-
-		.cra_module = THIS_MODULE,
-	}
-};
-
-static struct simd_aead_alg *simd_alg;
-
-static int __init crypto_aegis128l_aesni_module_init(void)
-{
-	if (!boot_cpu_has(X86_FEATURE_XMM2) ||
-	    !boot_cpu_has(X86_FEATURE_AES) ||
-	    !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
-		return -ENODEV;
-
-	return simd_register_aeads_compat(&crypto_aegis128l_aesni_alg, 1,
-					  &simd_alg);
-}
-
-static void __exit crypto_aegis128l_aesni_module_exit(void)
-{
-	simd_unregister_aeads(&crypto_aegis128l_aesni_alg, 1, &simd_alg);
-}
-
-module_init(crypto_aegis128l_aesni_module_init);
-module_exit(crypto_aegis128l_aesni_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("AEGIS-128L AEAD algorithm -- AESNI+SSE2 implementation");
-MODULE_ALIAS_CRYPTO("aegis128l");
-MODULE_ALIAS_CRYPTO("aegis128l-aesni");
diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S
deleted file mode 100644
index 37d9b13..0000000
--- a/arch/x86/crypto/aegis256-aesni-asm.S
+++ /dev/null
@@ -1,700 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * AES-NI + SSE2 implementation of AEGIS-128L
- *
- * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-
-#define STATE0	%xmm0
-#define STATE1	%xmm1
-#define STATE2	%xmm2
-#define STATE3	%xmm3
-#define STATE4	%xmm4
-#define STATE5	%xmm5
-#define MSG	%xmm6
-#define T0	%xmm7
-#define T1	%xmm8
-#define T2	%xmm9
-#define T3	%xmm10
-
-#define STATEP	%rdi
-#define LEN	%rsi
-#define SRC	%rdx
-#define DST	%rcx
-
-.section .rodata.cst16.aegis256_const, "aM", @progbits, 32
-.align 16
-.Laegis256_const_0:
-	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
-	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
-.Laegis256_const_1:
-	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
-	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
-
-.section .rodata.cst16.aegis256_counter, "aM", @progbits, 16
-.align 16
-.Laegis256_counter:
-	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
-	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
-
-.text
-
-/*
- * __load_partial: internal ABI
- * input:
- *   LEN - bytes
- *   SRC - src
- * output:
- *   MSG  - message block
- * changed:
- *   T0
- *   %r8
- *   %r9
- */
-__load_partial:
-	xor %r9d, %r9d
-	pxor MSG, MSG
-
-	mov LEN, %r8
-	and $0x1, %r8
-	jz .Lld_partial_1
-
-	mov LEN, %r8
-	and $0x1E, %r8
-	add SRC, %r8
-	mov (%r8), %r9b
-
-.Lld_partial_1:
-	mov LEN, %r8
-	and $0x2, %r8
-	jz .Lld_partial_2
-
-	mov LEN, %r8
-	and $0x1C, %r8
-	add SRC, %r8
-	shl $0x10, %r9
-	mov (%r8), %r9w
-
-.Lld_partial_2:
-	mov LEN, %r8
-	and $0x4, %r8
-	jz .Lld_partial_4
-
-	mov LEN, %r8
-	and $0x18, %r8
-	add SRC, %r8
-	shl $32, %r9
-	mov (%r8), %r8d
-	xor %r8, %r9
-
-.Lld_partial_4:
-	movq %r9, MSG
-
-	mov LEN, %r8
-	and $0x8, %r8
-	jz .Lld_partial_8
-
-	mov LEN, %r8
-	and $0x10, %r8
-	add SRC, %r8
-	pslldq $8, MSG
-	movq (%r8), T0
-	pxor T0, MSG
-
-.Lld_partial_8:
-	ret
-ENDPROC(__load_partial)
-
-/*
- * __store_partial: internal ABI
- * input:
- *   LEN - bytes
- *   DST - dst
- * output:
- *   T0   - message block
- * changed:
- *   %r8
- *   %r9
- *   %r10
- */
-__store_partial:
-	mov LEN, %r8
-	mov DST, %r9
-
-	movq T0, %r10
-
-	cmp $8, %r8
-	jl .Lst_partial_8
-
-	mov %r10, (%r9)
-	psrldq $8, T0
-	movq T0, %r10
-
-	sub $8, %r8
-	add $8, %r9
-
-.Lst_partial_8:
-	cmp $4, %r8
-	jl .Lst_partial_4
-
-	mov %r10d, (%r9)
-	shr $32, %r10
-
-	sub $4, %r8
-	add $4, %r9
-
-.Lst_partial_4:
-	cmp $2, %r8
-	jl .Lst_partial_2
-
-	mov %r10w, (%r9)
-	shr $0x10, %r10
-
-	sub $2, %r8
-	add $2, %r9
-
-.Lst_partial_2:
-	cmp $1, %r8
-	jl .Lst_partial_1
-
-	mov %r10b, (%r9)
-
-.Lst_partial_1:
-	ret
-ENDPROC(__store_partial)
-
-.macro update
-	movdqa STATE5, T0
-	aesenc STATE0, STATE5
-	aesenc STATE1, STATE0
-	aesenc STATE2, STATE1
-	aesenc STATE3, STATE2
-	aesenc STATE4, STATE3
-	aesenc T0,     STATE4
-.endm
-
-.macro update0 m
-	update
-	pxor \m, STATE5
-.endm
-
-.macro update1 m
-	update
-	pxor \m, STATE4
-.endm
-
-.macro update2 m
-	update
-	pxor \m, STATE3
-.endm
-
-.macro update3 m
-	update
-	pxor \m, STATE2
-.endm
-
-.macro update4 m
-	update
-	pxor \m, STATE1
-.endm
-
-.macro update5 m
-	update
-	pxor \m, STATE0
-.endm
-
-.macro state_load
-	movdqu 0x00(STATEP), STATE0
-	movdqu 0x10(STATEP), STATE1
-	movdqu 0x20(STATEP), STATE2
-	movdqu 0x30(STATEP), STATE3
-	movdqu 0x40(STATEP), STATE4
-	movdqu 0x50(STATEP), STATE5
-.endm
-
-.macro state_store s0 s1 s2 s3 s4 s5
-	movdqu \s5, 0x00(STATEP)
-	movdqu \s0, 0x10(STATEP)
-	movdqu \s1, 0x20(STATEP)
-	movdqu \s2, 0x30(STATEP)
-	movdqu \s3, 0x40(STATEP)
-	movdqu \s4, 0x50(STATEP)
-.endm
-
-.macro state_store0
-	state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
-.endm
-
-.macro state_store1
-	state_store STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
-.endm
-
-.macro state_store2
-	state_store STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
-.endm
-
-.macro state_store3
-	state_store STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
-.endm
-
-.macro state_store4
-	state_store STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
-.endm
-
-.macro state_store5
-	state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
-.endm
-
-/*
- * void crypto_aegis256_aesni_init(void *state, const void *key, const void *iv);
- */
-ENTRY(crypto_aegis256_aesni_init)
-	FRAME_BEGIN
-
-	/* load key: */
-	movdqa 0x00(%rsi), MSG
-	movdqa 0x10(%rsi), T1
-	movdqa MSG, STATE4
-	movdqa T1, STATE5
-
-	/* load IV: */
-	movdqu 0x00(%rdx), T2
-	movdqu 0x10(%rdx), T3
-	pxor MSG, T2
-	pxor T1, T3
-	movdqa T2, STATE0
-	movdqa T3, STATE1
-
-	/* load the constants: */
-	movdqa .Laegis256_const_0, STATE3
-	movdqa .Laegis256_const_1, STATE2
-	pxor STATE3, STATE4
-	pxor STATE2, STATE5
-
-	/* update 10 times with IV and KEY: */
-	update0 MSG
-	update1 T1
-	update2 T2
-	update3 T3
-	update4 MSG
-	update5 T1
-	update0 T2
-	update1 T3
-	update2 MSG
-	update3 T1
-	update4 T2
-	update5 T3
-	update0 MSG
-	update1 T1
-	update2 T2
-	update3 T3
-
-	state_store3
-
-	FRAME_END
-	ret
-ENDPROC(crypto_aegis256_aesni_init)
-
-.macro ad_block a i
-	movdq\a (\i * 0x10)(SRC), MSG
-	update\i MSG
-	sub $0x10, LEN
-	cmp $0x10, LEN
-	jl .Lad_out_\i
-.endm
-
-/*
- * void crypto_aegis256_aesni_ad(void *state, unsigned int length,
- *                               const void *data);
- */
-ENTRY(crypto_aegis256_aesni_ad)
-	FRAME_BEGIN
-
-	cmp $0x10, LEN
-	jb .Lad_out
-
-	state_load
-
-	mov  SRC, %r8
-	and $0xf, %r8
-	jnz .Lad_u_loop
-
-.align 8
-.Lad_a_loop:
-	ad_block a 0
-	ad_block a 1
-	ad_block a 2
-	ad_block a 3
-	ad_block a 4
-	ad_block a 5
-
-	add $0x60, SRC
-	jmp .Lad_a_loop
-
-.align 8
-.Lad_u_loop:
-	ad_block u 0
-	ad_block u 1
-	ad_block u 2
-	ad_block u 3
-	ad_block u 4
-	ad_block u 5
-
-	add $0x60, SRC
-	jmp .Lad_u_loop
-
-.Lad_out_0:
-	state_store0
-	FRAME_END
-	ret
-
-.Lad_out_1:
-	state_store1
-	FRAME_END
-	ret
-
-.Lad_out_2:
-	state_store2
-	FRAME_END
-	ret
-
-.Lad_out_3:
-	state_store3
-	FRAME_END
-	ret
-
-.Lad_out_4:
-	state_store4
-	FRAME_END
-	ret
-
-.Lad_out_5:
-	state_store5
-	FRAME_END
-	ret
-
-.Lad_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_aegis256_aesni_ad)
-
-.macro crypt m s0 s1 s2 s3 s4 s5
-	pxor \s1, \m
-	pxor \s4, \m
-	pxor \s5, \m
-	movdqa \s2, T3
-	pand \s3, T3
-	pxor T3, \m
-.endm
-
-.macro crypt0 m
-	crypt \m STATE0 STATE1 STATE2 STATE3 STATE4 STATE5
-.endm
-
-.macro crypt1 m
-	crypt \m STATE5 STATE0 STATE1 STATE2 STATE3 STATE4
-.endm
-
-.macro crypt2 m
-	crypt \m STATE4 STATE5 STATE0 STATE1 STATE2 STATE3
-.endm
-
-.macro crypt3 m
-	crypt \m STATE3 STATE4 STATE5 STATE0 STATE1 STATE2
-.endm
-
-.macro crypt4 m
-	crypt \m STATE2 STATE3 STATE4 STATE5 STATE0 STATE1
-.endm
-
-.macro crypt5 m
-	crypt \m STATE1 STATE2 STATE3 STATE4 STATE5 STATE0
-.endm
-
-.macro encrypt_block a i
-	movdq\a (\i * 0x10)(SRC), MSG
-	movdqa MSG, T0
-	crypt\i T0
-	movdq\a T0, (\i * 0x10)(DST)
-
-	update\i MSG
-
-	sub $0x10, LEN
-	cmp $0x10, LEN
-	jl .Lenc_out_\i
-.endm
-
-.macro decrypt_block a i
-	movdq\a (\i * 0x10)(SRC), MSG
-	crypt\i MSG
-	movdq\a MSG, (\i * 0x10)(DST)
-
-	update\i MSG
-
-	sub $0x10, LEN
-	cmp $0x10, LEN
-	jl .Ldec_out_\i
-.endm
-
-/*
- * void crypto_aegis256_aesni_enc(void *state, unsigned int length,
- *                                const void *src, void *dst);
- */
-ENTRY(crypto_aegis256_aesni_enc)
-	FRAME_BEGIN
-
-	cmp $0x10, LEN
-	jb .Lenc_out
-
-	state_load
-
-	mov  SRC, %r8
-	or   DST, %r8
-	and $0xf, %r8
-	jnz .Lenc_u_loop
-
-.align 8
-.Lenc_a_loop:
-	encrypt_block a 0
-	encrypt_block a 1
-	encrypt_block a 2
-	encrypt_block a 3
-	encrypt_block a 4
-	encrypt_block a 5
-
-	add $0x60, SRC
-	add $0x60, DST
-	jmp .Lenc_a_loop
-
-.align 8
-.Lenc_u_loop:
-	encrypt_block u 0
-	encrypt_block u 1
-	encrypt_block u 2
-	encrypt_block u 3
-	encrypt_block u 4
-	encrypt_block u 5
-
-	add $0x60, SRC
-	add $0x60, DST
-	jmp .Lenc_u_loop
-
-.Lenc_out_0:
-	state_store0
-	FRAME_END
-	ret
-
-.Lenc_out_1:
-	state_store1
-	FRAME_END
-	ret
-
-.Lenc_out_2:
-	state_store2
-	FRAME_END
-	ret
-
-.Lenc_out_3:
-	state_store3
-	FRAME_END
-	ret
-
-.Lenc_out_4:
-	state_store4
-	FRAME_END
-	ret
-
-.Lenc_out_5:
-	state_store5
-	FRAME_END
-	ret
-
-.Lenc_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_aegis256_aesni_enc)
-
-/*
- * void crypto_aegis256_aesni_enc_tail(void *state, unsigned int length,
- *                                     const void *src, void *dst);
- */
-ENTRY(crypto_aegis256_aesni_enc_tail)
-	FRAME_BEGIN
-
-	state_load
-
-	/* encrypt message: */
-	call __load_partial
-
-	movdqa MSG, T0
-	crypt0 T0
-
-	call __store_partial
-
-	update0 MSG
-
-	state_store0
-
-	FRAME_END
-	ret
-ENDPROC(crypto_aegis256_aesni_enc_tail)
-
-/*
- * void crypto_aegis256_aesni_dec(void *state, unsigned int length,
- *                                const void *src, void *dst);
- */
-ENTRY(crypto_aegis256_aesni_dec)
-	FRAME_BEGIN
-
-	cmp $0x10, LEN
-	jb .Ldec_out
-
-	state_load
-
-	mov  SRC, %r8
-	or   DST, %r8
-	and $0xF, %r8
-	jnz .Ldec_u_loop
-
-.align 8
-.Ldec_a_loop:
-	decrypt_block a 0
-	decrypt_block a 1
-	decrypt_block a 2
-	decrypt_block a 3
-	decrypt_block a 4
-	decrypt_block a 5
-
-	add $0x60, SRC
-	add $0x60, DST
-	jmp .Ldec_a_loop
-
-.align 8
-.Ldec_u_loop:
-	decrypt_block u 0
-	decrypt_block u 1
-	decrypt_block u 2
-	decrypt_block u 3
-	decrypt_block u 4
-	decrypt_block u 5
-
-	add $0x60, SRC
-	add $0x60, DST
-	jmp .Ldec_u_loop
-
-.Ldec_out_0:
-	state_store0
-	FRAME_END
-	ret
-
-.Ldec_out_1:
-	state_store1
-	FRAME_END
-	ret
-
-.Ldec_out_2:
-	state_store2
-	FRAME_END
-	ret
-
-.Ldec_out_3:
-	state_store3
-	FRAME_END
-	ret
-
-.Ldec_out_4:
-	state_store4
-	FRAME_END
-	ret
-
-.Ldec_out_5:
-	state_store5
-	FRAME_END
-	ret
-
-.Ldec_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_aegis256_aesni_dec)
-
-/*
- * void crypto_aegis256_aesni_dec_tail(void *state, unsigned int length,
- *                                     const void *src, void *dst);
- */
-ENTRY(crypto_aegis256_aesni_dec_tail)
-	FRAME_BEGIN
-
-	state_load
-
-	/* decrypt message: */
-	call __load_partial
-
-	crypt0 MSG
-
-	movdqa MSG, T0
-	call __store_partial
-
-	/* mask with byte count: */
-	movq LEN, T0
-	punpcklbw T0, T0
-	punpcklbw T0, T0
-	punpcklbw T0, T0
-	punpcklbw T0, T0
-	movdqa .Laegis256_counter, T1
-	pcmpgtb T1, T0
-	pand T0, MSG
-
-	update0 MSG
-
-	state_store0
-
-	FRAME_END
-	ret
-ENDPROC(crypto_aegis256_aesni_dec_tail)
-
-/*
- * void crypto_aegis256_aesni_final(void *state, void *tag_xor,
- *                                  u64 assoclen, u64 cryptlen);
- */
-ENTRY(crypto_aegis256_aesni_final)
-	FRAME_BEGIN
-
-	state_load
-
-	/* prepare length block: */
-	movq %rdx, MSG
-	movq %rcx, T0
-	pslldq $8, T0
-	pxor T0, MSG
-	psllq $3, MSG /* multiply by 8 (to get bit count) */
-
-	pxor STATE3, MSG
-
-	/* update state: */
-	update0 MSG
-	update1 MSG
-	update2 MSG
-	update3 MSG
-	update4 MSG
-	update5 MSG
-	update0 MSG
-
-	/* xor tag: */
-	movdqu (%rsi), MSG
-
-	pxor STATE0, MSG
-	pxor STATE1, MSG
-	pxor STATE2, MSG
-	pxor STATE3, MSG
-	pxor STATE4, MSG
-	pxor STATE5, MSG
-
-	movdqu MSG, (%rsi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_aegis256_aesni_final)
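
The assembly removed above realizes the AEGIS-256 state update with chained aesenc instructions and register rotation. As a rough orientation only, a C sketch of the same update using AES-NI intrinsics could look like the following (aegis256_update is a made-up helper name, not kernel code):

#include <immintrin.h>

/* One AEGIS-256 state update: S'[i] = AESRound(S[i-1 mod 6], S[i]),
 * with the message block folded into S'[0].  Requires AES-NI. */
static void aegis256_update(__m128i s[6], __m128i msg)
{
	__m128i s5 = s[5];

	s[5] = _mm_aesenc_si128(s[4], s[5]);
	s[4] = _mm_aesenc_si128(s[3], s[4]);
	s[3] = _mm_aesenc_si128(s[2], s[3]);
	s[2] = _mm_aesenc_si128(s[1], s[2]);
	s[1] = _mm_aesenc_si128(s[0], s[1]);
	s[0] = _mm_xor_si128(_mm_aesenc_si128(s5, s[0]), msg);
}

The deleted file avoids the temporary copies by renaming the six state registers each round (the update0..update5 / state_store0..state_store5 macros).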
diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c
deleted file mode 100644
index f84da27..0000000
--- a/arch/x86/crypto/aegis256-aesni-glue.c
+++ /dev/null
@@ -1,293 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The AEGIS-256 Authenticated-Encryption Algorithm
- *   Glue for AES-NI + SSE2 implementation
- *
- * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <crypto/internal/aead.h>
-#include <crypto/internal/simd.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/scatterwalk.h>
-#include <linux/module.h>
-#include <asm/fpu/api.h>
-#include <asm/cpu_device_id.h>
-
-#define AEGIS256_BLOCK_ALIGN 16
-#define AEGIS256_BLOCK_SIZE 16
-#define AEGIS256_NONCE_SIZE 32
-#define AEGIS256_STATE_BLOCKS 6
-#define AEGIS256_KEY_SIZE 32
-#define AEGIS256_MIN_AUTH_SIZE 8
-#define AEGIS256_MAX_AUTH_SIZE 16
-
-asmlinkage void crypto_aegis256_aesni_init(void *state, void *key, void *iv);
-
-asmlinkage void crypto_aegis256_aesni_ad(
-		void *state, unsigned int length, const void *data);
-
-asmlinkage void crypto_aegis256_aesni_enc(
-		void *state, unsigned int length, const void *src, void *dst);
-
-asmlinkage void crypto_aegis256_aesni_dec(
-		void *state, unsigned int length, const void *src, void *dst);
-
-asmlinkage void crypto_aegis256_aesni_enc_tail(
-		void *state, unsigned int length, const void *src, void *dst);
-
-asmlinkage void crypto_aegis256_aesni_dec_tail(
-		void *state, unsigned int length, const void *src, void *dst);
-
-asmlinkage void crypto_aegis256_aesni_final(
-		void *state, void *tag_xor, unsigned int cryptlen,
-		unsigned int assoclen);
-
-struct aegis_block {
-	u8 bytes[AEGIS256_BLOCK_SIZE] __aligned(AEGIS256_BLOCK_ALIGN);
-};
-
-struct aegis_state {
-	struct aegis_block blocks[AEGIS256_STATE_BLOCKS];
-};
-
-struct aegis_ctx {
-	struct aegis_block key[AEGIS256_KEY_SIZE / AEGIS256_BLOCK_SIZE];
-};
-
-struct aegis_crypt_ops {
-	int (*skcipher_walk_init)(struct skcipher_walk *walk,
-				  struct aead_request *req, bool atomic);
-
-	void (*crypt_blocks)(void *state, unsigned int length, const void *src,
-			     void *dst);
-	void (*crypt_tail)(void *state, unsigned int length, const void *src,
-			   void *dst);
-};
-
-static void crypto_aegis256_aesni_process_ad(
-		struct aegis_state *state, struct scatterlist *sg_src,
-		unsigned int assoclen)
-{
-	struct scatter_walk walk;
-	struct aegis_block buf;
-	unsigned int pos = 0;
-
-	scatterwalk_start(&walk, sg_src);
-	while (assoclen != 0) {
-		unsigned int size = scatterwalk_clamp(&walk, assoclen);
-		unsigned int left = size;
-		void *mapped = scatterwalk_map(&walk);
-		const u8 *src = (const u8 *)mapped;
-
-		if (pos + size >= AEGIS256_BLOCK_SIZE) {
-			if (pos > 0) {
-				unsigned int fill = AEGIS256_BLOCK_SIZE - pos;
-				memcpy(buf.bytes + pos, src, fill);
-				crypto_aegis256_aesni_ad(state,
-							 AEGIS256_BLOCK_SIZE,
-							 buf.bytes);
-				pos = 0;
-				left -= fill;
-				src += fill;
-			}
-
-			crypto_aegis256_aesni_ad(state, left, src);
-
-			src += left & ~(AEGIS256_BLOCK_SIZE - 1);
-			left &= AEGIS256_BLOCK_SIZE - 1;
-		}
-
-		memcpy(buf.bytes + pos, src, left);
-		pos += left;
-		assoclen -= size;
-
-		scatterwalk_unmap(mapped);
-		scatterwalk_advance(&walk, size);
-		scatterwalk_done(&walk, 0, assoclen);
-	}
-
-	if (pos > 0) {
-		memset(buf.bytes + pos, 0, AEGIS256_BLOCK_SIZE - pos);
-		crypto_aegis256_aesni_ad(state, AEGIS256_BLOCK_SIZE, buf.bytes);
-	}
-}
-
-static void crypto_aegis256_aesni_process_crypt(
-		struct aegis_state *state, struct skcipher_walk *walk,
-		const struct aegis_crypt_ops *ops)
-{
-	while (walk->nbytes >= AEGIS256_BLOCK_SIZE) {
-		ops->crypt_blocks(state,
-				  round_down(walk->nbytes, AEGIS256_BLOCK_SIZE),
-				  walk->src.virt.addr, walk->dst.virt.addr);
-		skcipher_walk_done(walk, walk->nbytes % AEGIS256_BLOCK_SIZE);
-	}
-
-	if (walk->nbytes) {
-		ops->crypt_tail(state, walk->nbytes, walk->src.virt.addr,
-				walk->dst.virt.addr);
-		skcipher_walk_done(walk, 0);
-	}
-}
-
-static struct aegis_ctx *crypto_aegis256_aesni_ctx(struct crypto_aead *aead)
-{
-	u8 *ctx = crypto_aead_ctx(aead);
-	ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx));
-	return (void *)ctx;
-}
-
-static int crypto_aegis256_aesni_setkey(struct crypto_aead *aead, const u8 *key,
-					unsigned int keylen)
-{
-	struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(aead);
-
-	if (keylen != AEGIS256_KEY_SIZE) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	memcpy(ctx->key, key, AEGIS256_KEY_SIZE);
-
-	return 0;
-}
-
-static int crypto_aegis256_aesni_setauthsize(struct crypto_aead *tfm,
-						unsigned int authsize)
-{
-	if (authsize > AEGIS256_MAX_AUTH_SIZE)
-		return -EINVAL;
-	if (authsize < AEGIS256_MIN_AUTH_SIZE)
-		return -EINVAL;
-	return 0;
-}
-
-static void crypto_aegis256_aesni_crypt(struct aead_request *req,
-					struct aegis_block *tag_xor,
-					unsigned int cryptlen,
-					const struct aegis_crypt_ops *ops)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(tfm);
-	struct skcipher_walk walk;
-	struct aegis_state state;
-
-	ops->skcipher_walk_init(&walk, req, true);
-
-	kernel_fpu_begin();
-
-	crypto_aegis256_aesni_init(&state, ctx->key, req->iv);
-	crypto_aegis256_aesni_process_ad(&state, req->src, req->assoclen);
-	crypto_aegis256_aesni_process_crypt(&state, &walk, ops);
-	crypto_aegis256_aesni_final(&state, tag_xor, req->assoclen, cryptlen);
-
-	kernel_fpu_end();
-}
-
-static int crypto_aegis256_aesni_encrypt(struct aead_request *req)
-{
-	static const struct aegis_crypt_ops OPS = {
-		.skcipher_walk_init = skcipher_walk_aead_encrypt,
-		.crypt_blocks = crypto_aegis256_aesni_enc,
-		.crypt_tail = crypto_aegis256_aesni_enc_tail,
-	};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aegis_block tag = {};
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen;
-
-	crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS);
-
-	scatterwalk_map_and_copy(tag.bytes, req->dst,
-				 req->assoclen + cryptlen, authsize, 1);
-	return 0;
-}
-
-static int crypto_aegis256_aesni_decrypt(struct aead_request *req)
-{
-	static const struct aegis_block zeros = {};
-
-	static const struct aegis_crypt_ops OPS = {
-		.skcipher_walk_init = skcipher_walk_aead_decrypt,
-		.crypt_blocks = crypto_aegis256_aesni_dec,
-		.crypt_tail = crypto_aegis256_aesni_dec_tail,
-	};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aegis_block tag;
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen - authsize;
-
-	scatterwalk_map_and_copy(tag.bytes, req->src,
-				 req->assoclen + cryptlen, authsize, 0);
-
-	crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS);
-
-	return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0;
-}
-
-static int crypto_aegis256_aesni_init_tfm(struct crypto_aead *aead)
-{
-	return 0;
-}
-
-static void crypto_aegis256_aesni_exit_tfm(struct crypto_aead *aead)
-{
-}
-
-static struct aead_alg crypto_aegis256_aesni_alg = {
-	.setkey = crypto_aegis256_aesni_setkey,
-	.setauthsize = crypto_aegis256_aesni_setauthsize,
-	.encrypt = crypto_aegis256_aesni_encrypt,
-	.decrypt = crypto_aegis256_aesni_decrypt,
-	.init = crypto_aegis256_aesni_init_tfm,
-	.exit = crypto_aegis256_aesni_exit_tfm,
-
-	.ivsize = AEGIS256_NONCE_SIZE,
-	.maxauthsize = AEGIS256_MAX_AUTH_SIZE,
-	.chunksize = AEGIS256_BLOCK_SIZE,
-
-	.base = {
-		.cra_flags = CRYPTO_ALG_INTERNAL,
-		.cra_blocksize = 1,
-		.cra_ctxsize = sizeof(struct aegis_ctx) +
-			       __alignof__(struct aegis_ctx),
-		.cra_alignmask = 0,
-		.cra_priority = 400,
-
-		.cra_name = "__aegis256",
-		.cra_driver_name = "__aegis256-aesni",
-
-		.cra_module = THIS_MODULE,
-	}
-};
-
-static struct simd_aead_alg *simd_alg;
-
-static int __init crypto_aegis256_aesni_module_init(void)
-{
-	if (!boot_cpu_has(X86_FEATURE_XMM2) ||
-	    !boot_cpu_has(X86_FEATURE_AES) ||
-	    !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
-		return -ENODEV;
-
-	return simd_register_aeads_compat(&crypto_aegis256_aesni_alg, 1,
-					  &simd_alg);
-}
-
-static void __exit crypto_aegis256_aesni_module_exit(void)
-{
-	simd_unregister_aeads(&crypto_aegis256_aesni_alg, 1, &simd_alg);
-}
-
-module_init(crypto_aegis256_aesni_module_init);
-module_exit(crypto_aegis256_aesni_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("AEGIS-256 AEAD algorithm -- AESNI+SSE2 implementation");
-MODULE_ALIAS_CRYPTO("aegis256");
-MODULE_ALIAS_CRYPTO("aegis256-aesni");
diff --git a/arch/x86/crypto/aes-i586-asm_32.S b/arch/x86/crypto/aes-i586-asm_32.S
deleted file mode 100644
index 2849dbc..0000000
--- a/arch/x86/crypto/aes-i586-asm_32.S
+++ /dev/null
@@ -1,362 +0,0 @@
-// -------------------------------------------------------------------------
-// Copyright (c) 2001, Dr Brian Gladman <                 >, Worcester, UK.
-// All rights reserved.
-//
-// LICENSE TERMS
-//
-// The free distribution and use of this software in both source and binary 
-// form is allowed (with or without changes) provided that:
-//
-//   1. distributions of this source code include the above copyright 
-//      notice, this list of conditions and the following disclaimer//
-//
-//   2. distributions in binary form include the above copyright
-//      notice, this list of conditions and the following disclaimer
-//      in the documentation and/or other associated materials//
-//
-//   3. the copyright holder's name is not used to endorse products 
-//      built using this software without specific written permission.
-//
-//
-// ALTERNATIVELY, provided that this notice is retained in full, this product
-// may be distributed under the terms of the GNU General Public License (GPL),
-// in which case the provisions of the GPL apply INSTEAD OF those given above.
-//
-// Copyright (c) 2004 Linus Torvalds <torvalds@osdl.org>
-// Copyright (c) 2004 Red Hat, Inc., James Morris <jmorris@redhat.com>
-
-// DISCLAIMER
-//
-// This software is provided 'as is' with no explicit or implied warranties
-// in respect of its properties including, but not limited to, correctness 
-// and fitness for purpose.
-// -------------------------------------------------------------------------
-// Issue Date: 29/07/2002
-
-.file "aes-i586-asm.S"
-.text
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-
-#define tlen 1024   // length of each of 4 'xor' arrays (256 32-bit words)
-
-/* offsets to parameters with one register pushed onto stack */
-#define ctx 8
-#define out_blk 12
-#define in_blk 16
-
-/* offsets in crypto_aes_ctx structure */
-#define klen (480)
-#define ekey (0)
-#define dkey (240)
-
-// register mapping for encrypt and decrypt subroutines
-
-#define r0  eax
-#define r1  ebx
-#define r2  ecx
-#define r3  edx
-#define r4  esi
-#define r5  edi
-
-#define eaxl  al
-#define eaxh  ah
-#define ebxl  bl
-#define ebxh  bh
-#define ecxl  cl
-#define ecxh  ch
-#define edxl  dl
-#define edxh  dh
-
-#define _h(reg) reg##h
-#define h(reg) _h(reg)
-
-#define _l(reg) reg##l
-#define l(reg) _l(reg)
-
-// This macro takes a 32-bit word representing a column and uses
-// each of its four bytes to index into four tables of 256 32-bit
-// words to obtain values that are then xored into the appropriate
-// output registers r0, r1, r4 or r5.  
-
-// Parameters:
-// table table base address
-//   %1  out_state[0]
-//   %2  out_state[1]
-//   %3  out_state[2]
-//   %4  out_state[3]
-//   idx input register for the round (destroyed)
-//   tmp scratch register for the round
-// sched key schedule
-
-#define do_col(table, a1,a2,a3,a4, idx, tmp)	\
-	movzx   %l(idx),%tmp;			\
-	xor     table(,%tmp,4),%a1;		\
-	movzx   %h(idx),%tmp;			\
-	shr     $16,%idx;			\
-	xor     table+tlen(,%tmp,4),%a2;	\
-	movzx   %l(idx),%tmp;			\
-	movzx   %h(idx),%idx;			\
-	xor     table+2*tlen(,%tmp,4),%a3;	\
-	xor     table+3*tlen(,%idx,4),%a4;
-
-// initialise output registers from the key schedule
-// NB1: original value of a3 is in idx on exit
-// NB2: original values of a1,a2,a4 aren't used
-#define do_fcol(table, a1,a2,a3,a4, idx, tmp, sched) \
-	mov     0 sched,%a1;			\
-	movzx   %l(idx),%tmp;			\
-	mov     12 sched,%a2;			\
-	xor     table(,%tmp,4),%a1;		\
-	mov     4 sched,%a4;			\
-	movzx   %h(idx),%tmp;			\
-	shr     $16,%idx;			\
-	xor     table+tlen(,%tmp,4),%a2;	\
-	movzx   %l(idx),%tmp;			\
-	movzx   %h(idx),%idx;			\
-	xor     table+3*tlen(,%idx,4),%a4;	\
-	mov     %a3,%idx;			\
-	mov     8 sched,%a3;			\
-	xor     table+2*tlen(,%tmp,4),%a3;
-
-// initialise output registers from the key schedule
-// NB1: original value of a3 is in idx on exit
-// NB2: original values of a1,a2,a4 aren't used
-#define do_icol(table, a1,a2,a3,a4, idx, tmp, sched) \
-	mov     0 sched,%a1;			\
-	movzx   %l(idx),%tmp;			\
-	mov     4 sched,%a2;			\
-	xor     table(,%tmp,4),%a1;		\
-	mov     12 sched,%a4;			\
-	movzx   %h(idx),%tmp;			\
-	shr     $16,%idx;			\
-	xor     table+tlen(,%tmp,4),%a2;	\
-	movzx   %l(idx),%tmp;			\
-	movzx   %h(idx),%idx;			\
-	xor     table+3*tlen(,%idx,4),%a4;	\
-	mov     %a3,%idx;			\
-	mov     8 sched,%a3;			\
-	xor     table+2*tlen(,%tmp,4),%a3;
-
-
-// original Gladman had conditional saves to MMX regs.
-#define save(a1, a2)		\
-	mov     %a2,4*a1(%esp)
-
-#define restore(a1, a2)		\
-	mov     4*a2(%esp),%a1
-
-// These macros perform a forward encryption cycle. They are entered with
-// the first previous round column values in r0,r1,r4,r5 and
-// exit with the final values in the same registers, using stack
-// for temporary storage.
-
-// round column values
-// on entry: r0,r1,r4,r5
-// on exit:  r2,r1,r4,r5
-#define fwd_rnd1(arg, table)						\
-	save   (0,r1);							\
-	save   (1,r5);							\
-									\
-	/* compute new column values */					\
-	do_fcol(table, r2,r5,r4,r1, r0,r3, arg);	/* idx=r0 */	\
-	do_col (table, r4,r1,r2,r5, r0,r3);		/* idx=r4 */	\
-	restore(r0,0);							\
-	do_col (table, r1,r2,r5,r4, r0,r3);		/* idx=r1 */	\
-	restore(r0,1);							\
-	do_col (table, r5,r4,r1,r2, r0,r3);		/* idx=r5 */
-
-// round column values
-// on entry: r2,r1,r4,r5
-// on exit:  r0,r1,r4,r5
-#define fwd_rnd2(arg, table)						\
-	save   (0,r1);							\
-	save   (1,r5);							\
-									\
-	/* compute new column values */					\
-	do_fcol(table, r0,r5,r4,r1, r2,r3, arg);	/* idx=r2 */	\
-	do_col (table, r4,r1,r0,r5, r2,r3);		/* idx=r4 */	\
-	restore(r2,0);							\
-	do_col (table, r1,r0,r5,r4, r2,r3);		/* idx=r1 */	\
-	restore(r2,1);							\
-	do_col (table, r5,r4,r1,r0, r2,r3);		/* idx=r5 */
-
-// These macros perform an inverse encryption cycle. They are entered with
-// the first previous round column values in r0,r1,r4,r5 and
-// exit with the final values in the same registers, using stack
-// for temporary storage
-
-// round column values
-// on entry: r0,r1,r4,r5
-// on exit:  r2,r1,r4,r5
-#define inv_rnd1(arg, table)						\
-	save    (0,r1);							\
-	save    (1,r5);							\
-									\
-	/* compute new column values */					\
-	do_icol(table, r2,r1,r4,r5, r0,r3, arg);	/* idx=r0 */	\
-	do_col (table, r4,r5,r2,r1, r0,r3);		/* idx=r4 */	\
-	restore(r0,0);							\
-	do_col (table, r1,r4,r5,r2, r0,r3);		/* idx=r1 */	\
-	restore(r0,1);							\
-	do_col (table, r5,r2,r1,r4, r0,r3);		/* idx=r5 */
-
-// round column values
-// on entry: r2,r1,r4,r5
-// on exit:  r0,r1,r4,r5
-#define inv_rnd2(arg, table)						\
-	save    (0,r1);							\
-	save    (1,r5);							\
-									\
-	/* compute new column values */					\
-	do_icol(table, r0,r1,r4,r5, r2,r3, arg);	/* idx=r2 */	\
-	do_col (table, r4,r5,r0,r1, r2,r3);		/* idx=r4 */	\
-	restore(r2,0);							\
-	do_col (table, r1,r4,r5,r0, r2,r3);		/* idx=r1 */	\
-	restore(r2,1);							\
-	do_col (table, r5,r0,r1,r4, r2,r3);		/* idx=r5 */
-
-// AES (Rijndael) Encryption Subroutine
-/* void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
-
-.extern  crypto_ft_tab
-.extern  crypto_fl_tab
-
-ENTRY(aes_enc_blk)
-	push    %ebp
-	mov     ctx(%esp),%ebp
-
-// CAUTION: the order and the values used in these assigns 
-// rely on the register mappings
-
-1:	push    %ebx
-	mov     in_blk+4(%esp),%r2
-	push    %esi
-	mov     klen(%ebp),%r3   // key size
-	push    %edi
-#if ekey != 0
-	lea     ekey(%ebp),%ebp  // key pointer
-#endif
-
-// input four columns and xor in first round key
-
-	mov     (%r2),%r0
-	mov     4(%r2),%r1
-	mov     8(%r2),%r4
-	mov     12(%r2),%r5
-	xor     (%ebp),%r0
-	xor     4(%ebp),%r1
-	xor     8(%ebp),%r4
-	xor     12(%ebp),%r5
-
-	sub     $8,%esp		// space for register saves on stack
-	add     $16,%ebp	// increment to next round key
-	cmp     $24,%r3
-	jb      4f		// 10 rounds for 128-bit key
-	lea     32(%ebp),%ebp
-	je      3f		// 12 rounds for 192-bit key
-	lea     32(%ebp),%ebp
-
-2:	fwd_rnd1( -64(%ebp), crypto_ft_tab)	// 14 rounds for 256-bit key
-	fwd_rnd2( -48(%ebp), crypto_ft_tab)
-3:	fwd_rnd1( -32(%ebp), crypto_ft_tab)	// 12 rounds for 192-bit key
-	fwd_rnd2( -16(%ebp), crypto_ft_tab)
-4:	fwd_rnd1(    (%ebp), crypto_ft_tab)	// 10 rounds for 128-bit key
-	fwd_rnd2( +16(%ebp), crypto_ft_tab)
-	fwd_rnd1( +32(%ebp), crypto_ft_tab)
-	fwd_rnd2( +48(%ebp), crypto_ft_tab)
-	fwd_rnd1( +64(%ebp), crypto_ft_tab)
-	fwd_rnd2( +80(%ebp), crypto_ft_tab)
-	fwd_rnd1( +96(%ebp), crypto_ft_tab)
-	fwd_rnd2(+112(%ebp), crypto_ft_tab)
-	fwd_rnd1(+128(%ebp), crypto_ft_tab)
-	fwd_rnd2(+144(%ebp), crypto_fl_tab)	// last round uses a different table
-
-// move final values to the output array.  CAUTION: the 
-// order of these assigns relies on the register mappings
-
-	add     $8,%esp
-	mov     out_blk+12(%esp),%ebp
-	mov     %r5,12(%ebp)
-	pop     %edi
-	mov     %r4,8(%ebp)
-	pop     %esi
-	mov     %r1,4(%ebp)
-	pop     %ebx
-	mov     %r0,(%ebp)
-	pop     %ebp
-	ret
-ENDPROC(aes_enc_blk)
-
-// AES (Rijndael) Decryption Subroutine
-/* void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out_blk, const u8 *in_blk) */
-
-.extern  crypto_it_tab
-.extern  crypto_il_tab
-
-ENTRY(aes_dec_blk)
-	push    %ebp
-	mov     ctx(%esp),%ebp
-
-// CAUTION: the order and the values used in these assigns 
-// rely on the register mappings
-
-1:	push    %ebx
-	mov     in_blk+4(%esp),%r2
-	push    %esi
-	mov     klen(%ebp),%r3   // key size
-	push    %edi
-#if dkey != 0
-	lea     dkey(%ebp),%ebp  // key pointer
-#endif
-	
-// input four columns and xor in first round key
-
-	mov     (%r2),%r0
-	mov     4(%r2),%r1
-	mov     8(%r2),%r4
-	mov     12(%r2),%r5
-	xor     (%ebp),%r0
-	xor     4(%ebp),%r1
-	xor     8(%ebp),%r4
-	xor     12(%ebp),%r5
-
-	sub     $8,%esp		// space for register saves on stack
-	add     $16,%ebp	// increment to next round key
-	cmp     $24,%r3
-	jb      4f		// 10 rounds for 128-bit key
-	lea     32(%ebp),%ebp
-	je      3f		// 12 rounds for 192-bit key
-	lea     32(%ebp),%ebp
-
-2:	inv_rnd1( -64(%ebp), crypto_it_tab)	// 14 rounds for 256-bit key
-	inv_rnd2( -48(%ebp), crypto_it_tab)
-3:	inv_rnd1( -32(%ebp), crypto_it_tab)	// 12 rounds for 192-bit key
-	inv_rnd2( -16(%ebp), crypto_it_tab)
-4:	inv_rnd1(    (%ebp), crypto_it_tab)	// 10 rounds for 128-bit key
-	inv_rnd2( +16(%ebp), crypto_it_tab)
-	inv_rnd1( +32(%ebp), crypto_it_tab)
-	inv_rnd2( +48(%ebp), crypto_it_tab)
-	inv_rnd1( +64(%ebp), crypto_it_tab)
-	inv_rnd2( +80(%ebp), crypto_it_tab)
-	inv_rnd1( +96(%ebp), crypto_it_tab)
-	inv_rnd2(+112(%ebp), crypto_it_tab)
-	inv_rnd1(+128(%ebp), crypto_it_tab)
-	inv_rnd2(+144(%ebp), crypto_il_tab)	// last round uses a different table
-
-// move final values to the output array.  CAUTION: the 
-// order of these assigns relies on the register mappings
-
-	add     $8,%esp
-	mov     out_blk+12(%esp),%ebp
-	mov     %r5,12(%ebp)
-	pop     %edi
-	mov     %r4,8(%ebp)
-	pop     %esi
-	mov     %r1,4(%ebp)
-	pop     %ebx
-	mov     %r0,(%ebp)
-	pop     %ebp
-	ret
-ENDPROC(aes_dec_blk)
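
For orientation, the do_col macro in the file removed above is the classic table-driven AES round step: each byte of one input column selects an entry in one of four 256-entry u32 tables, and the results are XORed into the four output columns. An equivalent C sketch (hypothetical helper; the kernel lays the four tables out back to back, tlen bytes apart):

#include <stdint.h>

/* One input column contributes one table lookup to each output column. */
static void aes_do_col(const uint32_t ft[4][256], uint32_t idx,
		       uint32_t out[4])
{
	out[0] ^= ft[0][(idx >>  0) & 0xff];
	out[1] ^= ft[1][(idx >>  8) & 0xff];
	out[2] ^= ft[2][(idx >> 16) & 0xff];
	out[3] ^= ft[3][(idx >> 24) & 0xff];
}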
diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
deleted file mode 100644
index 8739cf7..0000000
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ /dev/null
@@ -1,185 +0,0 @@
-/* AES (Rijndael) implementation (FIPS PUB 197) for x86_64
- *
- * Copyright (C) 2005 Andreas Steinmetz, <ast@domdv.de>
- *
- * License:
- * This code can be distributed under the terms of the GNU General Public
- * License (GPL) Version 2 provided that the above header down to and
- * including this sentence is retained in full.
- */
-
-.extern crypto_ft_tab
-.extern crypto_it_tab
-.extern crypto_fl_tab
-.extern crypto_il_tab
-
-.text
-
-#include <linux/linkage.h>
-#include <asm/asm-offsets.h>
-
-#define R1	%rax
-#define R1E	%eax
-#define R1X	%ax
-#define R1H	%ah
-#define R1L	%al
-#define R2	%rbx
-#define R2E	%ebx
-#define R2X	%bx
-#define R2H	%bh
-#define R2L	%bl
-#define R3	%rcx
-#define R3E	%ecx
-#define R3X	%cx
-#define R3H	%ch
-#define R3L	%cl
-#define R4	%rdx
-#define R4E	%edx
-#define R4X	%dx
-#define R4H	%dh
-#define R4L	%dl
-#define R5	%rsi
-#define R5E	%esi
-#define R6	%rdi
-#define R6E	%edi
-#define R7	%r9	/* don't use %rbp; it breaks stack traces */
-#define R7E	%r9d
-#define R8	%r8
-#define R10	%r10
-#define R11	%r11
-
-#define prologue(FUNC,KEY,B128,B192,r1,r2,r5,r6,r7,r8,r9,r10,r11) \
-	ENTRY(FUNC);			\
-	movq	r1,r2;			\
-	leaq	KEY+48(r8),r9;		\
-	movq	r10,r11;		\
-	movl	(r7),r5 ## E;		\
-	movl	4(r7),r1 ## E;		\
-	movl	8(r7),r6 ## E;		\
-	movl	12(r7),r7 ## E;		\
-	movl	480(r8),r10 ## E;	\
-	xorl	-48(r9),r5 ## E;	\
-	xorl	-44(r9),r1 ## E;	\
-	xorl	-40(r9),r6 ## E;	\
-	xorl	-36(r9),r7 ## E;	\
-	cmpl	$24,r10 ## E;		\
-	jb	B128;			\
-	leaq	32(r9),r9;		\
-	je	B192;			\
-	leaq	32(r9),r9;
-
-#define epilogue(FUNC,r1,r2,r5,r6,r7,r8,r9) \
-	movq	r1,r2;			\
-	movl	r5 ## E,(r9);		\
-	movl	r6 ## E,4(r9);		\
-	movl	r7 ## E,8(r9);		\
-	movl	r8 ## E,12(r9);		\
-	ret;				\
-	ENDPROC(FUNC);
-
-#define round(TAB,OFFSET,r1,r2,r3,r4,r5,r6,r7,r8,ra,rb,rc,rd) \
-	movzbl	r2 ## H,r5 ## E;	\
-	movzbl	r2 ## L,r6 ## E;	\
-	movl	TAB+1024(,r5,4),r5 ## E;\
-	movw	r4 ## X,r2 ## X;	\
-	movl	TAB(,r6,4),r6 ## E;	\
-	roll	$16,r2 ## E;		\
-	shrl	$16,r4 ## E;		\
-	movzbl	r4 ## L,r7 ## E;	\
-	movzbl	r4 ## H,r4 ## E;	\
-	xorl	OFFSET(r8),ra ## E;	\
-	xorl	OFFSET+4(r8),rb ## E;	\
-	xorl	TAB+3072(,r4,4),r5 ## E;\
-	xorl	TAB+2048(,r7,4),r6 ## E;\
-	movzbl	r1 ## L,r7 ## E;	\
-	movzbl	r1 ## H,r4 ## E;	\
-	movl	TAB+1024(,r4,4),r4 ## E;\
-	movw	r3 ## X,r1 ## X;	\
-	roll	$16,r1 ## E;		\
-	shrl	$16,r3 ## E;		\
-	xorl	TAB(,r7,4),r5 ## E;	\
-	movzbl	r3 ## L,r7 ## E;	\
-	movzbl	r3 ## H,r3 ## E;	\
-	xorl	TAB+3072(,r3,4),r4 ## E;\
-	xorl	TAB+2048(,r7,4),r5 ## E;\
-	movzbl	r1 ## L,r7 ## E;	\
-	movzbl	r1 ## H,r3 ## E;	\
-	shrl	$16,r1 ## E;		\
-	xorl	TAB+3072(,r3,4),r6 ## E;\
-	movl	TAB+2048(,r7,4),r3 ## E;\
-	movzbl	r1 ## L,r7 ## E;	\
-	movzbl	r1 ## H,r1 ## E;	\
-	xorl	TAB+1024(,r1,4),r6 ## E;\
-	xorl	TAB(,r7,4),r3 ## E;	\
-	movzbl	r2 ## H,r1 ## E;	\
-	movzbl	r2 ## L,r7 ## E;	\
-	shrl	$16,r2 ## E;		\
-	xorl	TAB+3072(,r1,4),r3 ## E;\
-	xorl	TAB+2048(,r7,4),r4 ## E;\
-	movzbl	r2 ## H,r1 ## E;	\
-	movzbl	r2 ## L,r2 ## E;	\
-	xorl	OFFSET+8(r8),rc ## E;	\
-	xorl	OFFSET+12(r8),rd ## E;	\
-	xorl	TAB+1024(,r1,4),r3 ## E;\
-	xorl	TAB(,r2,4),r4 ## E;
-
-#define move_regs(r1,r2,r3,r4) \
-	movl	r3 ## E,r1 ## E;	\
-	movl	r4 ## E,r2 ## E;
-
-#define entry(FUNC,KEY,B128,B192) \
-	prologue(FUNC,KEY,B128,B192,R2,R8,R1,R3,R4,R6,R10,R5,R11)
-
-#define return(FUNC) epilogue(FUNC,R8,R2,R5,R6,R3,R4,R11)
-
-#define encrypt_round(TAB,OFFSET) \
-	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \
-	move_regs(R1,R2,R5,R6)
-
-#define encrypt_final(TAB,OFFSET) \
-	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4)
-
-#define decrypt_round(TAB,OFFSET) \
-	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4) \
-	move_regs(R1,R2,R5,R6)
-
-#define decrypt_final(TAB,OFFSET) \
-	round(TAB,OFFSET,R2,R1,R4,R3,R6,R5,R7,R10,R5,R6,R3,R4)
-
-/* void aes_enc_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
-
-	entry(aes_enc_blk,0,.Le128,.Le192)
-	encrypt_round(crypto_ft_tab,-96)
-	encrypt_round(crypto_ft_tab,-80)
-.Le192:	encrypt_round(crypto_ft_tab,-64)
-	encrypt_round(crypto_ft_tab,-48)
-.Le128:	encrypt_round(crypto_ft_tab,-32)
-	encrypt_round(crypto_ft_tab,-16)
-	encrypt_round(crypto_ft_tab,  0)
-	encrypt_round(crypto_ft_tab, 16)
-	encrypt_round(crypto_ft_tab, 32)
-	encrypt_round(crypto_ft_tab, 48)
-	encrypt_round(crypto_ft_tab, 64)
-	encrypt_round(crypto_ft_tab, 80)
-	encrypt_round(crypto_ft_tab, 96)
-	encrypt_final(crypto_fl_tab,112)
-	return(aes_enc_blk)
-
-/* void aes_dec_blk(struct crypto_tfm *tfm, u8 *out, const u8 *in) */
-
-	entry(aes_dec_blk,240,.Ld128,.Ld192)
-	decrypt_round(crypto_it_tab,-96)
-	decrypt_round(crypto_it_tab,-80)
-.Ld192:	decrypt_round(crypto_it_tab,-64)
-	decrypt_round(crypto_it_tab,-48)
-.Ld128:	decrypt_round(crypto_it_tab,-32)
-	decrypt_round(crypto_it_tab,-16)
-	decrypt_round(crypto_it_tab,  0)
-	decrypt_round(crypto_it_tab, 16)
-	decrypt_round(crypto_it_tab, 32)
-	decrypt_round(crypto_it_tab, 48)
-	decrypt_round(crypto_it_tab, 64)
-	decrypt_round(crypto_it_tab, 80)
-	decrypt_round(crypto_it_tab, 96)
-	decrypt_final(crypto_il_tab,112)
-	return(aes_dec_blk)
diff --git a/arch/x86/crypto/aes_glue.c b/arch/x86/crypto/aes_glue.c
index 9e9d819..7b7dc05 100644
--- a/arch/x86/crypto/aes_glue.c
+++ b/arch/x86/crypto/aes_glue.c
@@ -1,71 +1 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/*
- * Glue Code for the asm optimized version of the AES Cipher Algorithm
- *
- */
-
-#include <linux/module.h>
-#include <crypto/aes.h>
-#include <asm/crypto/aes.h>
-
-asmlinkage void aes_enc_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
-asmlinkage void aes_dec_blk(struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
-
-void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
-{
-	aes_enc_blk(ctx, dst, src);
-}
-EXPORT_SYMBOL_GPL(crypto_aes_encrypt_x86);
-
-void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst, const u8 *src)
-{
-	aes_dec_blk(ctx, dst, src);
-}
-EXPORT_SYMBOL_GPL(crypto_aes_decrypt_x86);
-
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-	aes_enc_blk(crypto_tfm_ctx(tfm), dst, src);
-}
-
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-	aes_dec_blk(crypto_tfm_ctx(tfm), dst, src);
-}
-
-static struct crypto_alg aes_alg = {
-	.cra_name		= "aes",
-	.cra_driver_name	= "aes-asm",
-	.cra_priority		= 200,
-	.cra_flags		= CRYPTO_ALG_TYPE_CIPHER,
-	.cra_blocksize		= AES_BLOCK_SIZE,
-	.cra_ctxsize		= sizeof(struct crypto_aes_ctx),
-	.cra_module		= THIS_MODULE,
-	.cra_u	= {
-		.cipher	= {
-			.cia_min_keysize	= AES_MIN_KEY_SIZE,
-			.cia_max_keysize	= AES_MAX_KEY_SIZE,
-			.cia_setkey		= crypto_aes_set_key,
-			.cia_encrypt		= aes_encrypt,
-			.cia_decrypt		= aes_decrypt
-		}
-	}
-};
-
-static int __init aes_init(void)
-{
-	return crypto_register_alg(&aes_alg);
-}
-
-static void __exit aes_fini(void)
-{
-	crypto_unregister_alg(&aes_alg);
-}
-
-module_init(aes_init);
-module_exit(aes_fini);
-
-MODULE_DESCRIPTION("Rijndael (AES) Cipher Algorithm, asm optimized");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_CRYPTO("aes");
-MODULE_ALIAS_CRYPTO("aes-asm");
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 73c0ccb..3e707e8 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -26,7 +26,6 @@
 #include <crypto/gcm.h>
 #include <crypto/xts.h>
 #include <asm/cpu_device_id.h>
-#include <asm/crypto/aes.h>
 #include <asm/simd.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/aead.h>
@@ -329,7 +328,7 @@
 	}
 
 	if (!crypto_simd_usable())
-		err = crypto_aes_expand_key(ctx, in_key, key_len);
+		err = aes_expandkey(ctx, in_key, key_len);
 	else {
 		kernel_fpu_begin();
 		err = aesni_set_key(ctx, in_key, key_len);
@@ -345,26 +344,26 @@
 	return aes_set_key_common(tfm, crypto_tfm_ctx(tfm), in_key, key_len);
 }
 
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void aesni_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
 
-	if (!crypto_simd_usable())
-		crypto_aes_encrypt_x86(ctx, dst, src);
-	else {
+	if (!crypto_simd_usable()) {
+		aes_encrypt(ctx, dst, src);
+	} else {
 		kernel_fpu_begin();
 		aesni_enc(ctx, dst, src);
 		kernel_fpu_end();
 	}
 }
 
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void aesni_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct crypto_aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(tfm));
 
-	if (!crypto_simd_usable())
-		crypto_aes_decrypt_x86(ctx, dst, src);
-	else {
+	if (!crypto_simd_usable()) {
+		aes_decrypt(ctx, dst, src);
+	} else {
 		kernel_fpu_begin();
 		aesni_dec(ctx, dst, src);
 		kernel_fpu_end();
@@ -610,7 +609,8 @@
 	return glue_xts_req_128bit(&aesni_enc_xts, req,
 				   XTS_TWEAK_CAST(aesni_xts_tweak),
 				   aes_ctx(ctx->raw_tweak_ctx),
-				   aes_ctx(ctx->raw_crypt_ctx));
+				   aes_ctx(ctx->raw_crypt_ctx),
+				   false);
 }
 
 static int xts_decrypt(struct skcipher_request *req)
@@ -621,32 +621,28 @@
 	return glue_xts_req_128bit(&aesni_dec_xts, req,
 				   XTS_TWEAK_CAST(aesni_xts_tweak),
 				   aes_ctx(ctx->raw_tweak_ctx),
-				   aes_ctx(ctx->raw_crypt_ctx));
+				   aes_ctx(ctx->raw_crypt_ctx),
+				   true);
 }
 
 static int
 rfc4106_set_hash_subkey(u8 *hash_subkey, const u8 *key, unsigned int key_len)
 {
-	struct crypto_cipher *tfm;
+	struct crypto_aes_ctx ctx;
 	int ret;
 
-	tfm = crypto_alloc_cipher("aes", 0, 0);
-	if (IS_ERR(tfm))
-		return PTR_ERR(tfm);
-
-	ret = crypto_cipher_setkey(tfm, key, key_len);
+	ret = aes_expandkey(&ctx, key, key_len);
 	if (ret)
-		goto out_free_cipher;
+		return ret;
 
 	/* Clear the data in the hash sub key container to zero.*/
 	/* We want to cipher all zeros to create the hash sub key. */
 	memset(hash_subkey, 0, RFC4106_HASH_SUBKEY_SIZE);
 
-	crypto_cipher_encrypt_one(tfm, hash_subkey, hash_subkey);
+	aes_encrypt(&ctx, hash_subkey, hash_subkey);
 
-out_free_cipher:
-	crypto_free_cipher(tfm);
-	return ret;
+	memzero_explicit(&ctx, sizeof(ctx));
+	return 0;
 }
 
 static int common_rfc4106_set_key(struct crypto_aead *aead, const u8 *key,
@@ -919,8 +915,8 @@
 			.cia_min_keysize	= AES_MIN_KEY_SIZE,
 			.cia_max_keysize	= AES_MAX_KEY_SIZE,
 			.cia_setkey		= aes_set_key,
-			.cia_encrypt		= aes_encrypt,
-			.cia_decrypt		= aes_decrypt
+			.cia_encrypt		= aesni_encrypt,
+			.cia_decrypt		= aesni_decrypt
 		}
 	}
 };
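
The rfc4106_set_hash_subkey() hunk above switches from allocating a "aes" cipher tfm to the new AES library helpers. A minimal standalone sketch of that pattern (compute_ghash_subkey is an illustrative name, not an exported function):

#include <crypto/aes.h>
#include <linux/string.h>

static int compute_ghash_subkey(u8 subkey[AES_BLOCK_SIZE],
				const u8 *key, unsigned int key_len)
{
	struct crypto_aes_ctx ctx;
	int err;

	err = aes_expandkey(&ctx, key, key_len);
	if (err)
		return err;

	/* H = E_K(0^128): encrypt an all-zero block with the user key */
	memset(subkey, 0, AES_BLOCK_SIZE);
	aes_encrypt(&ctx, subkey, subkey);

	/* don't leave the expanded round keys on the stack */
	memzero_explicit(&ctx, sizeof(ctx));
	return 0;
}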
diff --git a/arch/x86/crypto/camellia_aesni_avx2_glue.c b/arch/x86/crypto/camellia_aesni_avx2_glue.c
index abf298c..a4f0012 100644
--- a/arch/x86/crypto/camellia_aesni_avx2_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx2_glue.c
@@ -182,7 +182,7 @@
 
 	return glue_xts_req_128bit(&camellia_enc_xts, req,
 				   XTS_TWEAK_CAST(camellia_enc_blk),
-				   &ctx->tweak_ctx, &ctx->crypt_ctx);
+				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
 }
 
 static int xts_decrypt(struct skcipher_request *req)
@@ -192,7 +192,7 @@
 
 	return glue_xts_req_128bit(&camellia_dec_xts, req,
 				   XTS_TWEAK_CAST(camellia_enc_blk),
-				   &ctx->tweak_ctx, &ctx->crypt_ctx);
+				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
 }
 
 static struct skcipher_alg camellia_algs[] = {
diff --git a/arch/x86/crypto/camellia_aesni_avx_glue.c b/arch/x86/crypto/camellia_aesni_avx_glue.c
index 0c22d84..f28d282 100644
--- a/arch/x86/crypto/camellia_aesni_avx_glue.c
+++ b/arch/x86/crypto/camellia_aesni_avx_glue.c
@@ -208,7 +208,7 @@
 
 	return glue_xts_req_128bit(&camellia_enc_xts, req,
 				   XTS_TWEAK_CAST(camellia_enc_blk),
-				   &ctx->tweak_ctx, &ctx->crypt_ctx);
+				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
 }
 
 static int xts_decrypt(struct skcipher_request *req)
@@ -218,7 +218,7 @@
 
 	return glue_xts_req_128bit(&camellia_dec_xts, req,
 				   XTS_TWEAK_CAST(camellia_enc_blk),
-				   &ctx->tweak_ctx, &ctx->crypt_ctx);
+				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
 }
 
 static struct skcipher_alg camellia_algs[] = {
diff --git a/arch/x86/crypto/cast6_avx_glue.c b/arch/x86/crypto/cast6_avx_glue.c
index 645f8f1..a8a38ff 100644
--- a/arch/x86/crypto/cast6_avx_glue.c
+++ b/arch/x86/crypto/cast6_avx_glue.c
@@ -201,7 +201,7 @@
 
 	return glue_xts_req_128bit(&cast6_enc_xts, req,
 				   XTS_TWEAK_CAST(__cast6_encrypt),
-				   &ctx->tweak_ctx, &ctx->crypt_ctx);
+				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
 }
 
 static int xts_decrypt(struct skcipher_request *req)
@@ -211,7 +211,7 @@
 
 	return glue_xts_req_128bit(&cast6_dec_xts, req,
 				   XTS_TWEAK_CAST(__cast6_encrypt),
-				   &ctx->tweak_ctx, &ctx->crypt_ctx);
+				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
 }
 
 static struct skcipher_alg cast6_algs[] = {
diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c
index 968386c..89830e5 100644
--- a/arch/x86/crypto/des3_ede_glue.c
+++ b/arch/x86/crypto/des3_ede_glue.c
@@ -19,8 +19,8 @@
 #include <linux/types.h>
 
 struct des3_ede_x86_ctx {
-	u32 enc_expkey[DES3_EDE_EXPKEY_WORDS];
-	u32 dec_expkey[DES3_EDE_EXPKEY_WORDS];
+	struct des3_ede_ctx enc;
+	struct des3_ede_ctx dec;
 };
 
 /* regular block cipher functions */
@@ -34,7 +34,7 @@
 static inline void des3_ede_enc_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
 				    const u8 *src)
 {
-	u32 *enc_ctx = ctx->enc_expkey;
+	u32 *enc_ctx = ctx->enc.expkey;
 
 	des3_ede_x86_64_crypt_blk(enc_ctx, dst, src);
 }
@@ -42,7 +42,7 @@
 static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst,
 				    const u8 *src)
 {
-	u32 *dec_ctx = ctx->dec_expkey;
+	u32 *dec_ctx = ctx->dec.expkey;
 
 	des3_ede_x86_64_crypt_blk(dec_ctx, dst, src);
 }
@@ -50,7 +50,7 @@
 static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
 					 const u8 *src)
 {
-	u32 *enc_ctx = ctx->enc_expkey;
+	u32 *enc_ctx = ctx->enc.expkey;
 
 	des3_ede_x86_64_crypt_blk_3way(enc_ctx, dst, src);
 }
@@ -58,7 +58,7 @@
 static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst,
 					 const u8 *src)
 {
-	u32 *dec_ctx = ctx->dec_expkey;
+	u32 *dec_ctx = ctx->dec.expkey;
 
 	des3_ede_x86_64_crypt_blk_3way(dec_ctx, dst, src);
 }
@@ -122,7 +122,7 @@
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return ecb_crypt(req, ctx->enc_expkey);
+	return ecb_crypt(req, ctx->enc.expkey);
 }
 
 static int ecb_decrypt(struct skcipher_request *req)
@@ -130,7 +130,7 @@
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct des3_ede_x86_ctx *ctx = crypto_skcipher_ctx(tfm);
 
-	return ecb_crypt(req, ctx->dec_expkey);
+	return ecb_crypt(req, ctx->dec.expkey);
 }
 
 static unsigned int __cbc_encrypt(struct des3_ede_x86_ctx *ctx,
@@ -348,20 +348,28 @@
 	u32 i, j, tmp;
 	int err;
 
-	/* Generate encryption context using generic implementation. */
-	err = __des3_ede_setkey(ctx->enc_expkey, &tfm->crt_flags, key, keylen);
-	if (err < 0)
+	err = des3_ede_expand_key(&ctx->enc, key, keylen);
+	if (err == -ENOKEY) {
+		if (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)
+			err = -EINVAL;
+		else
+			err = 0;
+	}
+
+	if (err) {
+		memset(ctx, 0, sizeof(*ctx));
 		return err;
+	}
 
 	/* Fix encryption context for this implementation and form decryption
 	 * context. */
 	j = DES3_EDE_EXPKEY_WORDS - 2;
 	for (i = 0; i < DES3_EDE_EXPKEY_WORDS; i += 2, j -= 2) {
-		tmp = ror32(ctx->enc_expkey[i + 1], 4);
-		ctx->enc_expkey[i + 1] = tmp;
+		tmp = ror32(ctx->enc.expkey[i + 1], 4);
+		ctx->enc.expkey[i + 1] = tmp;
 
-		ctx->dec_expkey[j + 0] = ctx->enc_expkey[i + 0];
-		ctx->dec_expkey[j + 1] = tmp;
+		ctx->dec.expkey[j + 0] = ctx->enc.expkey[i + 0];
+		ctx->dec.expkey[j + 1] = tmp;
 	}
 
 	return 0;
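
The setkey change above relies on des3_ede_expand_key() reporting -ENOKEY for keys that fail the new DES key verification; such a key is rejected only when the caller set CRYPTO_TFM_REQ_FORBID_WEAK_KEYS. Restated as a standalone helper (hypothetical name, same logic as the hunk):

#include <crypto/des.h>
#include <linux/crypto.h>
#include <linux/string.h>

static int expand_3des_key_checked(struct crypto_tfm *tfm,
				   struct des3_ede_ctx *dctx,
				   const u8 *key, unsigned int keylen)
{
	int err = des3_ede_expand_key(dctx, key, keylen);

	/* -ENOKEY flags a weak/degenerate key; fatal only if forbidden */
	if (err == -ENOKEY) {
		if (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)
			err = -EINVAL;
		else
			err = 0;
	}

	if (err)
		memset(dctx, 0, sizeof(*dctx));

	return err;
}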
diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c
index ac76fe8..04d72a5 100644
--- a/arch/x86/crypto/ghash-clmulni-intel_glue.c
+++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c
@@ -357,6 +357,5 @@
 module_exit(ghash_pclmulqdqni_mod_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("GHASH Message Digest Algorithm, "
-		   "accelerated by PCLMULQDQ-NI");
+MODULE_DESCRIPTION("GHASH hash function, accelerated by PCLMULQDQ-NI");
 MODULE_ALIAS_CRYPTO("ghash");
diff --git a/arch/x86/crypto/glue_helper.c b/arch/x86/crypto/glue_helper.c
index 9015514..d15b993 100644
--- a/arch/x86/crypto/glue_helper.c
+++ b/arch/x86/crypto/glue_helper.c
@@ -14,6 +14,7 @@
 #include <crypto/b128ops.h>
 #include <crypto/gf128mul.h>
 #include <crypto/internal/skcipher.h>
+#include <crypto/scatterwalk.h>
 #include <crypto/xts.h>
 #include <asm/crypto/glue_helper.h>
 
@@ -259,17 +260,36 @@
 int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 			struct skcipher_request *req,
 			common_glue_func_t tweak_fn, void *tweak_ctx,
-			void *crypt_ctx)
+			void *crypt_ctx, bool decrypt)
 {
+	const bool cts = (req->cryptlen % XTS_BLOCK_SIZE);
 	const unsigned int bsize = 128 / 8;
+	struct skcipher_request subreq;
 	struct skcipher_walk walk;
 	bool fpu_enabled = false;
-	unsigned int nbytes;
+	unsigned int nbytes, tail;
 	int err;
 
+	if (req->cryptlen < XTS_BLOCK_SIZE)
+		return -EINVAL;
+
+	if (unlikely(cts)) {
+		struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+
+		tail = req->cryptlen % XTS_BLOCK_SIZE + XTS_BLOCK_SIZE;
+
+		skcipher_request_set_tfm(&subreq, tfm);
+		skcipher_request_set_callback(&subreq,
+					      crypto_skcipher_get_flags(tfm),
+					      NULL, NULL);
+		skcipher_request_set_crypt(&subreq, req->src, req->dst,
+					   req->cryptlen - tail, req->iv);
+		req = &subreq;
+	}
+
 	err = skcipher_walk_virt(&walk, req, false);
 	nbytes = walk.nbytes;
-	if (!nbytes)
+	if (err)
 		return err;
 
 	/* set minimum length to bsize, for tweak_fn */
@@ -287,6 +307,47 @@
 		nbytes = walk.nbytes;
 	}
 
+	if (unlikely(cts)) {
+		u8 *next_tweak, *final_tweak = req->iv;
+		struct scatterlist *src, *dst;
+		struct scatterlist s[2], d[2];
+		le128 b[2];
+
+		dst = src = scatterwalk_ffwd(s, req->src, req->cryptlen);
+		if (req->dst != req->src)
+			dst = scatterwalk_ffwd(d, req->dst, req->cryptlen);
+
+		if (decrypt) {
+			next_tweak = memcpy(b, req->iv, XTS_BLOCK_SIZE);
+			gf128mul_x_ble(b, b);
+		} else {
+			next_tweak = req->iv;
+		}
+
+		skcipher_request_set_crypt(&subreq, src, dst, XTS_BLOCK_SIZE,
+					   next_tweak);
+
+		err = skcipher_walk_virt(&walk, req, false) ?:
+		      skcipher_walk_done(&walk,
+				__glue_xts_req_128bit(gctx, crypt_ctx, &walk));
+		if (err)
+			goto out;
+
+		scatterwalk_map_and_copy(b, dst, 0, XTS_BLOCK_SIZE, 0);
+		memcpy(b + 1, b, tail - XTS_BLOCK_SIZE);
+		scatterwalk_map_and_copy(b, src, XTS_BLOCK_SIZE,
+					 tail - XTS_BLOCK_SIZE, 0);
+		scatterwalk_map_and_copy(b, dst, 0, tail, 1);
+
+		skcipher_request_set_crypt(&subreq, dst, dst, XTS_BLOCK_SIZE,
+					   final_tweak);
+
+		err = skcipher_walk_virt(&walk, req, false) ?:
+		      skcipher_walk_done(&walk,
+				__glue_xts_req_128bit(gctx, crypt_ctx, &walk));
+	}
+
+out:
 	glue_fpu_end(fpu_enabled);
 
 	return err;
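
glue_xts_req_128bit() now performs ciphertext stealing when the request length is not a multiple of the block size: all but the last 16 < tail < 32 bytes are processed as before, and the final full block plus the partial block are handled as sketched below. This is a plain-buffer illustration with a hypothetical per-block callback that is assumed to advance the tweak itself; the kernel code achieves the same effect through a skcipher sub-request and scatterwalk copies.

#include <stdint.h>
#include <string.h>

typedef void (*xts_encrypt_block_fn)(void *ctx, uint8_t dst[16],
				     const uint8_t src[16], uint8_t tweak[16]);

/* Encrypt the final tail = 16 + partial bytes with ciphertext stealing. */
static void xts_cts_encrypt_tail(void *ctx, xts_encrypt_block_fn enc,
				 uint8_t *dst, const uint8_t *src,
				 size_t tail, uint8_t tweak[16])
{
	size_t partial = tail - 16;	/* bytes in the short final block */
	uint8_t cc[16], pp[16];

	enc(ctx, cc, src, tweak);		/* CC = E(T_{m-1}, P_{m-1}) */

	memcpy(dst + 16, cc, partial);		/* C_m = head of CC */

	memcpy(pp, src + 16, partial);		/* PP = P_m || tail of CC */
	memcpy(pp + partial, cc + partial, 16 - partial);

	enc(ctx, dst, pp, tweak);		/* C_{m-1} = E(T_m, PP) */
}

For decryption the hunk pre-computes the next tweak with gf128mul_x_ble(), so the two final blocks see the tweaks in swapped order, as ciphertext stealing requires.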
diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S
deleted file mode 100644
index 5413fee..0000000
--- a/arch/x86/crypto/morus1280-avx2-asm.S
+++ /dev/null
@@ -1,619 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * AVX2 implementation of MORUS-1280
- *
- * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-
-#define SHUFFLE_MASK(i0, i1, i2, i3) \
-	(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
-
-#define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
-#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
-#define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
-
-#define STATE0		%ymm0
-#define STATE0_LOW	%xmm0
-#define STATE1		%ymm1
-#define STATE2		%ymm2
-#define STATE3		%ymm3
-#define STATE4		%ymm4
-#define KEY		%ymm5
-#define MSG		%ymm5
-#define MSG_LOW		%xmm5
-#define T0		%ymm6
-#define T0_LOW		%xmm6
-#define T1		%ymm7
-
-.section .rodata.cst32.morus1280_const, "aM", @progbits, 32
-.align 32
-.Lmorus1280_const:
-	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
-	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
-	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
-	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
-
-.section .rodata.cst32.morus1280_counter, "aM", @progbits, 32
-.align 32
-.Lmorus1280_counter:
-	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
-	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
-	.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
-	.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
-
-.text
-
-.macro morus1280_round s0, s1, s2, s3, s4, b, w
-	vpand \s1, \s2, T0
-	vpxor T0, \s0, \s0
-	vpxor \s3, \s0, \s0
-	vpsllq $\b, \s0, T0
-	vpsrlq $(64 - \b), \s0, \s0
-	vpxor T0, \s0, \s0
-	vpermq $\w, \s3, \s3
-.endm
-
-/*
- * __morus1280_update: internal ABI
- * input:
- *   STATE[0-4] - input state
- *   MSG        - message block
- * output:
- *   STATE[0-4] - output state
- * changed:
- *   T0
- */
-__morus1280_update:
-	morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
-	vpxor MSG, STATE1, STATE1
-	morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
-	vpxor MSG, STATE2, STATE2
-	morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
-	vpxor MSG, STATE3, STATE3
-	morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2,  7, MASK2
-	vpxor MSG, STATE4, STATE4
-	morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3,  4, MASK1
-	ret
-ENDPROC(__morus1280_update)
-
-/*
- * __morus1280_update_zero: internal ABI
- * input:
- *   STATE[0-4] - input state
- * output:
- *   STATE[0-4] - output state
- * changed:
- *   T0
- */
-__morus1280_update_zero:
-	morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1
-	morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2
-	morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3
-	morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2,  7, MASK2
-	morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3,  4, MASK1
-	ret
-ENDPROC(__morus1280_update_zero)
-
-/*
- * __load_partial: internal ABI
- * input:
- *   %rsi - src
- *   %rcx - bytes
- * output:
- *   MSG  - message block
- * changed:
- *   %r8
- *   %r9
- */
-__load_partial:
-	xor %r9d, %r9d
-	vpxor MSG, MSG, MSG
-
-	mov %rcx, %r8
-	and $0x1, %r8
-	jz .Lld_partial_1
-
-	mov %rcx, %r8
-	and $0x1E, %r8
-	add %rsi, %r8
-	mov (%r8), %r9b
-
-.Lld_partial_1:
-	mov %rcx, %r8
-	and $0x2, %r8
-	jz .Lld_partial_2
-
-	mov %rcx, %r8
-	and $0x1C, %r8
-	add %rsi, %r8
-	shl $16, %r9
-	mov (%r8), %r9w
-
-.Lld_partial_2:
-	mov %rcx, %r8
-	and $0x4, %r8
-	jz .Lld_partial_4
-
-	mov %rcx, %r8
-	and $0x18, %r8
-	add %rsi, %r8
-	shl $32, %r9
-	mov (%r8), %r8d
-	xor %r8, %r9
-
-.Lld_partial_4:
-	movq %r9, MSG_LOW
-
-	mov %rcx, %r8
-	and $0x8, %r8
-	jz .Lld_partial_8
-
-	mov %rcx, %r8
-	and $0x10, %r8
-	add %rsi, %r8
-	pshufd $MASK2, MSG_LOW, MSG_LOW
-	pinsrq $0, (%r8), MSG_LOW
-
-.Lld_partial_8:
-	mov %rcx, %r8
-	and $0x10, %r8
-	jz .Lld_partial_16
-
-	vpermq $MASK2, MSG, MSG
-	movdqu (%rsi), MSG_LOW
-
-.Lld_partial_16:
-	ret
-ENDPROC(__load_partial)
-
-/*
- * __store_partial: internal ABI
- * input:
- *   %rdx - dst
- *   %rcx - bytes
- * output:
- *   T0   - message block
- * changed:
- *   %r8
- *   %r9
- *   %r10
- */
-__store_partial:
-	mov %rcx, %r8
-	mov %rdx, %r9
-
-	cmp $16, %r8
-	jl .Lst_partial_16
-
-	movdqu T0_LOW, (%r9)
-	vpermq $MASK2, T0, T0
-
-	sub $16, %r8
-	add $16, %r9
-
-.Lst_partial_16:
-	movq T0_LOW, %r10
-
-	cmp $8, %r8
-	jl .Lst_partial_8
-
-	mov %r10, (%r9)
-	pextrq $1, T0_LOW, %r10
-
-	sub $8, %r8
-	add $8, %r9
-
-.Lst_partial_8:
-	cmp $4, %r8
-	jl .Lst_partial_4
-
-	mov %r10d, (%r9)
-	shr $32, %r10
-
-	sub $4, %r8
-	add $4, %r9
-
-.Lst_partial_4:
-	cmp $2, %r8
-	jl .Lst_partial_2
-
-	mov %r10w, (%r9)
-	shr $16, %r10
-
-	sub $2, %r8
-	add $2, %r9
-
-.Lst_partial_2:
-	cmp $1, %r8
-	jl .Lst_partial_1
-
-	mov %r10b, (%r9)
-
-.Lst_partial_1:
-	ret
-ENDPROC(__store_partial)
-
-/*
- * void crypto_morus1280_avx2_init(void *state, const void *key,
- *                                 const void *iv);
- */
-ENTRY(crypto_morus1280_avx2_init)
-	FRAME_BEGIN
-
-	/* load IV: */
-	vpxor STATE0, STATE0, STATE0
-	movdqu (%rdx), STATE0_LOW
-	/* load key: */
-	vmovdqu (%rsi), KEY
-	vmovdqa KEY, STATE1
-	/* load all ones: */
-	vpcmpeqd STATE2, STATE2, STATE2
-	/* load all zeros: */
-	vpxor STATE3, STATE3, STATE3
-	/* load the constant: */
-	vmovdqa .Lmorus1280_const, STATE4
-
-	/* update 16 times with zero: */
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-
-	/* xor-in the key again after updates: */
-	vpxor KEY, STATE1, STATE1
-
-	/* store the state: */
-	vmovdqu STATE0, (0 * 32)(%rdi)
-	vmovdqu STATE1, (1 * 32)(%rdi)
-	vmovdqu STATE2, (2 * 32)(%rdi)
-	vmovdqu STATE3, (3 * 32)(%rdi)
-	vmovdqu STATE4, (4 * 32)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_avx2_init)
-
-/*
- * void crypto_morus1280_avx2_ad(void *state, const void *data,
- *                               unsigned int length);
- */
-ENTRY(crypto_morus1280_avx2_ad)
-	FRAME_BEGIN
-
-	cmp $32, %rdx
-	jb .Lad_out
-
-	/* load the state: */
-	vmovdqu (0 * 32)(%rdi), STATE0
-	vmovdqu (1 * 32)(%rdi), STATE1
-	vmovdqu (2 * 32)(%rdi), STATE2
-	vmovdqu (3 * 32)(%rdi), STATE3
-	vmovdqu (4 * 32)(%rdi), STATE4
-
-	mov %rsi,  %r8
-	and $0x1F, %r8
-	jnz .Lad_u_loop
-
-.align 4
-.Lad_a_loop:
-	vmovdqa (%rsi), MSG
-	call __morus1280_update
-	sub $32, %rdx
-	add $32, %rsi
-	cmp $32, %rdx
-	jge .Lad_a_loop
-
-	jmp .Lad_cont
-.align 4
-.Lad_u_loop:
-	vmovdqu (%rsi), MSG
-	call __morus1280_update
-	sub $32, %rdx
-	add $32, %rsi
-	cmp $32, %rdx
-	jge .Lad_u_loop
-
-.Lad_cont:
-	/* store the state: */
-	vmovdqu STATE0, (0 * 32)(%rdi)
-	vmovdqu STATE1, (1 * 32)(%rdi)
-	vmovdqu STATE2, (2 * 32)(%rdi)
-	vmovdqu STATE3, (3 * 32)(%rdi)
-	vmovdqu STATE4, (4 * 32)(%rdi)
-
-.Lad_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_avx2_ad)
-
-/*
- * void crypto_morus1280_avx2_enc(void *state, const void *src, void *dst,
- *                                unsigned int length);
- */
-ENTRY(crypto_morus1280_avx2_enc)
-	FRAME_BEGIN
-
-	cmp $32, %rcx
-	jb .Lenc_out
-
-	/* load the state: */
-	vmovdqu (0 * 32)(%rdi), STATE0
-	vmovdqu (1 * 32)(%rdi), STATE1
-	vmovdqu (2 * 32)(%rdi), STATE2
-	vmovdqu (3 * 32)(%rdi), STATE3
-	vmovdqu (4 * 32)(%rdi), STATE4
-
-	mov %rsi,  %r8
-	or  %rdx,  %r8
-	and $0x1F, %r8
-	jnz .Lenc_u_loop
-
-.align 4
-.Lenc_a_loop:
-	vmovdqa (%rsi), MSG
-	vmovdqa MSG, T0
-	vpxor STATE0, T0, T0
-	vpermq $MASK3, STATE1, T1
-	vpxor T1, T0, T0
-	vpand STATE2, STATE3, T1
-	vpxor T1, T0, T0
-	vmovdqa T0, (%rdx)
-
-	call __morus1280_update
-	sub $32, %rcx
-	add $32, %rsi
-	add $32, %rdx
-	cmp $32, %rcx
-	jge .Lenc_a_loop
-
-	jmp .Lenc_cont
-.align 4
-.Lenc_u_loop:
-	vmovdqu (%rsi), MSG
-	vmovdqa MSG, T0
-	vpxor STATE0, T0, T0
-	vpermq $MASK3, STATE1, T1
-	vpxor T1, T0, T0
-	vpand STATE2, STATE3, T1
-	vpxor T1, T0, T0
-	vmovdqu T0, (%rdx)
-
-	call __morus1280_update
-	sub $32, %rcx
-	add $32, %rsi
-	add $32, %rdx
-	cmp $32, %rcx
-	jge .Lenc_u_loop
-
-.Lenc_cont:
-	/* store the state: */
-	vmovdqu STATE0, (0 * 32)(%rdi)
-	vmovdqu STATE1, (1 * 32)(%rdi)
-	vmovdqu STATE2, (2 * 32)(%rdi)
-	vmovdqu STATE3, (3 * 32)(%rdi)
-	vmovdqu STATE4, (4 * 32)(%rdi)
-
-.Lenc_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_avx2_enc)
-
-/*
- * void crypto_morus1280_avx2_enc_tail(void *state, const void *src, void *dst,
- *                                     unsigned int length);
- */
-ENTRY(crypto_morus1280_avx2_enc_tail)
-	FRAME_BEGIN
-
-	/* load the state: */
-	vmovdqu (0 * 32)(%rdi), STATE0
-	vmovdqu (1 * 32)(%rdi), STATE1
-	vmovdqu (2 * 32)(%rdi), STATE2
-	vmovdqu (3 * 32)(%rdi), STATE3
-	vmovdqu (4 * 32)(%rdi), STATE4
-
-	/* encrypt message: */
-	call __load_partial
-
-	vmovdqa MSG, T0
-	vpxor STATE0, T0, T0
-	vpermq $MASK3, STATE1, T1
-	vpxor T1, T0, T0
-	vpand STATE2, STATE3, T1
-	vpxor T1, T0, T0
-
-	call __store_partial
-
-	call __morus1280_update
-
-	/* store the state: */
-	vmovdqu STATE0, (0 * 32)(%rdi)
-	vmovdqu STATE1, (1 * 32)(%rdi)
-	vmovdqu STATE2, (2 * 32)(%rdi)
-	vmovdqu STATE3, (3 * 32)(%rdi)
-	vmovdqu STATE4, (4 * 32)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_avx2_enc_tail)
-
-/*
- * void crypto_morus1280_avx2_dec(void *state, const void *src, void *dst,
- *                                unsigned int length);
- */
-ENTRY(crypto_morus1280_avx2_dec)
-	FRAME_BEGIN
-
-	cmp $32, %rcx
-	jb .Ldec_out
-
-	/* load the state: */
-	vmovdqu (0 * 32)(%rdi), STATE0
-	vmovdqu (1 * 32)(%rdi), STATE1
-	vmovdqu (2 * 32)(%rdi), STATE2
-	vmovdqu (3 * 32)(%rdi), STATE3
-	vmovdqu (4 * 32)(%rdi), STATE4
-
-	mov %rsi,  %r8
-	or  %rdx,  %r8
-	and $0x1F, %r8
-	jnz .Ldec_u_loop
-
-.align 4
-.Ldec_a_loop:
-	vmovdqa (%rsi), MSG
-	vpxor STATE0, MSG, MSG
-	vpermq $MASK3, STATE1, T0
-	vpxor T0, MSG, MSG
-	vpand STATE2, STATE3, T0
-	vpxor T0, MSG, MSG
-	vmovdqa MSG, (%rdx)
-
-	call __morus1280_update
-	sub $32, %rcx
-	add $32, %rsi
-	add $32, %rdx
-	cmp $32, %rcx
-	jge .Ldec_a_loop
-
-	jmp .Ldec_cont
-.align 4
-.Ldec_u_loop:
-	vmovdqu (%rsi), MSG
-	vpxor STATE0, MSG, MSG
-	vpermq $MASK3, STATE1, T0
-	vpxor T0, MSG, MSG
-	vpand STATE2, STATE3, T0
-	vpxor T0, MSG, MSG
-	vmovdqu MSG, (%rdx)
-
-	call __morus1280_update
-	sub $32, %rcx
-	add $32, %rsi
-	add $32, %rdx
-	cmp $32, %rcx
-	jge .Ldec_u_loop
-
-.Ldec_cont:
-	/* store the state: */
-	vmovdqu STATE0, (0 * 32)(%rdi)
-	vmovdqu STATE1, (1 * 32)(%rdi)
-	vmovdqu STATE2, (2 * 32)(%rdi)
-	vmovdqu STATE3, (3 * 32)(%rdi)
-	vmovdqu STATE4, (4 * 32)(%rdi)
-
-.Ldec_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_avx2_dec)
-
-/*
- * void crypto_morus1280_avx2_dec_tail(void *state, const void *src, void *dst,
- *                                     unsigned int length);
- */
-ENTRY(crypto_morus1280_avx2_dec_tail)
-	FRAME_BEGIN
-
-	/* load the state: */
-	vmovdqu (0 * 32)(%rdi), STATE0
-	vmovdqu (1 * 32)(%rdi), STATE1
-	vmovdqu (2 * 32)(%rdi), STATE2
-	vmovdqu (3 * 32)(%rdi), STATE3
-	vmovdqu (4 * 32)(%rdi), STATE4
-
-	/* decrypt message: */
-	call __load_partial
-
-	vpxor STATE0, MSG, MSG
-	vpermq $MASK3, STATE1, T0
-	vpxor T0, MSG, MSG
-	vpand STATE2, STATE3, T0
-	vpxor T0, MSG, MSG
-	vmovdqa MSG, T0
-
-	call __store_partial
-
-	/* mask with byte count: */
-	movq %rcx, T0_LOW
-	vpbroadcastb T0_LOW, T0
-	vmovdqa .Lmorus1280_counter, T1
-	vpcmpgtb T1, T0, T0
-	vpand T0, MSG, MSG
-
-	call __morus1280_update
-
-	/* store the state: */
-	vmovdqu STATE0, (0 * 32)(%rdi)
-	vmovdqu STATE1, (1 * 32)(%rdi)
-	vmovdqu STATE2, (2 * 32)(%rdi)
-	vmovdqu STATE3, (3 * 32)(%rdi)
-	vmovdqu STATE4, (4 * 32)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_avx2_dec_tail)
-
-/*
- * void crypto_morus1280_avx2_final(void *state, void *tag_xor,
- *                                  u64 assoclen, u64 cryptlen);
- */
-ENTRY(crypto_morus1280_avx2_final)
-	FRAME_BEGIN
-
-	/* load the state: */
-	vmovdqu (0 * 32)(%rdi), STATE0
-	vmovdqu (1 * 32)(%rdi), STATE1
-	vmovdqu (2 * 32)(%rdi), STATE2
-	vmovdqu (3 * 32)(%rdi), STATE3
-	vmovdqu (4 * 32)(%rdi), STATE4
-
-	/* xor state[0] into state[4]: */
-	vpxor STATE0, STATE4, STATE4
-
-	/* prepare length block: */
-	vpxor MSG, MSG, MSG
-	vpinsrq $0, %rdx, MSG_LOW, MSG_LOW
-	vpinsrq $1, %rcx, MSG_LOW, MSG_LOW
-	vpsllq $3, MSG, MSG /* multiply by 8 (to get bit count) */
-
-	/* update state: */
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-
-	/* xor tag: */
-	vmovdqu (%rsi), MSG
-
-	vpxor STATE0, MSG, MSG
-	vpermq $MASK3, STATE1, T0
-	vpxor T0, MSG, MSG
-	vpand STATE2, STATE3, T0
-	vpxor T0, MSG, MSG
-	vmovdqu MSG, (%rsi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_avx2_final)
diff --git a/arch/x86/crypto/morus1280-avx2-glue.c b/arch/x86/crypto/morus1280-avx2-glue.c
deleted file mode 100644
index 2d000d6..0000000
--- a/arch/x86/crypto/morus1280-avx2-glue.c
+++ /dev/null
@@ -1,62 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The MORUS-1280 Authenticated-Encryption Algorithm
- *   Glue for AVX2 implementation
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <crypto/internal/aead.h>
-#include <crypto/internal/simd.h>
-#include <crypto/morus1280_glue.h>
-#include <linux/module.h>
-#include <asm/fpu/api.h>
-#include <asm/cpu_device_id.h>
-
-asmlinkage void crypto_morus1280_avx2_init(void *state, const void *key,
-					   const void *iv);
-asmlinkage void crypto_morus1280_avx2_ad(void *state, const void *data,
-					 unsigned int length);
-
-asmlinkage void crypto_morus1280_avx2_enc(void *state, const void *src,
-					  void *dst, unsigned int length);
-asmlinkage void crypto_morus1280_avx2_dec(void *state, const void *src,
-					  void *dst, unsigned int length);
-
-asmlinkage void crypto_morus1280_avx2_enc_tail(void *state, const void *src,
-					       void *dst, unsigned int length);
-asmlinkage void crypto_morus1280_avx2_dec_tail(void *state, const void *src,
-					       void *dst, unsigned int length);
-
-asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor,
-					    u64 assoclen, u64 cryptlen);
-
-MORUS1280_DECLARE_ALG(avx2, "morus1280-avx2", 400);
-
-static struct simd_aead_alg *simd_alg;
-
-static int __init crypto_morus1280_avx2_module_init(void)
-{
-	if (!boot_cpu_has(X86_FEATURE_AVX2) ||
-	    !boot_cpu_has(X86_FEATURE_OSXSAVE) ||
-	    !cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL))
-		return -ENODEV;
-
-	return simd_register_aeads_compat(&crypto_morus1280_avx2_alg, 1,
-					  &simd_alg);
-}
-
-static void __exit crypto_morus1280_avx2_module_exit(void)
-{
-	simd_unregister_aeads(&crypto_morus1280_avx2_alg, 1, &simd_alg);
-}
-
-module_init(crypto_morus1280_avx2_module_init);
-module_exit(crypto_morus1280_avx2_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- AVX2 implementation");
-MODULE_ALIAS_CRYPTO("morus1280");
-MODULE_ALIAS_CRYPTO("morus1280-avx2");
diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S
deleted file mode 100644
index 0eece77..0000000
--- a/arch/x86/crypto/morus1280-sse2-asm.S
+++ /dev/null
@@ -1,893 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * SSE2 implementation of MORUS-1280
- *
- * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-
-#define SHUFFLE_MASK(i0, i1, i2, i3) \
-	(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
-
-#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
-
-#define STATE0_LO	%xmm0
-#define STATE0_HI	%xmm1
-#define STATE1_LO	%xmm2
-#define STATE1_HI	%xmm3
-#define STATE2_LO	%xmm4
-#define STATE2_HI	%xmm5
-#define STATE3_LO	%xmm6
-#define STATE3_HI	%xmm7
-#define STATE4_LO	%xmm8
-#define STATE4_HI	%xmm9
-#define KEY_LO		%xmm10
-#define KEY_HI		%xmm11
-#define MSG_LO		%xmm10
-#define MSG_HI		%xmm11
-#define T0_LO		%xmm12
-#define T0_HI		%xmm13
-#define T1_LO		%xmm14
-#define T1_HI		%xmm15
-
-.section .rodata.cst16.morus640_const, "aM", @progbits, 16
-.align 16
-.Lmorus640_const_0:
-	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
-	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
-.Lmorus640_const_1:
-	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
-	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
-
-.section .rodata.cst16.morus640_counter, "aM", @progbits, 16
-.align 16
-.Lmorus640_counter_0:
-	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
-	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
-.Lmorus640_counter_1:
-	.byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
-	.byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
-
-.text
-
-.macro rol1 hi, lo
-	/*
-	 * HI_1 | HI_0 || LO_1 | LO_0
-	 *  ==>
-	 * HI_0 | HI_1 || LO_1 | LO_0
-	 *  ==>
-	 * HI_0 | LO_1 || LO_0 | HI_1
-	 */
-	pshufd $MASK2, \hi, \hi
-	movdqa \hi, T0_LO
-	punpcklqdq \lo, T0_LO
-	punpckhqdq \hi, \lo
-	movdqa \lo, \hi
-	movdqa T0_LO, \lo
-.endm
-
-.macro rol2 hi, lo
-	movdqa \lo, T0_LO
-	movdqa \hi, \lo
-	movdqa T0_LO, \hi
-.endm
-
-.macro rol3 hi, lo
-	/*
-	 * HI_1 | HI_0 || LO_1 | LO_0
-	 *  ==>
-	 * HI_0 | HI_1 || LO_1 | LO_0
-	 *  ==>
-	 * LO_0 | HI_1 || HI_0 | LO_1
-	 */
-	pshufd $MASK2, \hi, \hi
-	movdqa \lo, T0_LO
-	punpckhqdq \hi, T0_LO
-	punpcklqdq \lo, \hi
-	movdqa T0_LO, \lo
-.endm
-
-.macro morus1280_round s0_l, s0_h, s1_l, s1_h, s2_l, s2_h, s3_l, s3_h, s4_l, s4_h, b, w
-	movdqa \s1_l, T0_LO
-	pand \s2_l, T0_LO
-	pxor T0_LO, \s0_l
-
-	movdqa \s1_h, T0_LO
-	pand \s2_h, T0_LO
-	pxor T0_LO, \s0_h
-
-	pxor \s3_l, \s0_l
-	pxor \s3_h, \s0_h
-
-	movdqa \s0_l, T0_LO
-	psllq $\b, T0_LO
-	psrlq $(64 - \b), \s0_l
-	pxor T0_LO, \s0_l
-
-	movdqa \s0_h, T0_LO
-	psllq $\b, T0_LO
-	psrlq $(64 - \b), \s0_h
-	pxor T0_LO, \s0_h
-
-	\w \s3_h, \s3_l
-.endm
-
-/*
- * __morus1280_update: internal ABI
- * input:
- *   STATE[0-4] - input state
- *   MSG        - message block
- * output:
- *   STATE[0-4] - output state
- * changed:
- *   T0
- */
-__morus1280_update:
-	morus1280_round \
-		STATE0_LO, STATE0_HI, \
-		STATE1_LO, STATE1_HI, \
-		STATE2_LO, STATE2_HI, \
-		STATE3_LO, STATE3_HI, \
-		STATE4_LO, STATE4_HI, \
-		13, rol1
-	pxor MSG_LO, STATE1_LO
-	pxor MSG_HI, STATE1_HI
-	morus1280_round \
-		STATE1_LO, STATE1_HI, \
-		STATE2_LO, STATE2_HI, \
-		STATE3_LO, STATE3_HI, \
-		STATE4_LO, STATE4_HI, \
-		STATE0_LO, STATE0_HI, \
-		46, rol2
-	pxor MSG_LO, STATE2_LO
-	pxor MSG_HI, STATE2_HI
-	morus1280_round \
-		STATE2_LO, STATE2_HI, \
-		STATE3_LO, STATE3_HI, \
-		STATE4_LO, STATE4_HI, \
-		STATE0_LO, STATE0_HI, \
-		STATE1_LO, STATE1_HI, \
-		38, rol3
-	pxor MSG_LO, STATE3_LO
-	pxor MSG_HI, STATE3_HI
-	morus1280_round \
-		STATE3_LO, STATE3_HI, \
-		STATE4_LO, STATE4_HI, \
-		STATE0_LO, STATE0_HI, \
-		STATE1_LO, STATE1_HI, \
-		STATE2_LO, STATE2_HI, \
-		7, rol2
-	pxor MSG_LO, STATE4_LO
-	pxor MSG_HI, STATE4_HI
-	morus1280_round \
-		STATE4_LO, STATE4_HI, \
-		STATE0_LO, STATE0_HI, \
-		STATE1_LO, STATE1_HI, \
-		STATE2_LO, STATE2_HI, \
-		STATE3_LO, STATE3_HI, \
-		4, rol1
-	ret
-ENDPROC(__morus1280_update)
-
-/*
- * __morus1280_update_zero: internal ABI
- * input:
- *   STATE[0-4] - input state
- * output:
- *   STATE[0-4] - output state
- * changed:
- *   T0
- */
-__morus1280_update_zero:
-	morus1280_round \
-		STATE0_LO, STATE0_HI, \
-		STATE1_LO, STATE1_HI, \
-		STATE2_LO, STATE2_HI, \
-		STATE3_LO, STATE3_HI, \
-		STATE4_LO, STATE4_HI, \
-		13, rol1
-	morus1280_round \
-		STATE1_LO, STATE1_HI, \
-		STATE2_LO, STATE2_HI, \
-		STATE3_LO, STATE3_HI, \
-		STATE4_LO, STATE4_HI, \
-		STATE0_LO, STATE0_HI, \
-		46, rol2
-	morus1280_round \
-		STATE2_LO, STATE2_HI, \
-		STATE3_LO, STATE3_HI, \
-		STATE4_LO, STATE4_HI, \
-		STATE0_LO, STATE0_HI, \
-		STATE1_LO, STATE1_HI, \
-		38, rol3
-	morus1280_round \
-		STATE3_LO, STATE3_HI, \
-		STATE4_LO, STATE4_HI, \
-		STATE0_LO, STATE0_HI, \
-		STATE1_LO, STATE1_HI, \
-		STATE2_LO, STATE2_HI, \
-		7, rol2
-	morus1280_round \
-		STATE4_LO, STATE4_HI, \
-		STATE0_LO, STATE0_HI, \
-		STATE1_LO, STATE1_HI, \
-		STATE2_LO, STATE2_HI, \
-		STATE3_LO, STATE3_HI, \
-		4, rol1
-	ret
-ENDPROC(__morus1280_update_zero)
-
-/*
- * __load_partial: internal ABI
- * input:
- *   %rsi - src
- *   %rcx - bytes
- * output:
- *   MSG  - message block
- * changed:
- *   %r8
- *   %r9
- */
-__load_partial:
-	xor %r9d, %r9d
-	pxor MSG_LO, MSG_LO
-	pxor MSG_HI, MSG_HI
-
-	mov %rcx, %r8
-	and $0x1, %r8
-	jz .Lld_partial_1
-
-	mov %rcx, %r8
-	and $0x1E, %r8
-	add %rsi, %r8
-	mov (%r8), %r9b
-
-.Lld_partial_1:
-	mov %rcx, %r8
-	and $0x2, %r8
-	jz .Lld_partial_2
-
-	mov %rcx, %r8
-	and $0x1C, %r8
-	add %rsi, %r8
-	shl $16, %r9
-	mov (%r8), %r9w
-
-.Lld_partial_2:
-	mov %rcx, %r8
-	and $0x4, %r8
-	jz .Lld_partial_4
-
-	mov %rcx, %r8
-	and $0x18, %r8
-	add %rsi, %r8
-	shl $32, %r9
-	mov (%r8), %r8d
-	xor %r8, %r9
-
-.Lld_partial_4:
-	movq %r9, MSG_LO
-
-	mov %rcx, %r8
-	and $0x8, %r8
-	jz .Lld_partial_8
-
-	mov %rcx, %r8
-	and $0x10, %r8
-	add %rsi, %r8
-	pslldq $8, MSG_LO
-	movq (%r8), T0_LO
-	pxor T0_LO, MSG_LO
-
-.Lld_partial_8:
-	mov %rcx, %r8
-	and $0x10, %r8
-	jz .Lld_partial_16
-
-	movdqa MSG_LO, MSG_HI
-	movdqu (%rsi), MSG_LO
-
-.Lld_partial_16:
-	ret
-ENDPROC(__load_partial)
-
-/*
- * __store_partial: internal ABI
- * input:
- *   %rdx - dst
- *   %rcx - bytes
- * output:
- *   T0   - message block
- * changed:
- *   %r8
- *   %r9
- *   %r10
- */
-__store_partial:
-	mov %rcx, %r8
-	mov %rdx, %r9
-
-	cmp $16, %r8
-	jl .Lst_partial_16
-
-	movdqu T0_LO, (%r9)
-	movdqa T0_HI, T0_LO
-
-	sub $16, %r8
-	add $16, %r9
-
-.Lst_partial_16:
-	movq T0_LO, %r10
-
-	cmp $8, %r8
-	jl .Lst_partial_8
-
-	mov %r10, (%r9)
-	psrldq $8, T0_LO
-	movq T0_LO, %r10
-
-	sub $8, %r8
-	add $8, %r9
-
-.Lst_partial_8:
-	cmp $4, %r8
-	jl .Lst_partial_4
-
-	mov %r10d, (%r9)
-	shr $32, %r10
-
-	sub $4, %r8
-	add $4, %r9
-
-.Lst_partial_4:
-	cmp $2, %r8
-	jl .Lst_partial_2
-
-	mov %r10w, (%r9)
-	shr $16, %r10
-
-	sub $2, %r8
-	add $2, %r9
-
-.Lst_partial_2:
-	cmp $1, %r8
-	jl .Lst_partial_1
-
-	mov %r10b, (%r9)
-
-.Lst_partial_1:
-	ret
-ENDPROC(__store_partial)
-
-/*
- * void crypto_morus1280_sse2_init(void *state, const void *key,
- *                                 const void *iv);
- */
-ENTRY(crypto_morus1280_sse2_init)
-	FRAME_BEGIN
-
-	/* load IV: */
-	pxor STATE0_HI, STATE0_HI
-	movdqu (%rdx), STATE0_LO
-	/* load key: */
-	movdqu  0(%rsi), KEY_LO
-	movdqu 16(%rsi), KEY_HI
-	movdqa KEY_LO, STATE1_LO
-	movdqa KEY_HI, STATE1_HI
-	/* load all ones: */
-	pcmpeqd STATE2_LO, STATE2_LO
-	pcmpeqd STATE2_HI, STATE2_HI
-	/* load all zeros: */
-	pxor STATE3_LO, STATE3_LO
-	pxor STATE3_HI, STATE3_HI
-	/* load the constant: */
-	movdqa .Lmorus640_const_0, STATE4_LO
-	movdqa .Lmorus640_const_1, STATE4_HI
-
-	/* update 16 times with zero: */
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-	call __morus1280_update_zero
-
-	/* xor-in the key again after updates: */
-	pxor KEY_LO, STATE1_LO
-	pxor KEY_HI, STATE1_HI
-
-	/* store the state: */
-	movdqu STATE0_LO, (0 * 16)(%rdi)
-	movdqu STATE0_HI, (1 * 16)(%rdi)
-	movdqu STATE1_LO, (2 * 16)(%rdi)
-	movdqu STATE1_HI, (3 * 16)(%rdi)
-	movdqu STATE2_LO, (4 * 16)(%rdi)
-	movdqu STATE2_HI, (5 * 16)(%rdi)
-	movdqu STATE3_LO, (6 * 16)(%rdi)
-	movdqu STATE3_HI, (7 * 16)(%rdi)
-	movdqu STATE4_LO, (8 * 16)(%rdi)
-	movdqu STATE4_HI, (9 * 16)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_sse2_init)
-
-/*
- * void crypto_morus1280_sse2_ad(void *state, const void *data,
- *                               unsigned int length);
- */
-ENTRY(crypto_morus1280_sse2_ad)
-	FRAME_BEGIN
-
-	cmp $32, %rdx
-	jb .Lad_out
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0_LO
-	movdqu (1 * 16)(%rdi), STATE0_HI
-	movdqu (2 * 16)(%rdi), STATE1_LO
-	movdqu (3 * 16)(%rdi), STATE1_HI
-	movdqu (4 * 16)(%rdi), STATE2_LO
-	movdqu (5 * 16)(%rdi), STATE2_HI
-	movdqu (6 * 16)(%rdi), STATE3_LO
-	movdqu (7 * 16)(%rdi), STATE3_HI
-	movdqu (8 * 16)(%rdi), STATE4_LO
-	movdqu (9 * 16)(%rdi), STATE4_HI
-
-	mov %rsi, %r8
-	and $0xF, %r8
-	jnz .Lad_u_loop
-
-.align 4
-.Lad_a_loop:
-	movdqa  0(%rsi), MSG_LO
-	movdqa 16(%rsi), MSG_HI
-	call __morus1280_update
-	sub $32, %rdx
-	add $32, %rsi
-	cmp $32, %rdx
-	jge .Lad_a_loop
-
-	jmp .Lad_cont
-.align 4
-.Lad_u_loop:
-	movdqu  0(%rsi), MSG_LO
-	movdqu 16(%rsi), MSG_HI
-	call __morus1280_update
-	sub $32, %rdx
-	add $32, %rsi
-	cmp $32, %rdx
-	jge .Lad_u_loop
-
-.Lad_cont:
-	/* store the state: */
-	movdqu STATE0_LO, (0 * 16)(%rdi)
-	movdqu STATE0_HI, (1 * 16)(%rdi)
-	movdqu STATE1_LO, (2 * 16)(%rdi)
-	movdqu STATE1_HI, (3 * 16)(%rdi)
-	movdqu STATE2_LO, (4 * 16)(%rdi)
-	movdqu STATE2_HI, (5 * 16)(%rdi)
-	movdqu STATE3_LO, (6 * 16)(%rdi)
-	movdqu STATE3_HI, (7 * 16)(%rdi)
-	movdqu STATE4_LO, (8 * 16)(%rdi)
-	movdqu STATE4_HI, (9 * 16)(%rdi)
-
-.Lad_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_sse2_ad)
-
-/*
- * void crypto_morus1280_sse2_enc(void *state, const void *src, void *dst,
- *                                unsigned int length);
- */
-ENTRY(crypto_morus1280_sse2_enc)
-	FRAME_BEGIN
-
-	cmp $32, %rcx
-	jb .Lenc_out
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0_LO
-	movdqu (1 * 16)(%rdi), STATE0_HI
-	movdqu (2 * 16)(%rdi), STATE1_LO
-	movdqu (3 * 16)(%rdi), STATE1_HI
-	movdqu (4 * 16)(%rdi), STATE2_LO
-	movdqu (5 * 16)(%rdi), STATE2_HI
-	movdqu (6 * 16)(%rdi), STATE3_LO
-	movdqu (7 * 16)(%rdi), STATE3_HI
-	movdqu (8 * 16)(%rdi), STATE4_LO
-	movdqu (9 * 16)(%rdi), STATE4_HI
-
-	mov %rsi, %r8
-	or  %rdx, %r8
-	and $0xF, %r8
-	jnz .Lenc_u_loop
-
-.align 4
-.Lenc_a_loop:
-	movdqa  0(%rsi), MSG_LO
-	movdqa 16(%rsi), MSG_HI
-	movdqa STATE1_LO, T1_LO
-	movdqa STATE1_HI, T1_HI
-	rol3 T1_HI, T1_LO
-	movdqa MSG_LO, T0_LO
-	movdqa MSG_HI, T0_HI
-	pxor T1_LO, T0_LO
-	pxor T1_HI, T0_HI
-	pxor STATE0_LO, T0_LO
-	pxor STATE0_HI, T0_HI
-	movdqa STATE2_LO, T1_LO
-	movdqa STATE2_HI, T1_HI
-	pand STATE3_LO, T1_LO
-	pand STATE3_HI, T1_HI
-	pxor T1_LO, T0_LO
-	pxor T1_HI, T0_HI
-	movdqa T0_LO,  0(%rdx)
-	movdqa T0_HI, 16(%rdx)
-
-	call __morus1280_update
-	sub $32, %rcx
-	add $32, %rsi
-	add $32, %rdx
-	cmp $32, %rcx
-	jge .Lenc_a_loop
-
-	jmp .Lenc_cont
-.align 4
-.Lenc_u_loop:
-	movdqu  0(%rsi), MSG_LO
-	movdqu 16(%rsi), MSG_HI
-	movdqa STATE1_LO, T1_LO
-	movdqa STATE1_HI, T1_HI
-	rol3 T1_HI, T1_LO
-	movdqa MSG_LO, T0_LO
-	movdqa MSG_HI, T0_HI
-	pxor T1_LO, T0_LO
-	pxor T1_HI, T0_HI
-	pxor STATE0_LO, T0_LO
-	pxor STATE0_HI, T0_HI
-	movdqa STATE2_LO, T1_LO
-	movdqa STATE2_HI, T1_HI
-	pand STATE3_LO, T1_LO
-	pand STATE3_HI, T1_HI
-	pxor T1_LO, T0_LO
-	pxor T1_HI, T0_HI
-	movdqu T0_LO,  0(%rdx)
-	movdqu T0_HI, 16(%rdx)
-
-	call __morus1280_update
-	sub $32, %rcx
-	add $32, %rsi
-	add $32, %rdx
-	cmp $32, %rcx
-	jge .Lenc_u_loop
-
-.Lenc_cont:
-	/* store the state: */
-	movdqu STATE0_LO, (0 * 16)(%rdi)
-	movdqu STATE0_HI, (1 * 16)(%rdi)
-	movdqu STATE1_LO, (2 * 16)(%rdi)
-	movdqu STATE1_HI, (3 * 16)(%rdi)
-	movdqu STATE2_LO, (4 * 16)(%rdi)
-	movdqu STATE2_HI, (5 * 16)(%rdi)
-	movdqu STATE3_LO, (6 * 16)(%rdi)
-	movdqu STATE3_HI, (7 * 16)(%rdi)
-	movdqu STATE4_LO, (8 * 16)(%rdi)
-	movdqu STATE4_HI, (9 * 16)(%rdi)
-
-.Lenc_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_sse2_enc)
-
-/*
- * void crypto_morus1280_sse2_enc_tail(void *state, const void *src, void *dst,
- *                                     unsigned int length);
- */
-ENTRY(crypto_morus1280_sse2_enc_tail)
-	FRAME_BEGIN
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0_LO
-	movdqu (1 * 16)(%rdi), STATE0_HI
-	movdqu (2 * 16)(%rdi), STATE1_LO
-	movdqu (3 * 16)(%rdi), STATE1_HI
-	movdqu (4 * 16)(%rdi), STATE2_LO
-	movdqu (5 * 16)(%rdi), STATE2_HI
-	movdqu (6 * 16)(%rdi), STATE3_LO
-	movdqu (7 * 16)(%rdi), STATE3_HI
-	movdqu (8 * 16)(%rdi), STATE4_LO
-	movdqu (9 * 16)(%rdi), STATE4_HI
-
-	/* encrypt message: */
-	call __load_partial
-
-	movdqa STATE1_LO, T1_LO
-	movdqa STATE1_HI, T1_HI
-	rol3 T1_HI, T1_LO
-	movdqa MSG_LO, T0_LO
-	movdqa MSG_HI, T0_HI
-	pxor T1_LO, T0_LO
-	pxor T1_HI, T0_HI
-	pxor STATE0_LO, T0_LO
-	pxor STATE0_HI, T0_HI
-	movdqa STATE2_LO, T1_LO
-	movdqa STATE2_HI, T1_HI
-	pand STATE3_LO, T1_LO
-	pand STATE3_HI, T1_HI
-	pxor T1_LO, T0_LO
-	pxor T1_HI, T0_HI
-
-	call __store_partial
-
-	call __morus1280_update
-
-	/* store the state: */
-	movdqu STATE0_LO, (0 * 16)(%rdi)
-	movdqu STATE0_HI, (1 * 16)(%rdi)
-	movdqu STATE1_LO, (2 * 16)(%rdi)
-	movdqu STATE1_HI, (3 * 16)(%rdi)
-	movdqu STATE2_LO, (4 * 16)(%rdi)
-	movdqu STATE2_HI, (5 * 16)(%rdi)
-	movdqu STATE3_LO, (6 * 16)(%rdi)
-	movdqu STATE3_HI, (7 * 16)(%rdi)
-	movdqu STATE4_LO, (8 * 16)(%rdi)
-	movdqu STATE4_HI, (9 * 16)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_sse2_enc_tail)
-
-/*
- * void crypto_morus1280_sse2_dec(void *state, const void *src, void *dst,
- *                                unsigned int length);
- */
-ENTRY(crypto_morus1280_sse2_dec)
-	FRAME_BEGIN
-
-	cmp $32, %rcx
-	jb .Ldec_out
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0_LO
-	movdqu (1 * 16)(%rdi), STATE0_HI
-	movdqu (2 * 16)(%rdi), STATE1_LO
-	movdqu (3 * 16)(%rdi), STATE1_HI
-	movdqu (4 * 16)(%rdi), STATE2_LO
-	movdqu (5 * 16)(%rdi), STATE2_HI
-	movdqu (6 * 16)(%rdi), STATE3_LO
-	movdqu (7 * 16)(%rdi), STATE3_HI
-	movdqu (8 * 16)(%rdi), STATE4_LO
-	movdqu (9 * 16)(%rdi), STATE4_HI
-
-	mov %rsi, %r8
-	or  %rdx, %r8
-	and $0xF, %r8
-	jnz .Ldec_u_loop
-
-.align 4
-.Ldec_a_loop:
-	movdqa  0(%rsi), MSG_LO
-	movdqa 16(%rsi), MSG_HI
-	pxor STATE0_LO, MSG_LO
-	pxor STATE0_HI, MSG_HI
-	movdqa STATE1_LO, T1_LO
-	movdqa STATE1_HI, T1_HI
-	rol3 T1_HI, T1_LO
-	pxor T1_LO, MSG_LO
-	pxor T1_HI, MSG_HI
-	movdqa STATE2_LO, T1_LO
-	movdqa STATE2_HI, T1_HI
-	pand STATE3_LO, T1_LO
-	pand STATE3_HI, T1_HI
-	pxor T1_LO, MSG_LO
-	pxor T1_HI, MSG_HI
-	movdqa MSG_LO,  0(%rdx)
-	movdqa MSG_HI, 16(%rdx)
-
-	call __morus1280_update
-	sub $32, %rcx
-	add $32, %rsi
-	add $32, %rdx
-	cmp $32, %rcx
-	jge .Ldec_a_loop
-
-	jmp .Ldec_cont
-.align 4
-.Ldec_u_loop:
-	movdqu  0(%rsi), MSG_LO
-	movdqu 16(%rsi), MSG_HI
-	pxor STATE0_LO, MSG_LO
-	pxor STATE0_HI, MSG_HI
-	movdqa STATE1_LO, T1_LO
-	movdqa STATE1_HI, T1_HI
-	rol3 T1_HI, T1_LO
-	pxor T1_LO, MSG_LO
-	pxor T1_HI, MSG_HI
-	movdqa STATE2_LO, T1_LO
-	movdqa STATE2_HI, T1_HI
-	pand STATE3_LO, T1_LO
-	pand STATE3_HI, T1_HI
-	pxor T1_LO, MSG_LO
-	pxor T1_HI, MSG_HI
-	movdqu MSG_LO,  0(%rdx)
-	movdqu MSG_HI, 16(%rdx)
-
-	call __morus1280_update
-	sub $32, %rcx
-	add $32, %rsi
-	add $32, %rdx
-	cmp $32, %rcx
-	jge .Ldec_u_loop
-
-.Ldec_cont:
-	/* store the state: */
-	movdqu STATE0_LO, (0 * 16)(%rdi)
-	movdqu STATE0_HI, (1 * 16)(%rdi)
-	movdqu STATE1_LO, (2 * 16)(%rdi)
-	movdqu STATE1_HI, (3 * 16)(%rdi)
-	movdqu STATE2_LO, (4 * 16)(%rdi)
-	movdqu STATE2_HI, (5 * 16)(%rdi)
-	movdqu STATE3_LO, (6 * 16)(%rdi)
-	movdqu STATE3_HI, (7 * 16)(%rdi)
-	movdqu STATE4_LO, (8 * 16)(%rdi)
-	movdqu STATE4_HI, (9 * 16)(%rdi)
-
-.Ldec_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_sse2_dec)
-
-/*
- * void crypto_morus1280_sse2_dec_tail(void *state, const void *src, void *dst,
- *                                     unsigned int length);
- */
-ENTRY(crypto_morus1280_sse2_dec_tail)
-	FRAME_BEGIN
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0_LO
-	movdqu (1 * 16)(%rdi), STATE0_HI
-	movdqu (2 * 16)(%rdi), STATE1_LO
-	movdqu (3 * 16)(%rdi), STATE1_HI
-	movdqu (4 * 16)(%rdi), STATE2_LO
-	movdqu (5 * 16)(%rdi), STATE2_HI
-	movdqu (6 * 16)(%rdi), STATE3_LO
-	movdqu (7 * 16)(%rdi), STATE3_HI
-	movdqu (8 * 16)(%rdi), STATE4_LO
-	movdqu (9 * 16)(%rdi), STATE4_HI
-
-	/* decrypt message: */
-	call __load_partial
-
-	pxor STATE0_LO, MSG_LO
-	pxor STATE0_HI, MSG_HI
-	movdqa STATE1_LO, T1_LO
-	movdqa STATE1_HI, T1_HI
-	rol3 T1_HI, T1_LO
-	pxor T1_LO, MSG_LO
-	pxor T1_HI, MSG_HI
-	movdqa STATE2_LO, T1_LO
-	movdqa STATE2_HI, T1_HI
-	pand STATE3_LO, T1_LO
-	pand STATE3_HI, T1_HI
-	pxor T1_LO, MSG_LO
-	pxor T1_HI, MSG_HI
-	movdqa MSG_LO, T0_LO
-	movdqa MSG_HI, T0_HI
-
-	call __store_partial
-
-	/* mask with byte count: */
-	movq %rcx, T0_LO
-	punpcklbw T0_LO, T0_LO
-	punpcklbw T0_LO, T0_LO
-	punpcklbw T0_LO, T0_LO
-	punpcklbw T0_LO, T0_LO
-	movdqa T0_LO, T0_HI
-	movdqa .Lmorus640_counter_0, T1_LO
-	movdqa .Lmorus640_counter_1, T1_HI
-	pcmpgtb T1_LO, T0_LO
-	pcmpgtb T1_HI, T0_HI
-	pand T0_LO, MSG_LO
-	pand T0_HI, MSG_HI
-
-	call __morus1280_update
-
-	/* store the state: */
-	movdqu STATE0_LO, (0 * 16)(%rdi)
-	movdqu STATE0_HI, (1 * 16)(%rdi)
-	movdqu STATE1_LO, (2 * 16)(%rdi)
-	movdqu STATE1_HI, (3 * 16)(%rdi)
-	movdqu STATE2_LO, (4 * 16)(%rdi)
-	movdqu STATE2_HI, (5 * 16)(%rdi)
-	movdqu STATE3_LO, (6 * 16)(%rdi)
-	movdqu STATE3_HI, (7 * 16)(%rdi)
-	movdqu STATE4_LO, (8 * 16)(%rdi)
-	movdqu STATE4_HI, (9 * 16)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_sse2_dec_tail)
-
-/*
- * void crypto_morus1280_sse2_final(void *state, void *tag_xor,
- *                                  u64 assoclen, u64 cryptlen);
- */
-ENTRY(crypto_morus1280_sse2_final)
-	FRAME_BEGIN
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0_LO
-	movdqu (1 * 16)(%rdi), STATE0_HI
-	movdqu (2 * 16)(%rdi), STATE1_LO
-	movdqu (3 * 16)(%rdi), STATE1_HI
-	movdqu (4 * 16)(%rdi), STATE2_LO
-	movdqu (5 * 16)(%rdi), STATE2_HI
-	movdqu (6 * 16)(%rdi), STATE3_LO
-	movdqu (7 * 16)(%rdi), STATE3_HI
-	movdqu (8 * 16)(%rdi), STATE4_LO
-	movdqu (9 * 16)(%rdi), STATE4_HI
-
-	/* xor state[0] into state[4]: */
-	pxor STATE0_LO, STATE4_LO
-	pxor STATE0_HI, STATE4_HI
-
-	/* prepare length block: */
-	movq %rdx, MSG_LO
-	movq %rcx, T0_LO
-	pslldq $8, T0_LO
-	pxor T0_LO, MSG_LO
-	psllq $3, MSG_LO /* multiply by 8 (to get bit count) */
-	pxor MSG_HI, MSG_HI
-
-	/* update state: */
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-	call __morus1280_update
-
-	/* xor tag: */
-	movdqu  0(%rsi), MSG_LO
-	movdqu 16(%rsi), MSG_HI
-
-	pxor STATE0_LO, MSG_LO
-	pxor STATE0_HI, MSG_HI
-	movdqa STATE1_LO, T0_LO
-	movdqa STATE1_HI, T0_HI
-	rol3 T0_HI, T0_LO
-	pxor T0_LO, MSG_LO
-	pxor T0_HI, MSG_HI
-	movdqa STATE2_LO, T0_LO
-	movdqa STATE2_HI, T0_HI
-	pand STATE3_LO, T0_LO
-	pand STATE3_HI, T0_HI
-	pxor T0_LO, MSG_LO
-	pxor T0_HI, MSG_HI
-
-	movdqu MSG_LO,  0(%rsi)
-	movdqu MSG_HI, 16(%rsi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus1280_sse2_final)
diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c
deleted file mode 100644
index aada9d7..0000000
--- a/arch/x86/crypto/morus1280-sse2-glue.c
+++ /dev/null
@@ -1,61 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The MORUS-1280 Authenticated-Encryption Algorithm
- *   Glue for SSE2 implementation
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <crypto/internal/aead.h>
-#include <crypto/internal/simd.h>
-#include <crypto/morus1280_glue.h>
-#include <linux/module.h>
-#include <asm/fpu/api.h>
-#include <asm/cpu_device_id.h>
-
-asmlinkage void crypto_morus1280_sse2_init(void *state, const void *key,
-					   const void *iv);
-asmlinkage void crypto_morus1280_sse2_ad(void *state, const void *data,
-					 unsigned int length);
-
-asmlinkage void crypto_morus1280_sse2_enc(void *state, const void *src,
-					  void *dst, unsigned int length);
-asmlinkage void crypto_morus1280_sse2_dec(void *state, const void *src,
-					  void *dst, unsigned int length);
-
-asmlinkage void crypto_morus1280_sse2_enc_tail(void *state, const void *src,
-					       void *dst, unsigned int length);
-asmlinkage void crypto_morus1280_sse2_dec_tail(void *state, const void *src,
-					       void *dst, unsigned int length);
-
-asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor,
-					    u64 assoclen, u64 cryptlen);
-
-MORUS1280_DECLARE_ALG(sse2, "morus1280-sse2", 350);
-
-static struct simd_aead_alg *simd_alg;
-
-static int __init crypto_morus1280_sse2_module_init(void)
-{
-	if (!boot_cpu_has(X86_FEATURE_XMM2) ||
-	    !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
-		return -ENODEV;
-
-	return simd_register_aeads_compat(&crypto_morus1280_sse2_alg, 1,
-					  &simd_alg);
-}
-
-static void __exit crypto_morus1280_sse2_module_exit(void)
-{
-	simd_unregister_aeads(&crypto_morus1280_sse2_alg, 1, &simd_alg);
-}
-
-module_init(crypto_morus1280_sse2_module_init);
-module_exit(crypto_morus1280_sse2_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- SSE2 implementation");
-MODULE_ALIAS_CRYPTO("morus1280");
-MODULE_ALIAS_CRYPTO("morus1280-sse2");
diff --git a/arch/x86/crypto/morus1280_glue.c b/arch/x86/crypto/morus1280_glue.c
deleted file mode 100644
index ffbde8b..0000000
--- a/arch/x86/crypto/morus1280_glue.c
+++ /dev/null
@@ -1,205 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The MORUS-1280 Authenticated-Encryption Algorithm
- *   Common x86 SIMD glue skeleton
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <crypto/internal/aead.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/morus1280_glue.h>
-#include <crypto/scatterwalk.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/scatterlist.h>
-#include <asm/fpu/api.h>
-
-struct morus1280_state {
-	struct morus1280_block s[MORUS_STATE_BLOCKS];
-};
-
-struct morus1280_ops {
-	int (*skcipher_walk_init)(struct skcipher_walk *walk,
-				  struct aead_request *req, bool atomic);
-
-	void (*crypt_blocks)(void *state, const void *src, void *dst,
-			     unsigned int length);
-	void (*crypt_tail)(void *state, const void *src, void *dst,
-			   unsigned int length);
-};
-
-static void crypto_morus1280_glue_process_ad(
-		struct morus1280_state *state,
-		const struct morus1280_glue_ops *ops,
-		struct scatterlist *sg_src, unsigned int assoclen)
-{
-	struct scatter_walk walk;
-	struct morus1280_block buf;
-	unsigned int pos = 0;
-
-	scatterwalk_start(&walk, sg_src);
-	while (assoclen != 0) {
-		unsigned int size = scatterwalk_clamp(&walk, assoclen);
-		unsigned int left = size;
-		void *mapped = scatterwalk_map(&walk);
-		const u8 *src = (const u8 *)mapped;
-
-		if (pos + size >= MORUS1280_BLOCK_SIZE) {
-			if (pos > 0) {
-				unsigned int fill = MORUS1280_BLOCK_SIZE - pos;
-				memcpy(buf.bytes + pos, src, fill);
-				ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE);
-				pos = 0;
-				left -= fill;
-				src += fill;
-			}
-
-			ops->ad(state, src, left);
-			src += left & ~(MORUS1280_BLOCK_SIZE - 1);
-			left &= MORUS1280_BLOCK_SIZE - 1;
-		}
-
-		memcpy(buf.bytes + pos, src, left);
-
-		pos += left;
-		assoclen -= size;
-		scatterwalk_unmap(mapped);
-		scatterwalk_advance(&walk, size);
-		scatterwalk_done(&walk, 0, assoclen);
-	}
-
-	if (pos > 0) {
-		memset(buf.bytes + pos, 0, MORUS1280_BLOCK_SIZE - pos);
-		ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE);
-	}
-}
-
-static void crypto_morus1280_glue_process_crypt(struct morus1280_state *state,
-						struct morus1280_ops ops,
-						struct skcipher_walk *walk)
-{
-	while (walk->nbytes >= MORUS1280_BLOCK_SIZE) {
-		ops.crypt_blocks(state, walk->src.virt.addr,
-				 walk->dst.virt.addr,
-				 round_down(walk->nbytes,
-					    MORUS1280_BLOCK_SIZE));
-		skcipher_walk_done(walk, walk->nbytes % MORUS1280_BLOCK_SIZE);
-	}
-
-	if (walk->nbytes) {
-		ops.crypt_tail(state, walk->src.virt.addr, walk->dst.virt.addr,
-			       walk->nbytes);
-		skcipher_walk_done(walk, 0);
-	}
-}
-
-int crypto_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
-				 unsigned int keylen)
-{
-	struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
-
-	if (keylen == MORUS1280_BLOCK_SIZE) {
-		memcpy(ctx->key.bytes, key, MORUS1280_BLOCK_SIZE);
-	} else if (keylen == MORUS1280_BLOCK_SIZE / 2) {
-		memcpy(ctx->key.bytes, key, keylen);
-		memcpy(ctx->key.bytes + keylen, key, keylen);
-	} else {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setkey);
-
-int crypto_morus1280_glue_setauthsize(struct crypto_aead *tfm,
-				      unsigned int authsize)
-{
-	return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
-}
-EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setauthsize);
-
-static void crypto_morus1280_glue_crypt(struct aead_request *req,
-					struct morus1280_ops ops,
-					unsigned int cryptlen,
-					struct morus1280_block *tag_xor)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
-	struct morus1280_state state;
-	struct skcipher_walk walk;
-
-	ops.skcipher_walk_init(&walk, req, true);
-
-	kernel_fpu_begin();
-
-	ctx->ops->init(&state, &ctx->key, req->iv);
-	crypto_morus1280_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
-	crypto_morus1280_glue_process_crypt(&state, ops, &walk);
-	ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
-
-	kernel_fpu_end();
-}
-
-int crypto_morus1280_glue_encrypt(struct aead_request *req)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
-	struct morus1280_ops OPS = {
-		.skcipher_walk_init = skcipher_walk_aead_encrypt,
-		.crypt_blocks = ctx->ops->enc,
-		.crypt_tail = ctx->ops->enc_tail,
-	};
-
-	struct morus1280_block tag = {};
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen;
-
-	crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag);
-
-	scatterwalk_map_and_copy(tag.bytes, req->dst,
-				 req->assoclen + cryptlen, authsize, 1);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_morus1280_glue_encrypt);
-
-int crypto_morus1280_glue_decrypt(struct aead_request *req)
-{
-	static const u8 zeros[MORUS1280_BLOCK_SIZE] = {};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
-	struct morus1280_ops OPS = {
-		.skcipher_walk_init = skcipher_walk_aead_decrypt,
-		.crypt_blocks = ctx->ops->dec,
-		.crypt_tail = ctx->ops->dec_tail,
-	};
-
-	struct morus1280_block tag;
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen - authsize;
-
-	scatterwalk_map_and_copy(tag.bytes, req->src,
-				 req->assoclen + cryptlen, authsize, 0);
-
-	crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag);
-
-	return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
-}
-EXPORT_SYMBOL_GPL(crypto_morus1280_glue_decrypt);
-
-void crypto_morus1280_glue_init_ops(struct crypto_aead *aead,
-				    const struct morus1280_glue_ops *ops)
-{
-	struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
-	ctx->ops = ops;
-}
-EXPORT_SYMBOL_GPL(crypto_morus1280_glue_init_ops);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("MORUS-1280 AEAD mode -- glue for x86 optimizations");
diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S
deleted file mode 100644
index a608911..0000000
--- a/arch/x86/crypto/morus640-sse2-asm.S
+++ /dev/null
@@ -1,612 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * SSE2 implementation of MORUS-640
- *
- * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <linux/linkage.h>
-#include <asm/frame.h>
-
-#define SHUFFLE_MASK(i0, i1, i2, i3) \
-	(i0 | (i1 << 2) | (i2 << 4) | (i3 << 6))
-
-#define MASK1 SHUFFLE_MASK(3, 0, 1, 2)
-#define MASK2 SHUFFLE_MASK(2, 3, 0, 1)
-#define MASK3 SHUFFLE_MASK(1, 2, 3, 0)
-
-#define STATE0	%xmm0
-#define STATE1	%xmm1
-#define STATE2	%xmm2
-#define STATE3	%xmm3
-#define STATE4	%xmm4
-#define KEY	%xmm5
-#define MSG	%xmm5
-#define T0	%xmm6
-#define T1	%xmm7
-
-.section .rodata.cst16.morus640_const, "aM", @progbits, 32
-.align 16
-.Lmorus640_const_0:
-	.byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d
-	.byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62
-.Lmorus640_const_1:
-	.byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1
-	.byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd
-
-.section .rodata.cst16.morus640_counter, "aM", @progbits, 16
-.align 16
-.Lmorus640_counter:
-	.byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07
-	.byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
-
-.text
-
-.macro morus640_round s0, s1, s2, s3, s4, b, w
-	movdqa \s1, T0
-	pand \s2, T0
-	pxor T0, \s0
-	pxor \s3, \s0
-	movdqa \s0, T0
-	pslld $\b, T0
-	psrld $(32 - \b), \s0
-	pxor T0, \s0
-	pshufd $\w, \s3, \s3
-.endm
-
-/*
- * __morus640_update: internal ABI
- * input:
- *   STATE[0-4] - input state
- *   MSG        - message block
- * output:
- *   STATE[0-4] - output state
- * changed:
- *   T0
- */
-__morus640_update:
-	morus640_round STATE0, STATE1, STATE2, STATE3, STATE4,  5, MASK1
-	pxor MSG, STATE1
-	morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
-	pxor MSG, STATE2
-	morus640_round STATE2, STATE3, STATE4, STATE0, STATE1,  7, MASK3
-	pxor MSG, STATE3
-	morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
-	pxor MSG, STATE4
-	morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
-	ret
-ENDPROC(__morus640_update)
-
-
-/*
- * __morus640_update_zero: internal ABI
- * input:
- *   STATE[0-4] - input state
- * output:
- *   STATE[0-4] - output state
- * changed:
- *   T0
- */
-__morus640_update_zero:
-	morus640_round STATE0, STATE1, STATE2, STATE3, STATE4,  5, MASK1
-	morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2
-	morus640_round STATE2, STATE3, STATE4, STATE0, STATE1,  7, MASK3
-	morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2
-	morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1
-	ret
-ENDPROC(__morus640_update_zero)
-
-/*
- * __load_partial: internal ABI
- * input:
- *   %rsi - src
- *   %rcx - bytes
- * output:
- *   MSG  - message block
- * changed:
- *   T0
- *   %r8
- *   %r9
- */
-__load_partial:
-	xor %r9d, %r9d
-	pxor MSG, MSG
-
-	mov %rcx, %r8
-	and $0x1, %r8
-	jz .Lld_partial_1
-
-	mov %rcx, %r8
-	and $0x1E, %r8
-	add %rsi, %r8
-	mov (%r8), %r9b
-
-.Lld_partial_1:
-	mov %rcx, %r8
-	and $0x2, %r8
-	jz .Lld_partial_2
-
-	mov %rcx, %r8
-	and $0x1C, %r8
-	add %rsi, %r8
-	shl $16, %r9
-	mov (%r8), %r9w
-
-.Lld_partial_2:
-	mov %rcx, %r8
-	and $0x4, %r8
-	jz .Lld_partial_4
-
-	mov %rcx, %r8
-	and $0x18, %r8
-	add %rsi, %r8
-	shl $32, %r9
-	mov (%r8), %r8d
-	xor %r8, %r9
-
-.Lld_partial_4:
-	movq %r9, MSG
-
-	mov %rcx, %r8
-	and $0x8, %r8
-	jz .Lld_partial_8
-
-	mov %rcx, %r8
-	and $0x10, %r8
-	add %rsi, %r8
-	pslldq $8, MSG
-	movq (%r8), T0
-	pxor T0, MSG
-
-.Lld_partial_8:
-	ret
-ENDPROC(__load_partial)
-
-/*
- * __store_partial: internal ABI
- * input:
- *   %rdx - dst
- *   %rcx - bytes
- * output:
- *   T0   - message block
- * changed:
- *   %r8
- *   %r9
- *   %r10
- */
-__store_partial:
-	mov %rcx, %r8
-	mov %rdx, %r9
-
-	movq T0, %r10
-
-	cmp $8, %r8
-	jl .Lst_partial_8
-
-	mov %r10, (%r9)
-	psrldq $8, T0
-	movq T0, %r10
-
-	sub $8, %r8
-	add $8, %r9
-
-.Lst_partial_8:
-	cmp $4, %r8
-	jl .Lst_partial_4
-
-	mov %r10d, (%r9)
-	shr $32, %r10
-
-	sub $4, %r8
-	add $4, %r9
-
-.Lst_partial_4:
-	cmp $2, %r8
-	jl .Lst_partial_2
-
-	mov %r10w, (%r9)
-	shr $16, %r10
-
-	sub $2, %r8
-	add $2, %r9
-
-.Lst_partial_2:
-	cmp $1, %r8
-	jl .Lst_partial_1
-
-	mov %r10b, (%r9)
-
-.Lst_partial_1:
-	ret
-ENDPROC(__store_partial)
-
-/*
- * void crypto_morus640_sse2_init(void *state, const void *key, const void *iv);
- */
-ENTRY(crypto_morus640_sse2_init)
-	FRAME_BEGIN
-
-	/* load IV: */
-	movdqu (%rdx), STATE0
-	/* load key: */
-	movdqu (%rsi), KEY
-	movdqa KEY, STATE1
-	/* load all ones: */
-	pcmpeqd STATE2, STATE2
-	/* load the constants: */
-	movdqa .Lmorus640_const_0, STATE3
-	movdqa .Lmorus640_const_1, STATE4
-
-	/* update 16 times with zero: */
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-	call __morus640_update_zero
-
-	/* xor-in the key again after updates: */
-	pxor KEY, STATE1
-
-	/* store the state: */
-	movdqu STATE0, (0 * 16)(%rdi)
-	movdqu STATE1, (1 * 16)(%rdi)
-	movdqu STATE2, (2 * 16)(%rdi)
-	movdqu STATE3, (3 * 16)(%rdi)
-	movdqu STATE4, (4 * 16)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus640_sse2_init)
-
-/*
- * void crypto_morus640_sse2_ad(void *state, const void *data,
- *                              unsigned int length);
- */
-ENTRY(crypto_morus640_sse2_ad)
-	FRAME_BEGIN
-
-	cmp $16, %rdx
-	jb .Lad_out
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0
-	movdqu (1 * 16)(%rdi), STATE1
-	movdqu (2 * 16)(%rdi), STATE2
-	movdqu (3 * 16)(%rdi), STATE3
-	movdqu (4 * 16)(%rdi), STATE4
-
-	mov %rsi, %r8
-	and $0xF, %r8
-	jnz .Lad_u_loop
-
-.align 4
-.Lad_a_loop:
-	movdqa (%rsi), MSG
-	call __morus640_update
-	sub $16, %rdx
-	add $16, %rsi
-	cmp $16, %rdx
-	jge .Lad_a_loop
-
-	jmp .Lad_cont
-.align 4
-.Lad_u_loop:
-	movdqu (%rsi), MSG
-	call __morus640_update
-	sub $16, %rdx
-	add $16, %rsi
-	cmp $16, %rdx
-	jge .Lad_u_loop
-
-.Lad_cont:
-	/* store the state: */
-	movdqu STATE0, (0 * 16)(%rdi)
-	movdqu STATE1, (1 * 16)(%rdi)
-	movdqu STATE2, (2 * 16)(%rdi)
-	movdqu STATE3, (3 * 16)(%rdi)
-	movdqu STATE4, (4 * 16)(%rdi)
-
-.Lad_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus640_sse2_ad)
-
-/*
- * void crypto_morus640_sse2_enc(void *state, const void *src, void *dst,
- *                               unsigned int length);
- */
-ENTRY(crypto_morus640_sse2_enc)
-	FRAME_BEGIN
-
-	cmp $16, %rcx
-	jb .Lenc_out
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0
-	movdqu (1 * 16)(%rdi), STATE1
-	movdqu (2 * 16)(%rdi), STATE2
-	movdqu (3 * 16)(%rdi), STATE3
-	movdqu (4 * 16)(%rdi), STATE4
-
-	mov %rsi, %r8
-	or  %rdx, %r8
-	and $0xF, %r8
-	jnz .Lenc_u_loop
-
-.align 4
-.Lenc_a_loop:
-	movdqa (%rsi), MSG
-	movdqa MSG, T0
-	pxor STATE0, T0
-	pshufd $MASK3, STATE1, T1
-	pxor T1, T0
-	movdqa STATE2, T1
-	pand STATE3, T1
-	pxor T1, T0
-	movdqa T0, (%rdx)
-
-	call __morus640_update
-	sub $16, %rcx
-	add $16, %rsi
-	add $16, %rdx
-	cmp $16, %rcx
-	jge .Lenc_a_loop
-
-	jmp .Lenc_cont
-.align 4
-.Lenc_u_loop:
-	movdqu (%rsi), MSG
-	movdqa MSG, T0
-	pxor STATE0, T0
-	pshufd $MASK3, STATE1, T1
-	pxor T1, T0
-	movdqa STATE2, T1
-	pand STATE3, T1
-	pxor T1, T0
-	movdqu T0, (%rdx)
-
-	call __morus640_update
-	sub $16, %rcx
-	add $16, %rsi
-	add $16, %rdx
-	cmp $16, %rcx
-	jge .Lenc_u_loop
-
-.Lenc_cont:
-	/* store the state: */
-	movdqu STATE0, (0 * 16)(%rdi)
-	movdqu STATE1, (1 * 16)(%rdi)
-	movdqu STATE2, (2 * 16)(%rdi)
-	movdqu STATE3, (3 * 16)(%rdi)
-	movdqu STATE4, (4 * 16)(%rdi)
-
-.Lenc_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus640_sse2_enc)
-
-/*
- * void crypto_morus640_sse2_enc_tail(void *state, const void *src, void *dst,
- *                                    unsigned int length);
- */
-ENTRY(crypto_morus640_sse2_enc_tail)
-	FRAME_BEGIN
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0
-	movdqu (1 * 16)(%rdi), STATE1
-	movdqu (2 * 16)(%rdi), STATE2
-	movdqu (3 * 16)(%rdi), STATE3
-	movdqu (4 * 16)(%rdi), STATE4
-
-	/* encrypt message: */
-	call __load_partial
-
-	movdqa MSG, T0
-	pxor STATE0, T0
-	pshufd $MASK3, STATE1, T1
-	pxor T1, T0
-	movdqa STATE2, T1
-	pand STATE3, T1
-	pxor T1, T0
-
-	call __store_partial
-
-	call __morus640_update
-
-	/* store the state: */
-	movdqu STATE0, (0 * 16)(%rdi)
-	movdqu STATE1, (1 * 16)(%rdi)
-	movdqu STATE2, (2 * 16)(%rdi)
-	movdqu STATE3, (3 * 16)(%rdi)
-	movdqu STATE4, (4 * 16)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus640_sse2_enc_tail)
-
-/*
- * void crypto_morus640_sse2_dec(void *state, const void *src, void *dst,
- *                               unsigned int length);
- */
-ENTRY(crypto_morus640_sse2_dec)
-	FRAME_BEGIN
-
-	cmp $16, %rcx
-	jb .Ldec_out
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0
-	movdqu (1 * 16)(%rdi), STATE1
-	movdqu (2 * 16)(%rdi), STATE2
-	movdqu (3 * 16)(%rdi), STATE3
-	movdqu (4 * 16)(%rdi), STATE4
-
-	mov %rsi, %r8
-	or  %rdx, %r8
-	and $0xF, %r8
-	jnz .Ldec_u_loop
-
-.align 4
-.Ldec_a_loop:
-	movdqa (%rsi), MSG
-	pxor STATE0, MSG
-	pshufd $MASK3, STATE1, T0
-	pxor T0, MSG
-	movdqa STATE2, T0
-	pand STATE3, T0
-	pxor T0, MSG
-	movdqa MSG, (%rdx)
-
-	call __morus640_update
-	sub $16, %rcx
-	add $16, %rsi
-	add $16, %rdx
-	cmp $16, %rcx
-	jge .Ldec_a_loop
-
-	jmp .Ldec_cont
-.align 4
-.Ldec_u_loop:
-	movdqu (%rsi), MSG
-	pxor STATE0, MSG
-	pshufd $MASK3, STATE1, T0
-	pxor T0, MSG
-	movdqa STATE2, T0
-	pand STATE3, T0
-	pxor T0, MSG
-	movdqu MSG, (%rdx)
-
-	call __morus640_update
-	sub $16, %rcx
-	add $16, %rsi
-	add $16, %rdx
-	cmp $16, %rcx
-	jge .Ldec_u_loop
-
-.Ldec_cont:
-	/* store the state: */
-	movdqu STATE0, (0 * 16)(%rdi)
-	movdqu STATE1, (1 * 16)(%rdi)
-	movdqu STATE2, (2 * 16)(%rdi)
-	movdqu STATE3, (3 * 16)(%rdi)
-	movdqu STATE4, (4 * 16)(%rdi)
-
-.Ldec_out:
-	FRAME_END
-	ret
-ENDPROC(crypto_morus640_sse2_dec)
-
-/*
- * void crypto_morus640_sse2_dec_tail(void *state, const void *src, void *dst,
- *                                    unsigned int length);
- */
-ENTRY(crypto_morus640_sse2_dec_tail)
-	FRAME_BEGIN
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0
-	movdqu (1 * 16)(%rdi), STATE1
-	movdqu (2 * 16)(%rdi), STATE2
-	movdqu (3 * 16)(%rdi), STATE3
-	movdqu (4 * 16)(%rdi), STATE4
-
-	/* decrypt message: */
-	call __load_partial
-
-	pxor STATE0, MSG
-	pshufd $MASK3, STATE1, T0
-	pxor T0, MSG
-	movdqa STATE2, T0
-	pand STATE3, T0
-	pxor T0, MSG
-	movdqa MSG, T0
-
-	call __store_partial
-
-	/* mask with byte count: */
-	movq %rcx, T0
-	punpcklbw T0, T0
-	punpcklbw T0, T0
-	punpcklbw T0, T0
-	punpcklbw T0, T0
-	movdqa .Lmorus640_counter, T1
-	pcmpgtb T1, T0
-	pand T0, MSG
-
-	call __morus640_update
-
-	/* store the state: */
-	movdqu STATE0, (0 * 16)(%rdi)
-	movdqu STATE1, (1 * 16)(%rdi)
-	movdqu STATE2, (2 * 16)(%rdi)
-	movdqu STATE3, (3 * 16)(%rdi)
-	movdqu STATE4, (4 * 16)(%rdi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus640_sse2_dec_tail)
-
-/*
- * void crypto_morus640_sse2_final(void *state, void *tag_xor,
- *	                           u64 assoclen, u64 cryptlen);
- */
-ENTRY(crypto_morus640_sse2_final)
-	FRAME_BEGIN
-
-	/* load the state: */
-	movdqu (0 * 16)(%rdi), STATE0
-	movdqu (1 * 16)(%rdi), STATE1
-	movdqu (2 * 16)(%rdi), STATE2
-	movdqu (3 * 16)(%rdi), STATE3
-	movdqu (4 * 16)(%rdi), STATE4
-
-	/* xor state[0] into state[4]: */
-	pxor STATE0, STATE4
-
-	/* prepare length block: */
-	movq %rdx, MSG
-	movq %rcx, T0
-	pslldq $8, T0
-	pxor T0, MSG
-	psllq $3, MSG /* multiply by 8 (to get bit count) */
-
-	/* update state: */
-	call __morus640_update
-	call __morus640_update
-	call __morus640_update
-	call __morus640_update
-	call __morus640_update
-	call __morus640_update
-	call __morus640_update
-	call __morus640_update
-	call __morus640_update
-	call __morus640_update
-
-	/* xor tag: */
-	movdqu (%rsi), MSG
-
-	pxor STATE0, MSG
-	pshufd $MASK3, STATE1, T0
-	pxor T0, MSG
-	movdqa STATE2, T0
-	pand STATE3, T0
-	pxor T0, MSG
-
-	movdqu MSG, (%rsi)
-
-	FRAME_END
-	ret
-ENDPROC(crypto_morus640_sse2_final)
diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c
deleted file mode 100644
index 8ef6813..0000000
--- a/arch/x86/crypto/morus640-sse2-glue.c
+++ /dev/null
@@ -1,61 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The MORUS-640 Authenticated-Encryption Algorithm
- *   Glue for SSE2 implementation
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <crypto/internal/aead.h>
-#include <crypto/internal/simd.h>
-#include <crypto/morus640_glue.h>
-#include <linux/module.h>
-#include <asm/fpu/api.h>
-#include <asm/cpu_device_id.h>
-
-asmlinkage void crypto_morus640_sse2_init(void *state, const void *key,
-					  const void *iv);
-asmlinkage void crypto_morus640_sse2_ad(void *state, const void *data,
-					unsigned int length);
-
-asmlinkage void crypto_morus640_sse2_enc(void *state, const void *src,
-					 void *dst, unsigned int length);
-asmlinkage void crypto_morus640_sse2_dec(void *state, const void *src,
-					 void *dst, unsigned int length);
-
-asmlinkage void crypto_morus640_sse2_enc_tail(void *state, const void *src,
-					      void *dst, unsigned int length);
-asmlinkage void crypto_morus640_sse2_dec_tail(void *state, const void *src,
-					      void *dst, unsigned int length);
-
-asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor,
-					   u64 assoclen, u64 cryptlen);
-
-MORUS640_DECLARE_ALG(sse2, "morus640-sse2", 400);
-
-static struct simd_aead_alg *simd_alg;
-
-static int __init crypto_morus640_sse2_module_init(void)
-{
-	if (!boot_cpu_has(X86_FEATURE_XMM2) ||
-	    !cpu_has_xfeatures(XFEATURE_MASK_SSE, NULL))
-		return -ENODEV;
-
-	return simd_register_aeads_compat(&crypto_morus640_sse2_alg, 1,
-					  &simd_alg);
-}
-
-static void __exit crypto_morus640_sse2_module_exit(void)
-{
-	simd_unregister_aeads(&crypto_morus640_sse2_alg, 1, &simd_alg);
-}
-
-module_init(crypto_morus640_sse2_module_init);
-module_exit(crypto_morus640_sse2_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("MORUS-640 AEAD algorithm -- SSE2 implementation");
-MODULE_ALIAS_CRYPTO("morus640");
-MODULE_ALIAS_CRYPTO("morus640-sse2");
diff --git a/arch/x86/crypto/morus640_glue.c b/arch/x86/crypto/morus640_glue.c
deleted file mode 100644
index d8b5fd6..0000000
--- a/arch/x86/crypto/morus640_glue.c
+++ /dev/null
@@ -1,200 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The MORUS-640 Authenticated-Encryption Algorithm
- *   Common x86 SIMD glue skeleton
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <crypto/internal/aead.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/morus640_glue.h>
-#include <crypto/scatterwalk.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/scatterlist.h>
-#include <asm/fpu/api.h>
-
-struct morus640_state {
-	struct morus640_block s[MORUS_STATE_BLOCKS];
-};
-
-struct morus640_ops {
-	int (*skcipher_walk_init)(struct skcipher_walk *walk,
-				  struct aead_request *req, bool atomic);
-
-	void (*crypt_blocks)(void *state, const void *src, void *dst,
-			     unsigned int length);
-	void (*crypt_tail)(void *state, const void *src, void *dst,
-			   unsigned int length);
-};
-
-static void crypto_morus640_glue_process_ad(
-		struct morus640_state *state,
-		const struct morus640_glue_ops *ops,
-		struct scatterlist *sg_src, unsigned int assoclen)
-{
-	struct scatter_walk walk;
-	struct morus640_block buf;
-	unsigned int pos = 0;
-
-	scatterwalk_start(&walk, sg_src);
-	while (assoclen != 0) {
-		unsigned int size = scatterwalk_clamp(&walk, assoclen);
-		unsigned int left = size;
-		void *mapped = scatterwalk_map(&walk);
-		const u8 *src = (const u8 *)mapped;
-
-		if (pos + size >= MORUS640_BLOCK_SIZE) {
-			if (pos > 0) {
-				unsigned int fill = MORUS640_BLOCK_SIZE - pos;
-				memcpy(buf.bytes + pos, src, fill);
-				ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE);
-				pos = 0;
-				left -= fill;
-				src += fill;
-			}
-
-			ops->ad(state, src, left);
-			src += left & ~(MORUS640_BLOCK_SIZE - 1);
-			left &= MORUS640_BLOCK_SIZE - 1;
-		}
-
-		memcpy(buf.bytes + pos, src, left);
-
-		pos += left;
-		assoclen -= size;
-		scatterwalk_unmap(mapped);
-		scatterwalk_advance(&walk, size);
-		scatterwalk_done(&walk, 0, assoclen);
-	}
-
-	if (pos > 0) {
-		memset(buf.bytes + pos, 0, MORUS640_BLOCK_SIZE - pos);
-		ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE);
-	}
-}
-
-static void crypto_morus640_glue_process_crypt(struct morus640_state *state,
-					       struct morus640_ops ops,
-					       struct skcipher_walk *walk)
-{
-	while (walk->nbytes >= MORUS640_BLOCK_SIZE) {
-		ops.crypt_blocks(state, walk->src.virt.addr,
-				 walk->dst.virt.addr,
-				 round_down(walk->nbytes, MORUS640_BLOCK_SIZE));
-		skcipher_walk_done(walk, walk->nbytes % MORUS640_BLOCK_SIZE);
-	}
-
-	if (walk->nbytes) {
-		ops.crypt_tail(state, walk->src.virt.addr, walk->dst.virt.addr,
-			       walk->nbytes);
-		skcipher_walk_done(walk, 0);
-	}
-}
-
-int crypto_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
-				unsigned int keylen)
-{
-	struct morus640_ctx *ctx = crypto_aead_ctx(aead);
-
-	if (keylen != MORUS640_BLOCK_SIZE) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	memcpy(ctx->key.bytes, key, MORUS640_BLOCK_SIZE);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_morus640_glue_setkey);
-
-int crypto_morus640_glue_setauthsize(struct crypto_aead *tfm,
-				     unsigned int authsize)
-{
-	return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
-}
-EXPORT_SYMBOL_GPL(crypto_morus640_glue_setauthsize);
-
-static void crypto_morus640_glue_crypt(struct aead_request *req,
-				       struct morus640_ops ops,
-				       unsigned int cryptlen,
-				       struct morus640_block *tag_xor)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
-	struct morus640_state state;
-	struct skcipher_walk walk;
-
-	ops.skcipher_walk_init(&walk, req, true);
-
-	kernel_fpu_begin();
-
-	ctx->ops->init(&state, &ctx->key, req->iv);
-	crypto_morus640_glue_process_ad(&state, ctx->ops, req->src, req->assoclen);
-	crypto_morus640_glue_process_crypt(&state, ops, &walk);
-	ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen);
-
-	kernel_fpu_end();
-}
-
-int crypto_morus640_glue_encrypt(struct aead_request *req)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
-	struct morus640_ops OPS = {
-		.skcipher_walk_init = skcipher_walk_aead_encrypt,
-		.crypt_blocks = ctx->ops->enc,
-		.crypt_tail = ctx->ops->enc_tail,
-	};
-
-	struct morus640_block tag = {};
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen;
-
-	crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag);
-
-	scatterwalk_map_and_copy(tag.bytes, req->dst,
-				 req->assoclen + cryptlen, authsize, 1);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_morus640_glue_encrypt);
-
-int crypto_morus640_glue_decrypt(struct aead_request *req)
-{
-	static const u8 zeros[MORUS640_BLOCK_SIZE] = {};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
-	struct morus640_ops OPS = {
-		.skcipher_walk_init = skcipher_walk_aead_decrypt,
-		.crypt_blocks = ctx->ops->dec,
-		.crypt_tail = ctx->ops->dec_tail,
-	};
-
-	struct morus640_block tag;
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen - authsize;
-
-	scatterwalk_map_and_copy(tag.bytes, req->src,
-				 req->assoclen + cryptlen, authsize, 0);
-
-	crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag);
-
-	return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
-}
-EXPORT_SYMBOL_GPL(crypto_morus640_glue_decrypt);
-
-void crypto_morus640_glue_init_ops(struct crypto_aead *aead,
-				   const struct morus640_glue_ops *ops)
-{
-	struct morus640_ctx *ctx = crypto_aead_ctx(aead);
-	ctx->ops = ops;
-}
-EXPORT_SYMBOL_GPL(crypto_morus640_glue_init_ops);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("MORUS-640 AEAD mode -- glue for x86 optimizations");
diff --git a/arch/x86/crypto/serpent_avx2_glue.c b/arch/x86/crypto/serpent_avx2_glue.c
index b871728..13fd8d3 100644
--- a/arch/x86/crypto/serpent_avx2_glue.c
+++ b/arch/x86/crypto/serpent_avx2_glue.c
@@ -167,7 +167,7 @@
 
 	return glue_xts_req_128bit(&serpent_enc_xts, req,
 				   XTS_TWEAK_CAST(__serpent_encrypt),
-				   &ctx->tweak_ctx, &ctx->crypt_ctx);
+				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
 }
 
 static int xts_decrypt(struct skcipher_request *req)
@@ -177,7 +177,7 @@
 
 	return glue_xts_req_128bit(&serpent_dec_xts, req,
 				   XTS_TWEAK_CAST(__serpent_encrypt),
-				   &ctx->tweak_ctx, &ctx->crypt_ctx);
+				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
 }
 
 static struct skcipher_alg serpent_algs[] = {
diff --git a/arch/x86/crypto/serpent_avx_glue.c b/arch/x86/crypto/serpent_avx_glue.c
index 4a9a9f2..7d3dca3 100644
--- a/arch/x86/crypto/serpent_avx_glue.c
+++ b/arch/x86/crypto/serpent_avx_glue.c
@@ -207,7 +207,7 @@
 
 	return glue_xts_req_128bit(&serpent_enc_xts, req,
 				   XTS_TWEAK_CAST(__serpent_encrypt),
-				   &ctx->tweak_ctx, &ctx->crypt_ctx);
+				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
 }
 
 static int xts_decrypt(struct skcipher_request *req)
@@ -217,7 +217,7 @@
 
 	return glue_xts_req_128bit(&serpent_dec_xts, req,
 				   XTS_TWEAK_CAST(__serpent_encrypt),
-				   &ctx->tweak_ctx, &ctx->crypt_ctx);
+				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
 }
 
 static struct skcipher_alg serpent_algs[] = {
diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c
index 73867da..f9aff31 100644
--- a/arch/x86/crypto/sha256_ssse3_glue.c
+++ b/arch/x86/crypto/sha256_ssse3_glue.c
@@ -45,8 +45,8 @@
 				       u64 rounds);
 typedef void (sha256_transform_fn)(u32 *digest, const char *data, u64 rounds);
 
-static int sha256_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int len, sha256_transform_fn *sha256_xform)
+static int _sha256_update(struct shash_desc *desc, const u8 *data,
+			  unsigned int len, sha256_transform_fn *sha256_xform)
 {
 	struct sha256_state *sctx = shash_desc_ctx(desc);
 
@@ -84,7 +84,7 @@
 static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data,
 			 unsigned int len)
 {
-	return sha256_update(desc, data, len, sha256_transform_ssse3);
+	return _sha256_update(desc, data, len, sha256_transform_ssse3);
 }
 
 static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data,
@@ -151,7 +151,7 @@
 static int sha256_avx_update(struct shash_desc *desc, const u8 *data,
 			 unsigned int len)
 {
-	return sha256_update(desc, data, len, sha256_transform_avx);
+	return _sha256_update(desc, data, len, sha256_transform_avx);
 }
 
 static int sha256_avx_finup(struct shash_desc *desc, const u8 *data,
@@ -233,7 +233,7 @@
 static int sha256_avx2_update(struct shash_desc *desc, const u8 *data,
 			 unsigned int len)
 {
-	return sha256_update(desc, data, len, sha256_transform_rorx);
+	return _sha256_update(desc, data, len, sha256_transform_rorx);
 }
 
 static int sha256_avx2_finup(struct shash_desc *desc, const u8 *data,
@@ -313,7 +313,7 @@
 static int sha256_ni_update(struct shash_desc *desc, const u8 *data,
 			 unsigned int len)
 {
-	return sha256_update(desc, data, len, sha256_ni_transform);
+	return _sha256_update(desc, data, len, sha256_ni_transform);
 }
 
 static int sha256_ni_finup(struct shash_desc *desc, const u8 *data,
diff --git a/arch/x86/crypto/twofish_avx_glue.c b/arch/x86/crypto/twofish_avx_glue.c
index 0dbf8e8..d561c82 100644
--- a/arch/x86/crypto/twofish_avx_glue.c
+++ b/arch/x86/crypto/twofish_avx_glue.c
@@ -210,7 +210,7 @@
 
 	return glue_xts_req_128bit(&twofish_enc_xts, req,
 				   XTS_TWEAK_CAST(twofish_enc_blk),
-				   &ctx->tweak_ctx, &ctx->crypt_ctx);
+				   &ctx->tweak_ctx, &ctx->crypt_ctx, false);
 }
 
 static int xts_decrypt(struct skcipher_request *req)
@@ -220,7 +220,7 @@
 
 	return glue_xts_req_128bit(&twofish_dec_xts, req,
 				   XTS_TWEAK_CAST(twofish_enc_blk),
-				   &ctx->tweak_ctx, &ctx->crypt_ctx);
+				   &ctx->tweak_ctx, &ctx->crypt_ctx, true);
 }
 
 static struct skcipher_alg twofish_algs[] = {
diff --git a/arch/x86/include/asm/crypto/aes.h b/arch/x86/include/asm/crypto/aes.h
deleted file mode 100644
index c508521..0000000
--- a/arch/x86/include/asm/crypto/aes.h
+++ /dev/null
@@ -1,12 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ASM_X86_AES_H
-#define ASM_X86_AES_H
-
-#include <linux/crypto.h>
-#include <crypto/aes.h>
-
-void crypto_aes_encrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
-			    const u8 *src);
-void crypto_aes_decrypt_x86(struct crypto_aes_ctx *ctx, u8 *dst,
-			    const u8 *src);
-#endif
diff --git a/arch/x86/include/asm/crypto/glue_helper.h b/arch/x86/include/asm/crypto/glue_helper.h
index d181863..8d4a8e1 100644
--- a/arch/x86/include/asm/crypto/glue_helper.h
+++ b/arch/x86/include/asm/crypto/glue_helper.h
@@ -114,7 +114,7 @@
 extern int glue_xts_req_128bit(const struct common_glue_ctx *gctx,
 			       struct skcipher_request *req,
 			       common_glue_func_t tweak_fn, void *tweak_ctx,
-			       void *crypt_ctx);
+			       void *crypt_ctx, bool decrypt);
 
 extern void glue_xts_crypt_128bit_one(void *ctx, u128 *dst, const u128 *src,
 				      le128 *iv, common_glue_func_t fn);
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 10fb42d..5277490 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -9,9 +9,11 @@
 $(obj)/string.o: $(srctree)/arch/x86/boot/compressed/string.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
-$(obj)/sha256.o: $(srctree)/lib/sha256.c FORCE
+$(obj)/sha256.o: $(srctree)/lib/crypto/sha256.c FORCE
 	$(call if_changed_rule,cc_o_c)
 
+CFLAGS_sha256.o := -D__DISABLE_EXPORTS
+
 LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
 targets += purgatory.ro
 
diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c
index b607bda..3b95410 100644
--- a/arch/x86/purgatory/purgatory.c
+++ b/arch/x86/purgatory/purgatory.c
@@ -9,7 +9,7 @@
  */
 
 #include <linux/bug.h>
-#include <linux/sha256.h>
+#include <crypto/sha.h>
 #include <asm/purgatory.h>
 
 #include "../boot/string.h"
diff --git a/crypto/Kconfig b/crypto/Kconfig
index e801450..ad86463 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -306,19 +306,10 @@
 	help
 	 Support for the AEGIS-128 dedicated AEAD algorithm.
 
-config CRYPTO_AEGIS128L
-	tristate "AEGIS-128L AEAD algorithm"
-	select CRYPTO_AEAD
-	select CRYPTO_AES  # for AES S-box tables
-	help
-	 Support for the AEGIS-128L dedicated AEAD algorithm.
-
-config CRYPTO_AEGIS256
-	tristate "AEGIS-256 AEAD algorithm"
-	select CRYPTO_AEAD
-	select CRYPTO_AES  # for AES S-box tables
-	help
-	 Support for the AEGIS-256 dedicated AEAD algorithm.
+config CRYPTO_AEGIS128_SIMD
+	bool "Support SIMD acceleration for AEGIS-128"
+	depends on CRYPTO_AEGIS128 && ((ARM || ARM64) && KERNEL_MODE_NEON)
+	default y
 
 config CRYPTO_AEGIS128_AESNI_SSE2
 	tristate "AEGIS-128 AEAD algorithm (x86_64 AESNI+SSE2 implementation)"
@@ -328,78 +319,6 @@
 	help
 	 AESNI+SSE2 implementation of the AEGIS-128 dedicated AEAD algorithm.
 
-config CRYPTO_AEGIS128L_AESNI_SSE2
-	tristate "AEGIS-128L AEAD algorithm (x86_64 AESNI+SSE2 implementation)"
-	depends on X86 && 64BIT
-	select CRYPTO_AEAD
-	select CRYPTO_SIMD
-	help
-	 AESNI+SSE2 implementation of the AEGIS-128L dedicated AEAD algorithm.
-
-config CRYPTO_AEGIS256_AESNI_SSE2
-	tristate "AEGIS-256 AEAD algorithm (x86_64 AESNI+SSE2 implementation)"
-	depends on X86 && 64BIT
-	select CRYPTO_AEAD
-	select CRYPTO_SIMD
-	help
-	 AESNI+SSE2 implementation of the AEGIS-256 dedicated AEAD algorithm.
-
-config CRYPTO_MORUS640
-	tristate "MORUS-640 AEAD algorithm"
-	select CRYPTO_AEAD
-	help
-	  Support for the MORUS-640 dedicated AEAD algorithm.
-
-config CRYPTO_MORUS640_GLUE
-	tristate
-	depends on X86
-	select CRYPTO_AEAD
-	select CRYPTO_SIMD
-	help
-	  Common glue for SIMD optimizations of the MORUS-640 dedicated AEAD
-	  algorithm.
-
-config CRYPTO_MORUS640_SSE2
-	tristate "MORUS-640 AEAD algorithm (x86_64 SSE2 implementation)"
-	depends on X86 && 64BIT
-	select CRYPTO_AEAD
-	select CRYPTO_MORUS640_GLUE
-	help
-	  SSE2 implementation of the MORUS-640 dedicated AEAD algorithm.
-
-config CRYPTO_MORUS1280
-	tristate "MORUS-1280 AEAD algorithm"
-	select CRYPTO_AEAD
-	help
-	  Support for the MORUS-1280 dedicated AEAD algorithm.
-
-config CRYPTO_MORUS1280_GLUE
-	tristate
-	depends on X86
-	select CRYPTO_AEAD
-	select CRYPTO_SIMD
-	help
-	  Common glue for SIMD optimizations of the MORUS-1280 dedicated AEAD
-	  algorithm.
-
-config CRYPTO_MORUS1280_SSE2
-	tristate "MORUS-1280 AEAD algorithm (x86_64 SSE2 implementation)"
-	depends on X86 && 64BIT
-	select CRYPTO_AEAD
-	select CRYPTO_MORUS1280_GLUE
-	help
-	  SSE2 optimized implementation of the MORUS-1280 dedicated AEAD
-	  algorithm.
-
-config CRYPTO_MORUS1280_AVX2
-	tristate "MORUS-1280 AEAD algorithm (x86_64 AVX2 implementation)"
-	depends on X86 && 64BIT
-	select CRYPTO_AEAD
-	select CRYPTO_MORUS1280_GLUE
-	help
-	  AVX2 optimized implementation of the MORUS-1280 dedicated AEAD
-	  algorithm.
-
 config CRYPTO_SEQIV
 	tristate "Sequence Number IV Generator"
 	select CRYPTO_AEAD
@@ -728,11 +647,12 @@
 	  Unless you are testing these algorithms, you don't need this.
 
 config CRYPTO_GHASH
-	tristate "GHASH digest algorithm"
+	tristate "GHASH hash function"
 	select CRYPTO_GF128MUL
 	select CRYPTO_HASH
 	help
-	  GHASH is message digest algorithm for GCM (Galois/Counter Mode).
+	  GHASH is the hash function used in GCM (Galois/Counter Mode).
+	  It is not a general-purpose cryptographic hash function.
 
 config CRYPTO_POLY1305
 	tristate "Poly1305 authenticator algorithm"
@@ -929,9 +849,13 @@
 	  SHA-1 secure hash standard (DFIPS 180-4) implemented
 	  using powerpc SPE SIMD instruction set.
 
+config CRYPTO_LIB_SHA256
+	tristate
+
 config CRYPTO_SHA256
 	tristate "SHA224 and SHA256 digest algorithm"
 	select CRYPTO_HASH
+	select CRYPTO_LIB_SHA256
 	help
 	  SHA256 secure hash standard (DFIPS 180-2).
 
@@ -1057,18 +981,22 @@
 	  <http://www.larc.usp.br/~pbarreto/WhirlpoolPage.html>
 
 config CRYPTO_GHASH_CLMUL_NI_INTEL
-	tristate "GHASH digest algorithm (CLMUL-NI accelerated)"
+	tristate "GHASH hash function (CLMUL-NI accelerated)"
 	depends on X86 && 64BIT
 	select CRYPTO_CRYPTD
 	help
-	  GHASH is message digest algorithm for GCM (Galois/Counter Mode).
-	  The implementation is accelerated by CLMUL-NI of Intel.
+	  This is the x86_64 CLMUL-NI accelerated implementation of
+	  GHASH, the hash function used in GCM (Galois/Counter mode).
 
 comment "Ciphers"
 
+config CRYPTO_LIB_AES
+	tristate
+
 config CRYPTO_AES
 	tristate "AES cipher algorithms"
 	select CRYPTO_ALGAPI
+	select CRYPTO_LIB_AES
 	help
 	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
 	  algorithm.
@@ -1089,6 +1017,7 @@
 config CRYPTO_AES_TI
 	tristate "Fixed time AES cipher"
 	select CRYPTO_ALGAPI
+	select CRYPTO_LIB_AES
 	help
 	  This is a generic implementation of AES that attempts to eliminate
 	  data dependent latencies as much as possible without affecting
@@ -1104,56 +1033,11 @@
 	  block. Interrupts are also disabled to avoid races where cachelines
 	  are evicted when the CPU is interrupted to do something else.
 
-config CRYPTO_AES_586
-	tristate "AES cipher algorithms (i586)"
-	depends on (X86 || UML_X86) && !64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_AES
-	help
-	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
-	  algorithm.
-
-	  Rijndael appears to be consistently a very good performer in
-	  both hardware and software across a wide range of computing
-	  environments regardless of its use in feedback or non-feedback
-	  modes. Its key setup time is excellent, and its key agility is
-	  good. Rijndael's very low memory requirements make it very well
-	  suited for restricted-space environments, in which it also
-	  demonstrates excellent performance. Rijndael's operations are
-	  among the easiest to defend against power and timing attacks.
-
-	  The AES specifies three key sizes: 128, 192 and 256 bits
-
-	  See <http://csrc.nist.gov/encryption/aes/> for more information.
-
-config CRYPTO_AES_X86_64
-	tristate "AES cipher algorithms (x86_64)"
-	depends on (X86 || UML_X86) && 64BIT
-	select CRYPTO_ALGAPI
-	select CRYPTO_AES
-	help
-	  AES cipher algorithms (FIPS-197). AES uses the Rijndael
-	  algorithm.
-
-	  Rijndael appears to be consistently a very good performer in
-	  both hardware and software across a wide range of computing
-	  environments regardless of its use in feedback or non-feedback
-	  modes. Its key setup time is excellent, and its key agility is
-	  good. Rijndael's very low memory requirements make it very well
-	  suited for restricted-space environments, in which it also
-	  demonstrates excellent performance. Rijndael's operations are
-	  among the easiest to defend against power and timing attacks.
-
-	  The AES specifies three key sizes: 128, 192 and 256 bits
-
-	  See <http://csrc.nist.gov/encryption/aes/> for more information.
-
 config CRYPTO_AES_NI_INTEL
 	tristate "AES cipher algorithms (AES-NI)"
 	depends on X86
 	select CRYPTO_AEAD
-	select CRYPTO_AES_X86_64 if 64BIT
-	select CRYPTO_AES_586 if !64BIT
+	select CRYPTO_LIB_AES
 	select CRYPTO_ALGAPI
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_GLUE_HELPER_X86 if 64BIT
@@ -1426,9 +1310,13 @@
 	  This module provides the Cast6 cipher algorithm that processes
 	  eight blocks parallel using the AVX instruction set.
 
+config CRYPTO_LIB_DES
+	tristate
+
 config CRYPTO_DES
 	tristate "DES and Triple DES EDE cipher algorithms"
 	select CRYPTO_ALGAPI
+	select CRYPTO_LIB_DES
 	help
 	  DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
 
@@ -1436,7 +1324,7 @@
 	tristate "DES and Triple DES EDE cipher algorithms (SPARC64)"
 	depends on SPARC64
 	select CRYPTO_ALGAPI
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	help
 	  DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3),
 	  optimized using SPARC64 crypto opcodes.
@@ -1445,7 +1333,7 @@
 	tristate "Triple DES EDE cipher algorithm (x86-64)"
 	depends on X86 && 64BIT
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	help
 	  Triple DES EDE (FIPS 46-3) algorithm.
 
diff --git a/crypto/Makefile b/crypto/Makefile
index 9479e1a..0d2cdd5 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -90,10 +90,26 @@
 obj-$(CONFIG_CRYPTO_CCM) += ccm.o
 obj-$(CONFIG_CRYPTO_CHACHA20POLY1305) += chacha20poly1305.o
 obj-$(CONFIG_CRYPTO_AEGIS128) += aegis128.o
-obj-$(CONFIG_CRYPTO_AEGIS128L) += aegis128l.o
-obj-$(CONFIG_CRYPTO_AEGIS256) += aegis256.o
-obj-$(CONFIG_CRYPTO_MORUS640) += morus640.o
-obj-$(CONFIG_CRYPTO_MORUS1280) += morus1280.o
+aegis128-y := aegis128-core.o
+
+ifeq ($(ARCH),arm)
+CFLAGS_aegis128-neon-inner.o += -ffreestanding -march=armv7-a -mfloat-abi=softfp
+CFLAGS_aegis128-neon-inner.o += -mfpu=crypto-neon-fp-armv8
+aegis128-$(CONFIG_CRYPTO_AEGIS128_SIMD) += aegis128-neon.o aegis128-neon-inner.o
+endif
+ifeq ($(ARCH),arm64)
+aegis128-cflags-y := -ffreestanding -mcpu=generic+crypto
+aegis128-cflags-$(CONFIG_CC_IS_GCC) += -ffixed-q16 -ffixed-q17 -ffixed-q18 \
+				       -ffixed-q19 -ffixed-q20 -ffixed-q21 \
+				       -ffixed-q22 -ffixed-q23 -ffixed-q24 \
+				       -ffixed-q25 -ffixed-q26 -ffixed-q27 \
+				       -ffixed-q28 -ffixed-q29 -ffixed-q30 \
+				       -ffixed-q31
+CFLAGS_aegis128-neon-inner.o += $(aegis128-cflags-y)
+CFLAGS_REMOVE_aegis128-neon-inner.o += -mgeneral-regs-only
+aegis128-$(CONFIG_CRYPTO_AEGIS128_SIMD) += aegis128-neon.o aegis128-neon-inner.o
+endif
+
 obj-$(CONFIG_CRYPTO_PCRYPT) += pcrypt.o
 obj-$(CONFIG_CRYPTO_CRYPTD) += cryptd.o
 obj-$(CONFIG_CRYPTO_DES) += des_generic.o
@@ -136,6 +152,8 @@
 obj-$(CONFIG_CRYPTO_DRBG) += drbg.o
 obj-$(CONFIG_CRYPTO_JITTERENTROPY) += jitterentropy_rng.o
 CFLAGS_jitterentropy.o = -O0
+KASAN_SANITIZE_jitterentropy.o = n
+UBSAN_SANITIZE_jitterentropy.o = n
 jitterentropy_rng-y := jitterentropy.o jitterentropy-kcapi.o
 obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o
 obj-$(CONFIG_CRYPTO_GHASH) += ghash-generic.o
diff --git a/crypto/aead.c b/crypto/aead.c
index fbf0ec9..ce03558 100644
--- a/crypto/aead.c
+++ b/crypto/aead.c
@@ -70,7 +70,8 @@
 {
 	int err;
 
-	if (authsize > crypto_aead_maxauthsize(tfm))
+	if ((!authsize && crypto_aead_maxauthsize(tfm)) ||
+	    authsize > crypto_aead_maxauthsize(tfm))
 		return -EINVAL;
 
 	if (crypto_aead_alg(tfm)->setauthsize) {
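
The extra clause rejects a zero-length authentication tag on any AEAD whose maxauthsize is non-zero, instead of quietly accepting a setting that would disable authentication. From the caller's side the effect looks like this (a sketch; "gcm(aes)" is only an example algorithm):

	struct crypto_aead *tfm;
	int err;

	tfm = crypto_alloc_aead("gcm(aes)", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	err = crypto_aead_setauthsize(tfm, 16);	/* ok: within GCM's 4..16 */
	err = crypto_aead_setauthsize(tfm, 0);	/* now -EINVAL: maxauthsize != 0 */
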
diff --git a/crypto/aegis.h b/crypto/aegis.h
index 41a3090..6920ebe 100644
--- a/crypto/aegis.h
+++ b/crypto/aegis.h
@@ -10,6 +10,7 @@
 #define _CRYPTO_AEGIS_H
 
 #include <crypto/aes.h>
+#include <linux/bitops.h>
 #include <linux/types.h>
 
 #define AEGIS_BLOCK_SIZE 16
@@ -23,46 +24,32 @@
 #define AEGIS_BLOCK_ALIGN (__alignof__(union aegis_block))
 #define AEGIS_ALIGNED(p) IS_ALIGNED((uintptr_t)p, AEGIS_BLOCK_ALIGN)
 
-static const union aegis_block crypto_aegis_const[2] = {
-	{ .words64 = {
-		cpu_to_le64(U64_C(0x0d08050302010100)),
-		cpu_to_le64(U64_C(0x6279e99059372215)),
-	} },
-	{ .words64 = {
-		cpu_to_le64(U64_C(0xf12fc26d55183ddb)),
-		cpu_to_le64(U64_C(0xdd28b57342311120)),
-	} },
-};
-
-static void crypto_aegis_block_xor(union aegis_block *dst,
-				   const union aegis_block *src)
+static __always_inline void crypto_aegis_block_xor(union aegis_block *dst,
+						   const union aegis_block *src)
 {
 	dst->words64[0] ^= src->words64[0];
 	dst->words64[1] ^= src->words64[1];
 }
 
-static void crypto_aegis_block_and(union aegis_block *dst,
-				   const union aegis_block *src)
+static __always_inline void crypto_aegis_block_and(union aegis_block *dst,
+						   const union aegis_block *src)
 {
 	dst->words64[0] &= src->words64[0];
 	dst->words64[1] &= src->words64[1];
 }
 
-static void crypto_aegis_aesenc(union aegis_block *dst,
-				const union aegis_block *src,
-				const union aegis_block *key)
+static __always_inline void crypto_aegis_aesenc(union aegis_block *dst,
+						const union aegis_block *src,
+						const union aegis_block *key)
 {
 	const u8  *s  = src->bytes;
-	const u32 *t0 = crypto_ft_tab[0];
-	const u32 *t1 = crypto_ft_tab[1];
-	const u32 *t2 = crypto_ft_tab[2];
-	const u32 *t3 = crypto_ft_tab[3];
+	const u32 *t = crypto_ft_tab[0];
 	u32 d0, d1, d2, d3;
 
-	d0 = t0[s[ 0]] ^ t1[s[ 5]] ^ t2[s[10]] ^ t3[s[15]];
-	d1 = t0[s[ 4]] ^ t1[s[ 9]] ^ t2[s[14]] ^ t3[s[ 3]];
-	d2 = t0[s[ 8]] ^ t1[s[13]] ^ t2[s[ 2]] ^ t3[s[ 7]];
-	d3 = t0[s[12]] ^ t1[s[ 1]] ^ t2[s[ 6]] ^ t3[s[11]];
+	d0 = t[s[ 0]] ^ rol32(t[s[ 5]], 8) ^ rol32(t[s[10]], 16) ^ rol32(t[s[15]], 24);
+	d1 = t[s[ 4]] ^ rol32(t[s[ 9]], 8) ^ rol32(t[s[14]], 16) ^ rol32(t[s[ 3]], 24);
+	d2 = t[s[ 8]] ^ rol32(t[s[13]], 8) ^ rol32(t[s[ 2]], 16) ^ rol32(t[s[ 7]], 24);
+	d3 = t[s[12]] ^ rol32(t[s[ 1]], 8) ^ rol32(t[s[ 6]], 16) ^ rol32(t[s[11]], 24);
 
 	dst->words32[0] = cpu_to_le32(d0) ^ key->words32[0];
 	dst->words32[1] = cpu_to_le32(d1) ^ key->words32[1];
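
The rewritten aesenc helper drops three of the four 1 KiB lookup tables: in the kernel's table layout, crypto_ft_tab[1..3] are the 8-, 16- and 24-bit left rotations of crypto_ft_tab[0], so the same values can be recovered with rol32() at lookup time. A self-contained demonstration of that equivalence with a stand-in table (not the real AES data):

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t rol32(uint32_t x, unsigned int s)
	{
		return (x << s) | (x >> (32 - s));
	}

	int main(void)
	{
		uint32_t t0[256], t1[256], t2[256], t3[256];
		int i;

		/* Stand-in data; the real crypto_ft_tab has the same structure:
		 * tables 1..3 are byte rotations of table 0. */
		for (i = 0; i < 256; i++) {
			t0[i] = (uint32_t)i * 2654435761u;
			t1[i] = rol32(t0[i], 8);
			t2[i] = rol32(t0[i], 16);
			t3[i] = rol32(t0[i], 24);
		}

		for (i = 0; i < 256; i++) {
			uint32_t four_tab = t0[i] ^ t1[(i + 5) & 0xff] ^
					    t2[(i + 10) & 0xff] ^ t3[(i + 15) & 0xff];
			uint32_t one_tab  = t0[i] ^ rol32(t0[(i + 5) & 0xff], 8) ^
					    rol32(t0[(i + 10) & 0xff], 16) ^
					    rol32(t0[(i + 15) & 0xff], 24);
			if (four_tab != one_tab)
				return 1;
		}
		printf("single-table formulation matches\n");
		return 0;
	}
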
diff --git a/crypto/aegis128.c b/crypto/aegis128-core.c
similarity index 87%
rename from crypto/aegis128.c
rename to crypto/aegis128-core.c
index d78f77f..80e7361 100644
--- a/crypto/aegis128.c
+++ b/crypto/aegis128-core.c
@@ -8,6 +8,7 @@
 
 #include <crypto/algapi.h>
 #include <crypto/internal/aead.h>
+#include <crypto/internal/simd.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/scatterwalk.h>
 #include <linux/err.h>
@@ -16,6 +17,8 @@
 #include <linux/module.h>
 #include <linux/scatterlist.h>
 
+#include <asm/simd.h>
+
 #include "aegis.h"
 
 #define AEGIS128_NONCE_SIZE 16
@@ -40,6 +43,35 @@
 			    const u8 *src, unsigned int size);
 };
 
+static bool have_simd;
+
+static const union aegis_block crypto_aegis_const[2] = {
+	{ .words64 = {
+		cpu_to_le64(U64_C(0x0d08050302010100)),
+		cpu_to_le64(U64_C(0x6279e99059372215)),
+	} },
+	{ .words64 = {
+		cpu_to_le64(U64_C(0xf12fc26d55183ddb)),
+		cpu_to_le64(U64_C(0xdd28b57342311120)),
+	} },
+};
+
+static bool aegis128_do_simd(void)
+{
+#ifdef CONFIG_CRYPTO_AEGIS128_SIMD
+	if (have_simd)
+		return crypto_simd_usable();
+#endif
+	return false;
+}
+
+bool crypto_aegis128_have_simd(void);
+void crypto_aegis128_update_simd(struct aegis_state *state, const void *msg);
+void crypto_aegis128_encrypt_chunk_simd(struct aegis_state *state, u8 *dst,
+					const u8 *src, unsigned int size);
+void crypto_aegis128_decrypt_chunk_simd(struct aegis_state *state, u8 *dst,
+					const u8 *src, unsigned int size);
+
 static void crypto_aegis128_update(struct aegis_state *state)
 {
 	union aegis_block tmp;
@@ -55,12 +87,22 @@
 static void crypto_aegis128_update_a(struct aegis_state *state,
 				     const union aegis_block *msg)
 {
+	if (aegis128_do_simd()) {
+		crypto_aegis128_update_simd(state, msg);
+		return;
+	}
+
 	crypto_aegis128_update(state);
 	crypto_aegis_block_xor(&state->blocks[0], msg);
 }
 
 static void crypto_aegis128_update_u(struct aegis_state *state, const void *msg)
 {
+	if (aegis128_do_simd()) {
+		crypto_aegis128_update_simd(state, msg);
+		return;
+	}
+
 	crypto_aegis128_update(state);
 	crypto_xor(state->blocks[0].bytes, msg, AEGIS_BLOCK_SIZE);
 }
@@ -365,7 +407,7 @@
 
 static int crypto_aegis128_encrypt(struct aead_request *req)
 {
-	static const struct aegis128_ops ops = {
+	const struct aegis128_ops *ops = &(struct aegis128_ops){
 		.skcipher_walk_init = skcipher_walk_aead_encrypt,
 		.crypt_chunk = crypto_aegis128_encrypt_chunk,
 	};
@@ -375,7 +417,12 @@
 	unsigned int authsize = crypto_aead_authsize(tfm);
 	unsigned int cryptlen = req->cryptlen;
 
-	crypto_aegis128_crypt(req, &tag, cryptlen, &ops);
+	if (aegis128_do_simd())
+		ops = &(struct aegis128_ops){
+			.skcipher_walk_init = skcipher_walk_aead_encrypt,
+			.crypt_chunk = crypto_aegis128_encrypt_chunk_simd };
+
+	crypto_aegis128_crypt(req, &tag, cryptlen, ops);
 
 	scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
 				 authsize, 1);
@@ -384,7 +431,7 @@
 
 static int crypto_aegis128_decrypt(struct aead_request *req)
 {
-	static const struct aegis128_ops ops = {
+	const struct aegis128_ops *ops = &(struct aegis128_ops){
 		.skcipher_walk_init = skcipher_walk_aead_decrypt,
 		.crypt_chunk = crypto_aegis128_decrypt_chunk,
 	};
@@ -398,27 +445,21 @@
 	scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
 				 authsize, 0);
 
-	crypto_aegis128_crypt(req, &tag, cryptlen, &ops);
+	if (aegis128_do_simd())
+		ops = &(struct aegis128_ops){
+			.skcipher_walk_init = skcipher_walk_aead_decrypt,
+			.crypt_chunk = crypto_aegis128_decrypt_chunk_simd };
+
+	crypto_aegis128_crypt(req, &tag, cryptlen, ops);
 
 	return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
 }
 
-static int crypto_aegis128_init_tfm(struct crypto_aead *tfm)
-{
-	return 0;
-}
-
-static void crypto_aegis128_exit_tfm(struct crypto_aead *tfm)
-{
-}
-
 static struct aead_alg crypto_aegis128_alg = {
 	.setkey = crypto_aegis128_setkey,
 	.setauthsize = crypto_aegis128_setauthsize,
 	.encrypt = crypto_aegis128_encrypt,
 	.decrypt = crypto_aegis128_decrypt,
-	.init = crypto_aegis128_init_tfm,
-	.exit = crypto_aegis128_exit_tfm,
 
 	.ivsize = AEGIS128_NONCE_SIZE,
 	.maxauthsize = AEGIS128_MAX_AUTH_SIZE,
@@ -440,6 +481,9 @@
 
 static int __init crypto_aegis128_module_init(void)
 {
+	if (IS_ENABLED(CONFIG_CRYPTO_AEGIS128_SIMD))
+		have_simd = crypto_aegis128_have_simd();
+
 	return crypto_register_aead(&crypto_aegis128_alg);
 }
 
diff --git a/crypto/aegis128-neon-inner.c b/crypto/aegis128-neon-inner.c
new file mode 100644
index 0000000..f05310c
--- /dev/null
+++ b/crypto/aegis128-neon-inner.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Linaro, Ltd. <ard.biesheuvel@linaro.org>
+ */
+
+#ifdef CONFIG_ARM64
+#include <asm/neon-intrinsics.h>
+
+#define AES_ROUND	"aese %0.16b, %1.16b \n\t aesmc %0.16b, %0.16b"
+#else
+#include <arm_neon.h>
+
+#define AES_ROUND	"aese.8 %q0, %q1 \n\t aesmc.8 %q0, %q0"
+#endif
+
+#define AEGIS_BLOCK_SIZE	16
+
+#include <stddef.h>
+
+extern int aegis128_have_aes_insn;
+
+void *memcpy(void *dest, const void *src, size_t n);
+void *memset(void *s, int c, size_t n);
+
+struct aegis128_state {
+	uint8x16_t v[5];
+};
+
+extern const uint8_t crypto_aes_sbox[];
+
+static struct aegis128_state aegis128_load_state_neon(const void *state)
+{
+	return (struct aegis128_state){ {
+		vld1q_u8(state),
+		vld1q_u8(state + 16),
+		vld1q_u8(state + 32),
+		vld1q_u8(state + 48),
+		vld1q_u8(state + 64)
+	} };
+}
+
+static void aegis128_save_state_neon(struct aegis128_state st, void *state)
+{
+	vst1q_u8(state, st.v[0]);
+	vst1q_u8(state + 16, st.v[1]);
+	vst1q_u8(state + 32, st.v[2]);
+	vst1q_u8(state + 48, st.v[3]);
+	vst1q_u8(state + 64, st.v[4]);
+}
+
+static inline __attribute__((always_inline))
+uint8x16_t aegis_aes_round(uint8x16_t w)
+{
+	uint8x16_t z = {};
+
+#ifdef CONFIG_ARM64
+	if (!__builtin_expect(aegis128_have_aes_insn, 1)) {
+		static const uint8_t shift_rows[] = {
+			0x0, 0x5, 0xa, 0xf, 0x4, 0x9, 0xe, 0x3,
+			0x8, 0xd, 0x2, 0x7, 0xc, 0x1, 0x6, 0xb,
+		};
+		static const uint8_t ror32by8[] = {
+			0x1, 0x2, 0x3, 0x0, 0x5, 0x6, 0x7, 0x4,
+			0x9, 0xa, 0xb, 0x8, 0xd, 0xe, 0xf, 0xc,
+		};
+		uint8x16_t v;
+
+		// shift rows
+		w = vqtbl1q_u8(w, vld1q_u8(shift_rows));
+
+		// sub bytes
+#ifndef CONFIG_CC_IS_GCC
+		v = vqtbl4q_u8(vld1q_u8_x4(crypto_aes_sbox), w);
+		v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x40), w - 0x40);
+		v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0x80), w - 0x80);
+		v = vqtbx4q_u8(v, vld1q_u8_x4(crypto_aes_sbox + 0xc0), w - 0xc0);
+#else
+		asm("tbl %0.16b, {v16.16b-v19.16b}, %1.16b" : "=w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v20.16b-v23.16b}, %1.16b" : "+w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v24.16b-v27.16b}, %1.16b" : "+w"(v) : "w"(w));
+		w -= 0x40;
+		asm("tbx %0.16b, {v28.16b-v31.16b}, %1.16b" : "+w"(v) : "w"(w));
+#endif
+
+		// mix columns
+		w = (v << 1) ^ (uint8x16_t)(((int8x16_t)v >> 7) & 0x1b);
+		w ^= (uint8x16_t)vrev32q_u16((uint16x8_t)v);
+		w ^= vqtbl1q_u8(v ^ w, vld1q_u8(ror32by8));
+
+		return w;
+	}
+#endif
+
+	/*
+	 * We use inline asm here instead of the vaeseq_u8/vaesmcq_u8 intrinsics
+	 * to force the compiler to issue the aese/aesmc instructions in pairs.
+	 * This is much faster on many cores, where the instruction pair can
+	 * execute in a single cycle.
+	 */
+	asm(AES_ROUND : "+w"(w) : "w"(z));
+	return w;
+}
+
+static inline __attribute__((always_inline))
+struct aegis128_state aegis128_update_neon(struct aegis128_state st,
+					   uint8x16_t m)
+{
+	m       ^= aegis_aes_round(st.v[4]);
+	st.v[4] ^= aegis_aes_round(st.v[3]);
+	st.v[3] ^= aegis_aes_round(st.v[2]);
+	st.v[2] ^= aegis_aes_round(st.v[1]);
+	st.v[1] ^= aegis_aes_round(st.v[0]);
+	st.v[0] ^= m;
+
+	return st;
+}
+
+static inline __attribute__((always_inline))
+void preload_sbox(void)
+{
+	if (!IS_ENABLED(CONFIG_ARM64) ||
+	    !IS_ENABLED(CONFIG_CC_IS_GCC) ||
+	    __builtin_expect(aegis128_have_aes_insn, 1))
+		return;
+
+	asm("ld1	{v16.16b-v19.16b}, [%0], #64	\n\t"
+	    "ld1	{v20.16b-v23.16b}, [%0], #64	\n\t"
+	    "ld1	{v24.16b-v27.16b}, [%0], #64	\n\t"
+	    "ld1	{v28.16b-v31.16b}, [%0]		\n\t"
+	    :: "r"(crypto_aes_sbox));
+}
+
+void crypto_aegis128_update_neon(void *state, const void *msg)
+{
+	struct aegis128_state st = aegis128_load_state_neon(state);
+
+	preload_sbox();
+
+	st = aegis128_update_neon(st, vld1q_u8(msg));
+
+	aegis128_save_state_neon(st, state);
+}
+
+void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src,
+					unsigned int size)
+{
+	struct aegis128_state st = aegis128_load_state_neon(state);
+	uint8x16_t msg;
+
+	preload_sbox();
+
+	while (size >= AEGIS_BLOCK_SIZE) {
+		uint8x16_t s = st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4];
+
+		msg = vld1q_u8(src);
+		st = aegis128_update_neon(st, msg);
+		vst1q_u8(dst, msg ^ s);
+
+		size -= AEGIS_BLOCK_SIZE;
+		src += AEGIS_BLOCK_SIZE;
+		dst += AEGIS_BLOCK_SIZE;
+	}
+
+	if (size > 0) {
+		uint8x16_t s = st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4];
+		uint8_t buf[AEGIS_BLOCK_SIZE] = {};
+
+		memcpy(buf, src, size);
+		msg = vld1q_u8(buf);
+		st = aegis128_update_neon(st, msg);
+		vst1q_u8(buf, msg ^ s);
+		memcpy(dst, buf, size);
+	}
+
+	aegis128_save_state_neon(st, state);
+}
+
+void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
+					unsigned int size)
+{
+	struct aegis128_state st = aegis128_load_state_neon(state);
+	uint8x16_t msg;
+
+	preload_sbox();
+
+	while (size >= AEGIS_BLOCK_SIZE) {
+		msg = vld1q_u8(src) ^ st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4];
+		st = aegis128_update_neon(st, msg);
+		vst1q_u8(dst, msg);
+
+		size -= AEGIS_BLOCK_SIZE;
+		src += AEGIS_BLOCK_SIZE;
+		dst += AEGIS_BLOCK_SIZE;
+	}
+
+	if (size > 0) {
+		uint8x16_t s = st.v[1] ^ (st.v[2] & st.v[3]) ^ st.v[4];
+		uint8_t buf[AEGIS_BLOCK_SIZE];
+
+		vst1q_u8(buf, s);
+		memcpy(buf, src, size);
+		msg = vld1q_u8(buf) ^ s;
+		vst1q_u8(buf, msg);
+		memcpy(dst, buf, size);
+
+		st = aegis128_update_neon(st, msg);
+	}
+
+	aegis128_save_state_neon(st, state);
+}
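
The mix-columns fallback above is built from the classic GF(2^8) doubling ("xtime") trick, vectorised: (v << 1) ^ (((int8x16_t)v >> 7) & 0x1b) doubles every lane modulo the AES polynomial, because the arithmetic right shift smears each sign bit into an all-ones mask that selects the 0x1b reduction. The scalar form, for reference (not part of the patch):

	#include <stdint.h>

	/* Multiply by x (i.e. 2) in GF(2^8) with the AES reduction polynomial
	 * x^8 + x^4 + x^3 + x + 1; e.g. xtime(0x57) == 0xae, xtime(0x80) == 0x1b. */
	static uint8_t xtime(uint8_t v)
	{
		return (uint8_t)((v << 1) ^ (-(v >> 7) & 0x1b));
	}
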
diff --git a/crypto/aegis128-neon.c b/crypto/aegis128-neon.c
new file mode 100644
index 0000000..751f9c1
--- /dev/null
+++ b/crypto/aegis128-neon.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019 Linaro Ltd <ard.biesheuvel@linaro.org>
+ */
+
+#include <asm/cpufeature.h>
+#include <asm/neon.h>
+
+#include "aegis.h"
+
+void crypto_aegis128_update_neon(void *state, const void *msg);
+void crypto_aegis128_encrypt_chunk_neon(void *state, void *dst, const void *src,
+					unsigned int size);
+void crypto_aegis128_decrypt_chunk_neon(void *state, void *dst, const void *src,
+					unsigned int size);
+
+int aegis128_have_aes_insn __ro_after_init;
+
+bool crypto_aegis128_have_simd(void)
+{
+	if (cpu_have_feature(cpu_feature(AES))) {
+		aegis128_have_aes_insn = 1;
+		return true;
+	}
+	return IS_ENABLED(CONFIG_ARM64);
+}
+
+void crypto_aegis128_update_simd(union aegis_block *state, const void *msg)
+{
+	kernel_neon_begin();
+	crypto_aegis128_update_neon(state, msg);
+	kernel_neon_end();
+}
+
+void crypto_aegis128_encrypt_chunk_simd(union aegis_block *state, u8 *dst,
+					const u8 *src, unsigned int size)
+{
+	kernel_neon_begin();
+	crypto_aegis128_encrypt_chunk_neon(state, dst, src, size);
+	kernel_neon_end();
+}
+
+void crypto_aegis128_decrypt_chunk_simd(union aegis_block *state, u8 *dst,
+					const u8 *src, unsigned int size)
+{
+	kernel_neon_begin();
+	crypto_aegis128_decrypt_chunk_neon(state, dst, src, size);
+	kernel_neon_end();
+}
diff --git a/crypto/aegis128l.c b/crypto/aegis128l.c
deleted file mode 100644
index 9bca3d6..0000000
--- a/crypto/aegis128l.c
+++ /dev/null
@@ -1,522 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The AEGIS-128L Authenticated-Encryption Algorithm
- *
- * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/aead.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/scatterwalk.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/scatterlist.h>
-
-#include "aegis.h"
-
-#define AEGIS128L_CHUNK_BLOCKS 2
-#define AEGIS128L_CHUNK_SIZE (AEGIS128L_CHUNK_BLOCKS * AEGIS_BLOCK_SIZE)
-#define AEGIS128L_NONCE_SIZE 16
-#define AEGIS128L_STATE_BLOCKS 8
-#define AEGIS128L_KEY_SIZE 16
-#define AEGIS128L_MIN_AUTH_SIZE 8
-#define AEGIS128L_MAX_AUTH_SIZE 16
-
-union aegis_chunk {
-	union aegis_block blocks[AEGIS128L_CHUNK_BLOCKS];
-	u8 bytes[AEGIS128L_CHUNK_SIZE];
-};
-
-struct aegis_state {
-	union aegis_block blocks[AEGIS128L_STATE_BLOCKS];
-};
-
-struct aegis_ctx {
-	union aegis_block key;
-};
-
-struct aegis128l_ops {
-	int (*skcipher_walk_init)(struct skcipher_walk *walk,
-				  struct aead_request *req, bool atomic);
-
-	void (*crypt_chunk)(struct aegis_state *state, u8 *dst,
-			    const u8 *src, unsigned int size);
-};
-
-static void crypto_aegis128l_update(struct aegis_state *state)
-{
-	union aegis_block tmp;
-	unsigned int i;
-
-	tmp = state->blocks[AEGIS128L_STATE_BLOCKS - 1];
-	for (i = AEGIS128L_STATE_BLOCKS - 1; i > 0; i--)
-		crypto_aegis_aesenc(&state->blocks[i], &state->blocks[i - 1],
-				    &state->blocks[i]);
-	crypto_aegis_aesenc(&state->blocks[0], &tmp, &state->blocks[0]);
-}
-
-static void crypto_aegis128l_update_a(struct aegis_state *state,
-				      const union aegis_chunk *msg)
-{
-	crypto_aegis128l_update(state);
-	crypto_aegis_block_xor(&state->blocks[0], &msg->blocks[0]);
-	crypto_aegis_block_xor(&state->blocks[4], &msg->blocks[1]);
-}
-
-static void crypto_aegis128l_update_u(struct aegis_state *state,
-				      const void *msg)
-{
-	crypto_aegis128l_update(state);
-	crypto_xor(state->blocks[0].bytes, msg + 0 * AEGIS_BLOCK_SIZE,
-			AEGIS_BLOCK_SIZE);
-	crypto_xor(state->blocks[4].bytes, msg + 1 * AEGIS_BLOCK_SIZE,
-			AEGIS_BLOCK_SIZE);
-}
-
-static void crypto_aegis128l_init(struct aegis_state *state,
-				  const union aegis_block *key,
-				  const u8 *iv)
-{
-	union aegis_block key_iv;
-	union aegis_chunk chunk;
-	unsigned int i;
-
-	memcpy(chunk.blocks[0].bytes, iv, AEGIS_BLOCK_SIZE);
-	chunk.blocks[1] = *key;
-
-	key_iv = *key;
-	crypto_aegis_block_xor(&key_iv, &chunk.blocks[0]);
-
-	state->blocks[0] = key_iv;
-	state->blocks[1] = crypto_aegis_const[1];
-	state->blocks[2] = crypto_aegis_const[0];
-	state->blocks[3] = crypto_aegis_const[1];
-	state->blocks[4] = key_iv;
-	state->blocks[5] = *key;
-	state->blocks[6] = *key;
-	state->blocks[7] = *key;
-
-	crypto_aegis_block_xor(&state->blocks[5], &crypto_aegis_const[0]);
-	crypto_aegis_block_xor(&state->blocks[6], &crypto_aegis_const[1]);
-	crypto_aegis_block_xor(&state->blocks[7], &crypto_aegis_const[0]);
-
-	for (i = 0; i < 10; i++) {
-		crypto_aegis128l_update_a(state, &chunk);
-	}
-}
-
-static void crypto_aegis128l_ad(struct aegis_state *state,
-				const u8 *src, unsigned int size)
-{
-	if (AEGIS_ALIGNED(src)) {
-		const union aegis_chunk *src_chunk =
-				(const union aegis_chunk *)src;
-
-		while (size >= AEGIS128L_CHUNK_SIZE) {
-			crypto_aegis128l_update_a(state, src_chunk);
-
-			size -= AEGIS128L_CHUNK_SIZE;
-			src_chunk += 1;
-		}
-	} else {
-		while (size >= AEGIS128L_CHUNK_SIZE) {
-			crypto_aegis128l_update_u(state, src);
-
-			size -= AEGIS128L_CHUNK_SIZE;
-			src += AEGIS128L_CHUNK_SIZE;
-		}
-	}
-}
-
-static void crypto_aegis128l_encrypt_chunk(struct aegis_state *state, u8 *dst,
-					   const u8 *src, unsigned int size)
-{
-	union aegis_chunk tmp;
-	union aegis_block *tmp0 = &tmp.blocks[0];
-	union aegis_block *tmp1 = &tmp.blocks[1];
-
-	if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
-		while (size >= AEGIS128L_CHUNK_SIZE) {
-			union aegis_chunk *dst_blk =
-					(union aegis_chunk *)dst;
-			const union aegis_chunk *src_blk =
-					(const union aegis_chunk *)src;
-
-			*tmp0 = state->blocks[2];
-			crypto_aegis_block_and(tmp0, &state->blocks[3]);
-			crypto_aegis_block_xor(tmp0, &state->blocks[6]);
-			crypto_aegis_block_xor(tmp0, &state->blocks[1]);
-			crypto_aegis_block_xor(tmp0, &src_blk->blocks[0]);
-
-			*tmp1 = state->blocks[6];
-			crypto_aegis_block_and(tmp1, &state->blocks[7]);
-			crypto_aegis_block_xor(tmp1, &state->blocks[5]);
-			crypto_aegis_block_xor(tmp1, &state->blocks[2]);
-			crypto_aegis_block_xor(tmp1, &src_blk->blocks[1]);
-
-			crypto_aegis128l_update_a(state, src_blk);
-
-			*dst_blk = tmp;
-
-			size -= AEGIS128L_CHUNK_SIZE;
-			src += AEGIS128L_CHUNK_SIZE;
-			dst += AEGIS128L_CHUNK_SIZE;
-		}
-	} else {
-		while (size >= AEGIS128L_CHUNK_SIZE) {
-			*tmp0 = state->blocks[2];
-			crypto_aegis_block_and(tmp0, &state->blocks[3]);
-			crypto_aegis_block_xor(tmp0, &state->blocks[6]);
-			crypto_aegis_block_xor(tmp0, &state->blocks[1]);
-			crypto_xor(tmp0->bytes, src + 0 * AEGIS_BLOCK_SIZE,
-				   AEGIS_BLOCK_SIZE);
-
-			*tmp1 = state->blocks[6];
-			crypto_aegis_block_and(tmp1, &state->blocks[7]);
-			crypto_aegis_block_xor(tmp1, &state->blocks[5]);
-			crypto_aegis_block_xor(tmp1, &state->blocks[2]);
-			crypto_xor(tmp1->bytes, src + 1 * AEGIS_BLOCK_SIZE,
-				   AEGIS_BLOCK_SIZE);
-
-			crypto_aegis128l_update_u(state, src);
-
-			memcpy(dst, tmp.bytes, AEGIS128L_CHUNK_SIZE);
-
-			size -= AEGIS128L_CHUNK_SIZE;
-			src += AEGIS128L_CHUNK_SIZE;
-			dst += AEGIS128L_CHUNK_SIZE;
-		}
-	}
-
-	if (size > 0) {
-		union aegis_chunk msg = {};
-		memcpy(msg.bytes, src, size);
-
-		*tmp0 = state->blocks[2];
-		crypto_aegis_block_and(tmp0, &state->blocks[3]);
-		crypto_aegis_block_xor(tmp0, &state->blocks[6]);
-		crypto_aegis_block_xor(tmp0, &state->blocks[1]);
-
-		*tmp1 = state->blocks[6];
-		crypto_aegis_block_and(tmp1, &state->blocks[7]);
-		crypto_aegis_block_xor(tmp1, &state->blocks[5]);
-		crypto_aegis_block_xor(tmp1, &state->blocks[2]);
-
-		crypto_aegis128l_update_a(state, &msg);
-
-		crypto_aegis_block_xor(&msg.blocks[0], tmp0);
-		crypto_aegis_block_xor(&msg.blocks[1], tmp1);
-
-		memcpy(dst, msg.bytes, size);
-	}
-}
-
-static void crypto_aegis128l_decrypt_chunk(struct aegis_state *state, u8 *dst,
-					   const u8 *src, unsigned int size)
-{
-	union aegis_chunk tmp;
-	union aegis_block *tmp0 = &tmp.blocks[0];
-	union aegis_block *tmp1 = &tmp.blocks[1];
-
-	if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
-		while (size >= AEGIS128L_CHUNK_SIZE) {
-			union aegis_chunk *dst_blk =
-					(union aegis_chunk *)dst;
-			const union aegis_chunk *src_blk =
-					(const union aegis_chunk *)src;
-
-			*tmp0 = state->blocks[2];
-			crypto_aegis_block_and(tmp0, &state->blocks[3]);
-			crypto_aegis_block_xor(tmp0, &state->blocks[6]);
-			crypto_aegis_block_xor(tmp0, &state->blocks[1]);
-			crypto_aegis_block_xor(tmp0, &src_blk->blocks[0]);
-
-			*tmp1 = state->blocks[6];
-			crypto_aegis_block_and(tmp1, &state->blocks[7]);
-			crypto_aegis_block_xor(tmp1, &state->blocks[5]);
-			crypto_aegis_block_xor(tmp1, &state->blocks[2]);
-			crypto_aegis_block_xor(tmp1, &src_blk->blocks[1]);
-
-			crypto_aegis128l_update_a(state, &tmp);
-
-			*dst_blk = tmp;
-
-			size -= AEGIS128L_CHUNK_SIZE;
-			src += AEGIS128L_CHUNK_SIZE;
-			dst += AEGIS128L_CHUNK_SIZE;
-		}
-	} else {
-		while (size >= AEGIS128L_CHUNK_SIZE) {
-			*tmp0 = state->blocks[2];
-			crypto_aegis_block_and(tmp0, &state->blocks[3]);
-			crypto_aegis_block_xor(tmp0, &state->blocks[6]);
-			crypto_aegis_block_xor(tmp0, &state->blocks[1]);
-			crypto_xor(tmp0->bytes, src + 0 * AEGIS_BLOCK_SIZE,
-				   AEGIS_BLOCK_SIZE);
-
-			*tmp1 = state->blocks[6];
-			crypto_aegis_block_and(tmp1, &state->blocks[7]);
-			crypto_aegis_block_xor(tmp1, &state->blocks[5]);
-			crypto_aegis_block_xor(tmp1, &state->blocks[2]);
-			crypto_xor(tmp1->bytes, src + 1 * AEGIS_BLOCK_SIZE,
-				   AEGIS_BLOCK_SIZE);
-
-			crypto_aegis128l_update_a(state, &tmp);
-
-			memcpy(dst, tmp.bytes, AEGIS128L_CHUNK_SIZE);
-
-			size -= AEGIS128L_CHUNK_SIZE;
-			src += AEGIS128L_CHUNK_SIZE;
-			dst += AEGIS128L_CHUNK_SIZE;
-		}
-	}
-
-	if (size > 0) {
-		union aegis_chunk msg = {};
-		memcpy(msg.bytes, src, size);
-
-		*tmp0 = state->blocks[2];
-		crypto_aegis_block_and(tmp0, &state->blocks[3]);
-		crypto_aegis_block_xor(tmp0, &state->blocks[6]);
-		crypto_aegis_block_xor(tmp0, &state->blocks[1]);
-		crypto_aegis_block_xor(&msg.blocks[0], tmp0);
-
-		*tmp1 = state->blocks[6];
-		crypto_aegis_block_and(tmp1, &state->blocks[7]);
-		crypto_aegis_block_xor(tmp1, &state->blocks[5]);
-		crypto_aegis_block_xor(tmp1, &state->blocks[2]);
-		crypto_aegis_block_xor(&msg.blocks[1], tmp1);
-
-		memset(msg.bytes + size, 0, AEGIS128L_CHUNK_SIZE - size);
-
-		crypto_aegis128l_update_a(state, &msg);
-
-		memcpy(dst, msg.bytes, size);
-	}
-}
-
-static void crypto_aegis128l_process_ad(struct aegis_state *state,
-					struct scatterlist *sg_src,
-					unsigned int assoclen)
-{
-	struct scatter_walk walk;
-	union aegis_chunk buf;
-	unsigned int pos = 0;
-
-	scatterwalk_start(&walk, sg_src);
-	while (assoclen != 0) {
-		unsigned int size = scatterwalk_clamp(&walk, assoclen);
-		unsigned int left = size;
-		void *mapped = scatterwalk_map(&walk);
-		const u8 *src = (const u8 *)mapped;
-
-		if (pos + size >= AEGIS128L_CHUNK_SIZE) {
-			if (pos > 0) {
-				unsigned int fill = AEGIS128L_CHUNK_SIZE - pos;
-				memcpy(buf.bytes + pos, src, fill);
-				crypto_aegis128l_update_a(state, &buf);
-				pos = 0;
-				left -= fill;
-				src += fill;
-			}
-
-			crypto_aegis128l_ad(state, src, left);
-			src += left & ~(AEGIS128L_CHUNK_SIZE - 1);
-			left &= AEGIS128L_CHUNK_SIZE - 1;
-		}
-
-		memcpy(buf.bytes + pos, src, left);
-
-		pos += left;
-		assoclen -= size;
-		scatterwalk_unmap(mapped);
-		scatterwalk_advance(&walk, size);
-		scatterwalk_done(&walk, 0, assoclen);
-	}
-
-	if (pos > 0) {
-		memset(buf.bytes + pos, 0, AEGIS128L_CHUNK_SIZE - pos);
-		crypto_aegis128l_update_a(state, &buf);
-	}
-}
-
-static void crypto_aegis128l_process_crypt(struct aegis_state *state,
-					   struct aead_request *req,
-					   const struct aegis128l_ops *ops)
-{
-	struct skcipher_walk walk;
-
-	ops->skcipher_walk_init(&walk, req, false);
-
-	while (walk.nbytes) {
-		unsigned int nbytes = walk.nbytes;
-
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
-
-		ops->crypt_chunk(state, walk.dst.virt.addr, walk.src.virt.addr,
-				 nbytes);
-
-		skcipher_walk_done(&walk, walk.nbytes - nbytes);
-	}
-}
-
-static void crypto_aegis128l_final(struct aegis_state *state,
-				   union aegis_block *tag_xor,
-				   u64 assoclen, u64 cryptlen)
-{
-	u64 assocbits = assoclen * 8;
-	u64 cryptbits = cryptlen * 8;
-
-	union aegis_chunk tmp;
-	unsigned int i;
-
-	tmp.blocks[0].words64[0] = cpu_to_le64(assocbits);
-	tmp.blocks[0].words64[1] = cpu_to_le64(cryptbits);
-
-	crypto_aegis_block_xor(&tmp.blocks[0], &state->blocks[2]);
-
-	tmp.blocks[1] = tmp.blocks[0];
-	for (i = 0; i < 7; i++)
-		crypto_aegis128l_update_a(state, &tmp);
-
-	for (i = 0; i < 7; i++)
-		crypto_aegis_block_xor(tag_xor, &state->blocks[i]);
-}
-
-static int crypto_aegis128l_setkey(struct crypto_aead *aead, const u8 *key,
-				   unsigned int keylen)
-{
-	struct aegis_ctx *ctx = crypto_aead_ctx(aead);
-
-	if (keylen != AEGIS128L_KEY_SIZE) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	memcpy(ctx->key.bytes, key, AEGIS128L_KEY_SIZE);
-	return 0;
-}
-
-static int crypto_aegis128l_setauthsize(struct crypto_aead *tfm,
-					unsigned int authsize)
-{
-	if (authsize > AEGIS128L_MAX_AUTH_SIZE)
-		return -EINVAL;
-	if (authsize < AEGIS128L_MIN_AUTH_SIZE)
-		return -EINVAL;
-	return 0;
-}
-
-static void crypto_aegis128l_crypt(struct aead_request *req,
-				   union aegis_block *tag_xor,
-				   unsigned int cryptlen,
-				   const struct aegis128l_ops *ops)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aegis_ctx *ctx = crypto_aead_ctx(tfm);
-	struct aegis_state state;
-
-	crypto_aegis128l_init(&state, &ctx->key, req->iv);
-	crypto_aegis128l_process_ad(&state, req->src, req->assoclen);
-	crypto_aegis128l_process_crypt(&state, req, ops);
-	crypto_aegis128l_final(&state, tag_xor, req->assoclen, cryptlen);
-}
-
-static int crypto_aegis128l_encrypt(struct aead_request *req)
-{
-	static const struct aegis128l_ops ops = {
-		.skcipher_walk_init = skcipher_walk_aead_encrypt,
-		.crypt_chunk = crypto_aegis128l_encrypt_chunk,
-	};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	union aegis_block tag = {};
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen;
-
-	crypto_aegis128l_crypt(req, &tag, cryptlen, &ops);
-
-	scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
-				 authsize, 1);
-	return 0;
-}
-
-static int crypto_aegis128l_decrypt(struct aead_request *req)
-{
-	static const struct aegis128l_ops ops = {
-		.skcipher_walk_init = skcipher_walk_aead_decrypt,
-		.crypt_chunk = crypto_aegis128l_decrypt_chunk,
-	};
-	static const u8 zeros[AEGIS128L_MAX_AUTH_SIZE] = {};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	union aegis_block tag;
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen - authsize;
-
-	scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
-				 authsize, 0);
-
-	crypto_aegis128l_crypt(req, &tag, cryptlen, &ops);
-
-	return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
-}
-
-static int crypto_aegis128l_init_tfm(struct crypto_aead *tfm)
-{
-	return 0;
-}
-
-static void crypto_aegis128l_exit_tfm(struct crypto_aead *tfm)
-{
-}
-
-static struct aead_alg crypto_aegis128l_alg = {
-	.setkey = crypto_aegis128l_setkey,
-	.setauthsize = crypto_aegis128l_setauthsize,
-	.encrypt = crypto_aegis128l_encrypt,
-	.decrypt = crypto_aegis128l_decrypt,
-	.init = crypto_aegis128l_init_tfm,
-	.exit = crypto_aegis128l_exit_tfm,
-
-	.ivsize = AEGIS128L_NONCE_SIZE,
-	.maxauthsize = AEGIS128L_MAX_AUTH_SIZE,
-	.chunksize = AEGIS128L_CHUNK_SIZE,
-
-	.base = {
-		.cra_blocksize = 1,
-		.cra_ctxsize = sizeof(struct aegis_ctx),
-		.cra_alignmask = 0,
-
-		.cra_priority = 100,
-
-		.cra_name = "aegis128l",
-		.cra_driver_name = "aegis128l-generic",
-
-		.cra_module = THIS_MODULE,
-	}
-};
-
-static int __init crypto_aegis128l_module_init(void)
-{
-	return crypto_register_aead(&crypto_aegis128l_alg);
-}
-
-static void __exit crypto_aegis128l_module_exit(void)
-{
-	crypto_unregister_aead(&crypto_aegis128l_alg);
-}
-
-subsys_initcall(crypto_aegis128l_module_init);
-module_exit(crypto_aegis128l_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("AEGIS-128L AEAD algorithm");
-MODULE_ALIAS_CRYPTO("aegis128l");
-MODULE_ALIAS_CRYPTO("aegis128l-generic");
diff --git a/crypto/aegis256.c b/crypto/aegis256.c
deleted file mode 100644
index b47fd39..0000000
--- a/crypto/aegis256.c
+++ /dev/null
@@ -1,473 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The AEGIS-256 Authenticated-Encryption Algorithm
- *
- * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <crypto/algapi.h>
-#include <crypto/internal/aead.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/scatterwalk.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/scatterlist.h>
-
-#include "aegis.h"
-
-#define AEGIS256_NONCE_SIZE 32
-#define AEGIS256_STATE_BLOCKS 6
-#define AEGIS256_KEY_SIZE 32
-#define AEGIS256_MIN_AUTH_SIZE 8
-#define AEGIS256_MAX_AUTH_SIZE 16
-
-struct aegis_state {
-	union aegis_block blocks[AEGIS256_STATE_BLOCKS];
-};
-
-struct aegis_ctx {
-	union aegis_block key[AEGIS256_KEY_SIZE / AEGIS_BLOCK_SIZE];
-};
-
-struct aegis256_ops {
-	int (*skcipher_walk_init)(struct skcipher_walk *walk,
-				  struct aead_request *req, bool atomic);
-
-	void (*crypt_chunk)(struct aegis_state *state, u8 *dst,
-			    const u8 *src, unsigned int size);
-};
-
-static void crypto_aegis256_update(struct aegis_state *state)
-{
-	union aegis_block tmp;
-	unsigned int i;
-
-	tmp = state->blocks[AEGIS256_STATE_BLOCKS - 1];
-	for (i = AEGIS256_STATE_BLOCKS - 1; i > 0; i--)
-		crypto_aegis_aesenc(&state->blocks[i], &state->blocks[i - 1],
-				    &state->blocks[i]);
-	crypto_aegis_aesenc(&state->blocks[0], &tmp, &state->blocks[0]);
-}
-
-static void crypto_aegis256_update_a(struct aegis_state *state,
-				     const union aegis_block *msg)
-{
-	crypto_aegis256_update(state);
-	crypto_aegis_block_xor(&state->blocks[0], msg);
-}
-
-static void crypto_aegis256_update_u(struct aegis_state *state, const void *msg)
-{
-	crypto_aegis256_update(state);
-	crypto_xor(state->blocks[0].bytes, msg, AEGIS_BLOCK_SIZE);
-}
-
-static void crypto_aegis256_init(struct aegis_state *state,
-				 const union aegis_block *key,
-				 const u8 *iv)
-{
-	union aegis_block key_iv[2];
-	unsigned int i;
-
-	key_iv[0] = key[0];
-	key_iv[1] = key[1];
-	crypto_xor(key_iv[0].bytes, iv + 0 * AEGIS_BLOCK_SIZE,
-			AEGIS_BLOCK_SIZE);
-	crypto_xor(key_iv[1].bytes, iv + 1 * AEGIS_BLOCK_SIZE,
-			AEGIS_BLOCK_SIZE);
-
-	state->blocks[0] = key_iv[0];
-	state->blocks[1] = key_iv[1];
-	state->blocks[2] = crypto_aegis_const[1];
-	state->blocks[3] = crypto_aegis_const[0];
-	state->blocks[4] = key[0];
-	state->blocks[5] = key[1];
-
-	crypto_aegis_block_xor(&state->blocks[4], &crypto_aegis_const[0]);
-	crypto_aegis_block_xor(&state->blocks[5], &crypto_aegis_const[1]);
-
-	for (i = 0; i < 4; i++) {
-		crypto_aegis256_update_a(state, &key[0]);
-		crypto_aegis256_update_a(state, &key[1]);
-		crypto_aegis256_update_a(state, &key_iv[0]);
-		crypto_aegis256_update_a(state, &key_iv[1]);
-	}
-}
-
-static void crypto_aegis256_ad(struct aegis_state *state,
-			       const u8 *src, unsigned int size)
-{
-	if (AEGIS_ALIGNED(src)) {
-		const union aegis_block *src_blk =
-				(const union aegis_block *)src;
-
-		while (size >= AEGIS_BLOCK_SIZE) {
-			crypto_aegis256_update_a(state, src_blk);
-
-			size -= AEGIS_BLOCK_SIZE;
-			src_blk++;
-		}
-	} else {
-		while (size >= AEGIS_BLOCK_SIZE) {
-			crypto_aegis256_update_u(state, src);
-
-			size -= AEGIS_BLOCK_SIZE;
-			src += AEGIS_BLOCK_SIZE;
-		}
-	}
-}
-
-static void crypto_aegis256_encrypt_chunk(struct aegis_state *state, u8 *dst,
-					  const u8 *src, unsigned int size)
-{
-	union aegis_block tmp;
-
-	if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
-		while (size >= AEGIS_BLOCK_SIZE) {
-			union aegis_block *dst_blk =
-					(union aegis_block *)dst;
-			const union aegis_block *src_blk =
-					(const union aegis_block *)src;
-
-			tmp = state->blocks[2];
-			crypto_aegis_block_and(&tmp, &state->blocks[3]);
-			crypto_aegis_block_xor(&tmp, &state->blocks[5]);
-			crypto_aegis_block_xor(&tmp, &state->blocks[4]);
-			crypto_aegis_block_xor(&tmp, &state->blocks[1]);
-			crypto_aegis_block_xor(&tmp, src_blk);
-
-			crypto_aegis256_update_a(state, src_blk);
-
-			*dst_blk = tmp;
-
-			size -= AEGIS_BLOCK_SIZE;
-			src += AEGIS_BLOCK_SIZE;
-			dst += AEGIS_BLOCK_SIZE;
-		}
-	} else {
-		while (size >= AEGIS_BLOCK_SIZE) {
-			tmp = state->blocks[2];
-			crypto_aegis_block_and(&tmp, &state->blocks[3]);
-			crypto_aegis_block_xor(&tmp, &state->blocks[5]);
-			crypto_aegis_block_xor(&tmp, &state->blocks[4]);
-			crypto_aegis_block_xor(&tmp, &state->blocks[1]);
-			crypto_xor(tmp.bytes, src, AEGIS_BLOCK_SIZE);
-
-			crypto_aegis256_update_u(state, src);
-
-			memcpy(dst, tmp.bytes, AEGIS_BLOCK_SIZE);
-
-			size -= AEGIS_BLOCK_SIZE;
-			src += AEGIS_BLOCK_SIZE;
-			dst += AEGIS_BLOCK_SIZE;
-		}
-	}
-
-	if (size > 0) {
-		union aegis_block msg = {};
-		memcpy(msg.bytes, src, size);
-
-		tmp = state->blocks[2];
-		crypto_aegis_block_and(&tmp, &state->blocks[3]);
-		crypto_aegis_block_xor(&tmp, &state->blocks[5]);
-		crypto_aegis_block_xor(&tmp, &state->blocks[4]);
-		crypto_aegis_block_xor(&tmp, &state->blocks[1]);
-
-		crypto_aegis256_update_a(state, &msg);
-
-		crypto_aegis_block_xor(&msg, &tmp);
-
-		memcpy(dst, msg.bytes, size);
-	}
-}
-
-static void crypto_aegis256_decrypt_chunk(struct aegis_state *state, u8 *dst,
-					  const u8 *src, unsigned int size)
-{
-	union aegis_block tmp;
-
-	if (AEGIS_ALIGNED(src) && AEGIS_ALIGNED(dst)) {
-		while (size >= AEGIS_BLOCK_SIZE) {
-			union aegis_block *dst_blk =
-					(union aegis_block *)dst;
-			const union aegis_block *src_blk =
-					(const union aegis_block *)src;
-
-			tmp = state->blocks[2];
-			crypto_aegis_block_and(&tmp, &state->blocks[3]);
-			crypto_aegis_block_xor(&tmp, &state->blocks[5]);
-			crypto_aegis_block_xor(&tmp, &state->blocks[4]);
-			crypto_aegis_block_xor(&tmp, &state->blocks[1]);
-			crypto_aegis_block_xor(&tmp, src_blk);
-
-			crypto_aegis256_update_a(state, &tmp);
-
-			*dst_blk = tmp;
-
-			size -= AEGIS_BLOCK_SIZE;
-			src += AEGIS_BLOCK_SIZE;
-			dst += AEGIS_BLOCK_SIZE;
-		}
-	} else {
-		while (size >= AEGIS_BLOCK_SIZE) {
-			tmp = state->blocks[2];
-			crypto_aegis_block_and(&tmp, &state->blocks[3]);
-			crypto_aegis_block_xor(&tmp, &state->blocks[5]);
-			crypto_aegis_block_xor(&tmp, &state->blocks[4]);
-			crypto_aegis_block_xor(&tmp, &state->blocks[1]);
-			crypto_xor(tmp.bytes, src, AEGIS_BLOCK_SIZE);
-
-			crypto_aegis256_update_a(state, &tmp);
-
-			memcpy(dst, tmp.bytes, AEGIS_BLOCK_SIZE);
-
-			size -= AEGIS_BLOCK_SIZE;
-			src += AEGIS_BLOCK_SIZE;
-			dst += AEGIS_BLOCK_SIZE;
-		}
-	}
-
-	if (size > 0) {
-		union aegis_block msg = {};
-		memcpy(msg.bytes, src, size);
-
-		tmp = state->blocks[2];
-		crypto_aegis_block_and(&tmp, &state->blocks[3]);
-		crypto_aegis_block_xor(&tmp, &state->blocks[5]);
-		crypto_aegis_block_xor(&tmp, &state->blocks[4]);
-		crypto_aegis_block_xor(&tmp, &state->blocks[1]);
-		crypto_aegis_block_xor(&msg, &tmp);
-
-		memset(msg.bytes + size, 0, AEGIS_BLOCK_SIZE - size);
-
-		crypto_aegis256_update_a(state, &msg);
-
-		memcpy(dst, msg.bytes, size);
-	}
-}
-
-static void crypto_aegis256_process_ad(struct aegis_state *state,
-				       struct scatterlist *sg_src,
-				       unsigned int assoclen)
-{
-	struct scatter_walk walk;
-	union aegis_block buf;
-	unsigned int pos = 0;
-
-	scatterwalk_start(&walk, sg_src);
-	while (assoclen != 0) {
-		unsigned int size = scatterwalk_clamp(&walk, assoclen);
-		unsigned int left = size;
-		void *mapped = scatterwalk_map(&walk);
-		const u8 *src = (const u8 *)mapped;
-
-		if (pos + size >= AEGIS_BLOCK_SIZE) {
-			if (pos > 0) {
-				unsigned int fill = AEGIS_BLOCK_SIZE - pos;
-				memcpy(buf.bytes + pos, src, fill);
-				crypto_aegis256_update_a(state, &buf);
-				pos = 0;
-				left -= fill;
-				src += fill;
-			}
-
-			crypto_aegis256_ad(state, src, left);
-			src += left & ~(AEGIS_BLOCK_SIZE - 1);
-			left &= AEGIS_BLOCK_SIZE - 1;
-		}
-
-		memcpy(buf.bytes + pos, src, left);
-
-		pos += left;
-		assoclen -= size;
-		scatterwalk_unmap(mapped);
-		scatterwalk_advance(&walk, size);
-		scatterwalk_done(&walk, 0, assoclen);
-	}
-
-	if (pos > 0) {
-		memset(buf.bytes + pos, 0, AEGIS_BLOCK_SIZE - pos);
-		crypto_aegis256_update_a(state, &buf);
-	}
-}
-
-static void crypto_aegis256_process_crypt(struct aegis_state *state,
-					  struct aead_request *req,
-					  const struct aegis256_ops *ops)
-{
-	struct skcipher_walk walk;
-
-	ops->skcipher_walk_init(&walk, req, false);
-
-	while (walk.nbytes) {
-		unsigned int nbytes = walk.nbytes;
-
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
-
-		ops->crypt_chunk(state, walk.dst.virt.addr, walk.src.virt.addr,
-				 nbytes);
-
-		skcipher_walk_done(&walk, walk.nbytes - nbytes);
-	}
-}
-
-static void crypto_aegis256_final(struct aegis_state *state,
-				  union aegis_block *tag_xor,
-				  u64 assoclen, u64 cryptlen)
-{
-	u64 assocbits = assoclen * 8;
-	u64 cryptbits = cryptlen * 8;
-
-	union aegis_block tmp;
-	unsigned int i;
-
-	tmp.words64[0] = cpu_to_le64(assocbits);
-	tmp.words64[1] = cpu_to_le64(cryptbits);
-
-	crypto_aegis_block_xor(&tmp, &state->blocks[3]);
-
-	for (i = 0; i < 7; i++)
-		crypto_aegis256_update_a(state, &tmp);
-
-	for (i = 0; i < AEGIS256_STATE_BLOCKS; i++)
-		crypto_aegis_block_xor(tag_xor, &state->blocks[i]);
-}
-
-static int crypto_aegis256_setkey(struct crypto_aead *aead, const u8 *key,
-				  unsigned int keylen)
-{
-	struct aegis_ctx *ctx = crypto_aead_ctx(aead);
-
-	if (keylen != AEGIS256_KEY_SIZE) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	memcpy(ctx->key[0].bytes, key, AEGIS_BLOCK_SIZE);
-	memcpy(ctx->key[1].bytes, key + AEGIS_BLOCK_SIZE,
-			AEGIS_BLOCK_SIZE);
-	return 0;
-}
-
-static int crypto_aegis256_setauthsize(struct crypto_aead *tfm,
-				       unsigned int authsize)
-{
-	if (authsize > AEGIS256_MAX_AUTH_SIZE)
-		return -EINVAL;
-	if (authsize < AEGIS256_MIN_AUTH_SIZE)
-		return -EINVAL;
-	return 0;
-}
-
-static void crypto_aegis256_crypt(struct aead_request *req,
-				  union aegis_block *tag_xor,
-				  unsigned int cryptlen,
-				  const struct aegis256_ops *ops)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aegis_ctx *ctx = crypto_aead_ctx(tfm);
-	struct aegis_state state;
-
-	crypto_aegis256_init(&state, ctx->key, req->iv);
-	crypto_aegis256_process_ad(&state, req->src, req->assoclen);
-	crypto_aegis256_process_crypt(&state, req, ops);
-	crypto_aegis256_final(&state, tag_xor, req->assoclen, cryptlen);
-}
-
-static int crypto_aegis256_encrypt(struct aead_request *req)
-{
-	static const struct aegis256_ops ops = {
-		.skcipher_walk_init = skcipher_walk_aead_encrypt,
-		.crypt_chunk = crypto_aegis256_encrypt_chunk,
-	};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	union aegis_block tag = {};
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen;
-
-	crypto_aegis256_crypt(req, &tag, cryptlen, &ops);
-
-	scatterwalk_map_and_copy(tag.bytes, req->dst, req->assoclen + cryptlen,
-				 authsize, 1);
-	return 0;
-}
-
-static int crypto_aegis256_decrypt(struct aead_request *req)
-{
-	static const struct aegis256_ops ops = {
-		.skcipher_walk_init = skcipher_walk_aead_decrypt,
-		.crypt_chunk = crypto_aegis256_decrypt_chunk,
-	};
-	static const u8 zeros[AEGIS256_MAX_AUTH_SIZE] = {};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	union aegis_block tag;
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen - authsize;
-
-	scatterwalk_map_and_copy(tag.bytes, req->src, req->assoclen + cryptlen,
-				 authsize, 0);
-
-	crypto_aegis256_crypt(req, &tag, cryptlen, &ops);
-
-	return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0;
-}
-
-static int crypto_aegis256_init_tfm(struct crypto_aead *tfm)
-{
-	return 0;
-}
-
-static void crypto_aegis256_exit_tfm(struct crypto_aead *tfm)
-{
-}
-
-static struct aead_alg crypto_aegis256_alg = {
-	.setkey = crypto_aegis256_setkey,
-	.setauthsize = crypto_aegis256_setauthsize,
-	.encrypt = crypto_aegis256_encrypt,
-	.decrypt = crypto_aegis256_decrypt,
-	.init = crypto_aegis256_init_tfm,
-	.exit = crypto_aegis256_exit_tfm,
-
-	.ivsize = AEGIS256_NONCE_SIZE,
-	.maxauthsize = AEGIS256_MAX_AUTH_SIZE,
-	.chunksize = AEGIS_BLOCK_SIZE,
-
-	.base = {
-		.cra_blocksize = 1,
-		.cra_ctxsize = sizeof(struct aegis_ctx),
-		.cra_alignmask = 0,
-
-		.cra_priority = 100,
-
-		.cra_name = "aegis256",
-		.cra_driver_name = "aegis256-generic",
-
-		.cra_module = THIS_MODULE,
-	}
-};
-
-static int __init crypto_aegis256_module_init(void)
-{
-	return crypto_register_aead(&crypto_aegis256_alg);
-}
-
-static void __exit crypto_aegis256_module_exit(void)
-{
-	crypto_unregister_aead(&crypto_aegis256_alg);
-}
-
-subsys_initcall(crypto_aegis256_module_init);
-module_exit(crypto_aegis256_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("AEGIS-256 AEAD algorithm");
-MODULE_ALIAS_CRYPTO("aegis256");
-MODULE_ALIAS_CRYPTO("aegis256-generic");
diff --git a/crypto/aes_generic.c b/crypto/aes_generic.c
index f217568..22e5867 100644
--- a/crypto/aes_generic.c
+++ b/crypto/aes_generic.c
@@ -61,8 +61,6 @@
 	return x >> (n << 3);
 }
 
-static const u32 rco_tab[10] = { 1, 2, 4, 8, 16, 32, 64, 128, 27, 54 };
-
 /* cacheline-aligned to facilitate prefetching into cache */
 __visible const u32 crypto_ft_tab[4][256] ____cacheline_aligned = {
 	{
@@ -328,7 +326,7 @@
 	}
 };
 
-__visible const u32 crypto_fl_tab[4][256] ____cacheline_aligned = {
+static const u32 crypto_fl_tab[4][256] ____cacheline_aligned = {
 	{
 		0x00000063, 0x0000007c, 0x00000077, 0x0000007b,
 		0x000000f2, 0x0000006b, 0x0000006f, 0x000000c5,
@@ -856,7 +854,7 @@
 	}
 };
 
-__visible const u32 crypto_il_tab[4][256] ____cacheline_aligned = {
+static const u32 crypto_il_tab[4][256] ____cacheline_aligned = {
 	{
 		0x00000052, 0x00000009, 0x0000006a, 0x000000d5,
 		0x00000030, 0x00000036, 0x000000a5, 0x00000038,
@@ -1121,158 +1119,7 @@
 };
 
 EXPORT_SYMBOL_GPL(crypto_ft_tab);
-EXPORT_SYMBOL_GPL(crypto_fl_tab);
 EXPORT_SYMBOL_GPL(crypto_it_tab);
-EXPORT_SYMBOL_GPL(crypto_il_tab);
-
-/* initialise the key schedule from the user supplied key */
-
-#define star_x(x) (((x) & 0x7f7f7f7f) << 1) ^ ((((x) & 0x80808080) >> 7) * 0x1b)
-
-#define imix_col(y, x)	do {		\
-	u	= star_x(x);		\
-	v	= star_x(u);		\
-	w	= star_x(v);		\
-	t	= w ^ (x);		\
-	(y)	= u ^ v ^ w;		\
-	(y)	^= ror32(u ^ t, 8) ^	\
-		ror32(v ^ t, 16) ^	\
-		ror32(t, 24);		\
-} while (0)
-
-#define ls_box(x)		\
-	crypto_fl_tab[0][byte(x, 0)] ^	\
-	crypto_fl_tab[1][byte(x, 1)] ^	\
-	crypto_fl_tab[2][byte(x, 2)] ^	\
-	crypto_fl_tab[3][byte(x, 3)]
-
-#define loop4(i)	do {		\
-	t = ror32(t, 8);		\
-	t = ls_box(t) ^ rco_tab[i];	\
-	t ^= ctx->key_enc[4 * i];		\
-	ctx->key_enc[4 * i + 4] = t;		\
-	t ^= ctx->key_enc[4 * i + 1];		\
-	ctx->key_enc[4 * i + 5] = t;		\
-	t ^= ctx->key_enc[4 * i + 2];		\
-	ctx->key_enc[4 * i + 6] = t;		\
-	t ^= ctx->key_enc[4 * i + 3];		\
-	ctx->key_enc[4 * i + 7] = t;		\
-} while (0)
-
-#define loop6(i)	do {		\
-	t = ror32(t, 8);		\
-	t = ls_box(t) ^ rco_tab[i];	\
-	t ^= ctx->key_enc[6 * i];		\
-	ctx->key_enc[6 * i + 6] = t;		\
-	t ^= ctx->key_enc[6 * i + 1];		\
-	ctx->key_enc[6 * i + 7] = t;		\
-	t ^= ctx->key_enc[6 * i + 2];		\
-	ctx->key_enc[6 * i + 8] = t;		\
-	t ^= ctx->key_enc[6 * i + 3];		\
-	ctx->key_enc[6 * i + 9] = t;		\
-	t ^= ctx->key_enc[6 * i + 4];		\
-	ctx->key_enc[6 * i + 10] = t;		\
-	t ^= ctx->key_enc[6 * i + 5];		\
-	ctx->key_enc[6 * i + 11] = t;		\
-} while (0)
-
-#define loop8tophalf(i)	do {			\
-	t = ror32(t, 8);			\
-	t = ls_box(t) ^ rco_tab[i];		\
-	t ^= ctx->key_enc[8 * i];			\
-	ctx->key_enc[8 * i + 8] = t;			\
-	t ^= ctx->key_enc[8 * i + 1];			\
-	ctx->key_enc[8 * i + 9] = t;			\
-	t ^= ctx->key_enc[8 * i + 2];			\
-	ctx->key_enc[8 * i + 10] = t;			\
-	t ^= ctx->key_enc[8 * i + 3];			\
-	ctx->key_enc[8 * i + 11] = t;			\
-} while (0)
-
-#define loop8(i)	do {				\
-	loop8tophalf(i);				\
-	t  = ctx->key_enc[8 * i + 4] ^ ls_box(t);	\
-	ctx->key_enc[8 * i + 12] = t;			\
-	t ^= ctx->key_enc[8 * i + 5];			\
-	ctx->key_enc[8 * i + 13] = t;			\
-	t ^= ctx->key_enc[8 * i + 6];			\
-	ctx->key_enc[8 * i + 14] = t;			\
-	t ^= ctx->key_enc[8 * i + 7];			\
-	ctx->key_enc[8 * i + 15] = t;			\
-} while (0)
-
-/**
- * crypto_aes_expand_key - Expands the AES key as described in FIPS-197
- * @ctx:	The location where the computed key will be stored.
- * @in_key:	The supplied key.
- * @key_len:	The length of the supplied key.
- *
- * Returns 0 on success. The function fails only if an invalid key size (or
- * pointer) is supplied.
- * The expanded key size is 240 bytes (max of 14 rounds with a unique 16 bytes
- * key schedule plus a 16 bytes key which is used before the first round).
- * The decryption key is prepared for the "Equivalent Inverse Cipher" as
- * described in FIPS-197. The first slot (16 bytes) of each key (enc or dec) is
- * for the initial combination, the second slot for the first round and so on.
- */
-int crypto_aes_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
-		unsigned int key_len)
-{
-	u32 i, t, u, v, w, j;
-
-	if (key_len != AES_KEYSIZE_128 && key_len != AES_KEYSIZE_192 &&
-			key_len != AES_KEYSIZE_256)
-		return -EINVAL;
-
-	ctx->key_length = key_len;
-
-	ctx->key_enc[0] = get_unaligned_le32(in_key);
-	ctx->key_enc[1] = get_unaligned_le32(in_key + 4);
-	ctx->key_enc[2] = get_unaligned_le32(in_key + 8);
-	ctx->key_enc[3] = get_unaligned_le32(in_key + 12);
-
-	ctx->key_dec[key_len + 24] = ctx->key_enc[0];
-	ctx->key_dec[key_len + 25] = ctx->key_enc[1];
-	ctx->key_dec[key_len + 26] = ctx->key_enc[2];
-	ctx->key_dec[key_len + 27] = ctx->key_enc[3];
-
-	switch (key_len) {
-	case AES_KEYSIZE_128:
-		t = ctx->key_enc[3];
-		for (i = 0; i < 10; ++i)
-			loop4(i);
-		break;
-
-	case AES_KEYSIZE_192:
-		ctx->key_enc[4] = get_unaligned_le32(in_key + 16);
-		t = ctx->key_enc[5] = get_unaligned_le32(in_key + 20);
-		for (i = 0; i < 8; ++i)
-			loop6(i);
-		break;
-
-	case AES_KEYSIZE_256:
-		ctx->key_enc[4] = get_unaligned_le32(in_key + 16);
-		ctx->key_enc[5] = get_unaligned_le32(in_key + 20);
-		ctx->key_enc[6] = get_unaligned_le32(in_key + 24);
-		t = ctx->key_enc[7] = get_unaligned_le32(in_key + 28);
-		for (i = 0; i < 6; ++i)
-			loop8(i);
-		loop8tophalf(i);
-		break;
-	}
-
-	ctx->key_dec[0] = ctx->key_enc[key_len + 24];
-	ctx->key_dec[1] = ctx->key_enc[key_len + 25];
-	ctx->key_dec[2] = ctx->key_enc[key_len + 26];
-	ctx->key_dec[3] = ctx->key_enc[key_len + 27];
-
-	for (i = 4; i < key_len + 24; ++i) {
-		j = key_len + 24 - (i & ~3) + (i & 3);
-		imix_col(ctx->key_dec[j], ctx->key_enc[i]);
-	}
-	return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_aes_expand_key);
 
 /**
  * crypto_aes_set_key - Set the AES key.
@@ -1281,7 +1128,7 @@
  * @key_len:	The size of the key.
  *
  * Returns 0 on success, on failure the %CRYPTO_TFM_RES_BAD_KEY_LEN flag in tfm
- * is set. The function uses crypto_aes_expand_key() to expand the key.
+ * is set. The function uses aes_expandkey() to expand the key.
  * &crypto_aes_ctx _must_ be the private data embedded in @tfm which is
  * retrieved with crypto_tfm_ctx().
  */
@@ -1292,7 +1139,7 @@
 	u32 *flags = &tfm->crt_flags;
 	int ret;
 
-	ret = crypto_aes_expand_key(ctx, in_key, key_len);
+	ret = aes_expandkey(ctx, in_key, key_len);
 	if (!ret)
 		return 0;
 
@@ -1332,7 +1179,7 @@
 	f_rl(bo, bi, 3, k);	\
 } while (0)
 
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void crypto_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	u32 b0[4], b1[4];
@@ -1402,7 +1249,7 @@
 	i_rl(bo, bi, 3, k);	\
 } while (0)
 
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void crypto_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
 	u32 b0[4], b1[4];
@@ -1454,8 +1301,8 @@
 			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
 			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
 			.cia_setkey		=	crypto_aes_set_key,
-			.cia_encrypt		=	aes_encrypt,
-			.cia_decrypt		=	aes_decrypt
+			.cia_encrypt		=	crypto_aes_encrypt,
+			.cia_decrypt		=	crypto_aes_decrypt
 		}
 	}
 };
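
The rco_tab[] removed above hard-coded the ten AES round constants; they are just repeated doubling in GF(2^8), the same star_x()/xtime operation the deleted key-schedule macros were built on, and the aesti_expand_key() removed further down derives them on the fly the same way (rc = mul_by_x(rc)), as the shared AES library that crypto_aes_set_key() now calls via aes_expandkey() is expected to do. A minimal user-space sketch (illustration only, not kernel code) that regenerates the removed table:

	#include <assert.h>
	#include <stdint.h>
	#include <stdio.h>

	/* xtime: multiply by the polynomial 'x' in GF(2^8), modulo x^8+x^4+x^3+x+1 */
	static uint8_t xtime(uint8_t b)
	{
		return (b << 1) ^ ((b >> 7) * 0x1b);
	}

	int main(void)
	{
		/* the table that aes_generic.c used to hard-code as rco_tab[] */
		static const uint8_t rco_tab[10] = { 1, 2, 4, 8, 16, 32, 64, 128, 27, 54 };
		uint8_t rc = 1;
		int i;

		for (i = 0; i < 10; i++) {
			assert(rc == rco_tab[i]);
			printf("rco_tab[%d] = 0x%02x\n", i, rc);
			rc = xtime(rc);
		}
		return 0;
	}
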
diff --git a/crypto/aes_ti.c b/crypto/aes_ti.c
index 798fc9a..205c2c2 100644
--- a/crypto/aes_ti.c
+++ b/crypto/aes_ti.c
@@ -8,271 +8,19 @@
 #include <crypto/aes.h>
 #include <linux/crypto.h>
 #include <linux/module.h>
-#include <asm/unaligned.h>
-
-/*
- * Emit the sbox as volatile const to prevent the compiler from doing
- * constant folding on sbox references involving fixed indexes.
- */
-static volatile const u8 __cacheline_aligned __aesti_sbox[] = {
-	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
-	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
-	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
-	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
-	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
-	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
-	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
-	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
-	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
-	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
-	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
-	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
-	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
-	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
-	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
-	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
-	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
-	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
-	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
-	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
-	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
-	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
-	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
-	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
-	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
-	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
-	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
-	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
-	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
-	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
-	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
-	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
-};
-
-static volatile const u8 __cacheline_aligned __aesti_inv_sbox[] = {
-	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
-	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
-	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
-	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
-	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
-	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
-	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
-	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
-	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
-	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
-	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
-	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
-	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
-	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
-	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
-	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
-	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
-	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
-	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
-	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
-	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
-	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
-	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
-	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
-	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
-	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
-	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
-	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
-	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
-	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
-	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
-	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
-};
-
-static u32 mul_by_x(u32 w)
-{
-	u32 x = w & 0x7f7f7f7f;
-	u32 y = w & 0x80808080;
-
-	/* multiply by polynomial 'x' (0b10) in GF(2^8) */
-	return (x << 1) ^ (y >> 7) * 0x1b;
-}
-
-static u32 mul_by_x2(u32 w)
-{
-	u32 x = w & 0x3f3f3f3f;
-	u32 y = w & 0x80808080;
-	u32 z = w & 0x40404040;
-
-	/* multiply by polynomial 'x^2' (0b100) in GF(2^8) */
-	return (x << 2) ^ (y >> 7) * 0x36 ^ (z >> 6) * 0x1b;
-}
-
-static u32 mix_columns(u32 x)
-{
-	/*
-	 * Perform the following matrix multiplication in GF(2^8)
-	 *
-	 * | 0x2 0x3 0x1 0x1 |   | x[0] |
-	 * | 0x1 0x2 0x3 0x1 |   | x[1] |
-	 * | 0x1 0x1 0x2 0x3 | x | x[2] |
-	 * | 0x3 0x1 0x1 0x2 |   | x[3] |
-	 */
-	u32 y = mul_by_x(x) ^ ror32(x, 16);
-
-	return y ^ ror32(x ^ y, 8);
-}
-
-static u32 inv_mix_columns(u32 x)
-{
-	/*
-	 * Perform the following matrix multiplication in GF(2^8)
-	 *
-	 * | 0xe 0xb 0xd 0x9 |   | x[0] |
-	 * | 0x9 0xe 0xb 0xd |   | x[1] |
-	 * | 0xd 0x9 0xe 0xb | x | x[2] |
-	 * | 0xb 0xd 0x9 0xe |   | x[3] |
-	 *
-	 * which can conveniently be reduced to
-	 *
-	 * | 0x2 0x3 0x1 0x1 |   | 0x5 0x0 0x4 0x0 |   | x[0] |
-	 * | 0x1 0x2 0x3 0x1 |   | 0x0 0x5 0x0 0x4 |   | x[1] |
-	 * | 0x1 0x1 0x2 0x3 | x | 0x4 0x0 0x5 0x0 | x | x[2] |
-	 * | 0x3 0x1 0x1 0x2 |   | 0x0 0x4 0x0 0x5 |   | x[3] |
-	 */
-	u32 y = mul_by_x2(x);
-
-	return mix_columns(x ^ y ^ ror32(y, 16));
-}
-
-static __always_inline u32 subshift(u32 in[], int pos)
-{
-	return (__aesti_sbox[in[pos] & 0xff]) ^
-	       (__aesti_sbox[(in[(pos + 1) % 4] >>  8) & 0xff] <<  8) ^
-	       (__aesti_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
-	       (__aesti_sbox[(in[(pos + 3) % 4] >> 24) & 0xff] << 24);
-}
-
-static __always_inline u32 inv_subshift(u32 in[], int pos)
-{
-	return (__aesti_inv_sbox[in[pos] & 0xff]) ^
-	       (__aesti_inv_sbox[(in[(pos + 3) % 4] >>  8) & 0xff] <<  8) ^
-	       (__aesti_inv_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
-	       (__aesti_inv_sbox[(in[(pos + 1) % 4] >> 24) & 0xff] << 24);
-}
-
-static u32 subw(u32 in)
-{
-	return (__aesti_sbox[in & 0xff]) ^
-	       (__aesti_sbox[(in >>  8) & 0xff] <<  8) ^
-	       (__aesti_sbox[(in >> 16) & 0xff] << 16) ^
-	       (__aesti_sbox[(in >> 24) & 0xff] << 24);
-}
-
-static int aesti_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
-			    unsigned int key_len)
-{
-	u32 kwords = key_len / sizeof(u32);
-	u32 rc, i, j;
-
-	if (key_len != AES_KEYSIZE_128 &&
-	    key_len != AES_KEYSIZE_192 &&
-	    key_len != AES_KEYSIZE_256)
-		return -EINVAL;
-
-	ctx->key_length = key_len;
-
-	for (i = 0; i < kwords; i++)
-		ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
-
-	for (i = 0, rc = 1; i < 10; i++, rc = mul_by_x(rc)) {
-		u32 *rki = ctx->key_enc + (i * kwords);
-		u32 *rko = rki + kwords;
-
-		rko[0] = ror32(subw(rki[kwords - 1]), 8) ^ rc ^ rki[0];
-		rko[1] = rko[0] ^ rki[1];
-		rko[2] = rko[1] ^ rki[2];
-		rko[3] = rko[2] ^ rki[3];
-
-		if (key_len == 24) {
-			if (i >= 7)
-				break;
-			rko[4] = rko[3] ^ rki[4];
-			rko[5] = rko[4] ^ rki[5];
-		} else if (key_len == 32) {
-			if (i >= 6)
-				break;
-			rko[4] = subw(rko[3]) ^ rki[4];
-			rko[5] = rko[4] ^ rki[5];
-			rko[6] = rko[5] ^ rki[6];
-			rko[7] = rko[6] ^ rki[7];
-		}
-	}
-
-	/*
-	 * Generate the decryption keys for the Equivalent Inverse Cipher.
-	 * This involves reversing the order of the round keys, and applying
-	 * the Inverse Mix Columns transformation to all but the first and
-	 * the last one.
-	 */
-	ctx->key_dec[0] = ctx->key_enc[key_len + 24];
-	ctx->key_dec[1] = ctx->key_enc[key_len + 25];
-	ctx->key_dec[2] = ctx->key_enc[key_len + 26];
-	ctx->key_dec[3] = ctx->key_enc[key_len + 27];
-
-	for (i = 4, j = key_len + 20; j > 0; i += 4, j -= 4) {
-		ctx->key_dec[i]     = inv_mix_columns(ctx->key_enc[j]);
-		ctx->key_dec[i + 1] = inv_mix_columns(ctx->key_enc[j + 1]);
-		ctx->key_dec[i + 2] = inv_mix_columns(ctx->key_enc[j + 2]);
-		ctx->key_dec[i + 3] = inv_mix_columns(ctx->key_enc[j + 3]);
-	}
-
-	ctx->key_dec[i]     = ctx->key_enc[0];
-	ctx->key_dec[i + 1] = ctx->key_enc[1];
-	ctx->key_dec[i + 2] = ctx->key_enc[2];
-	ctx->key_dec[i + 3] = ctx->key_enc[3];
-
-	return 0;
-}
 
 static int aesti_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 			 unsigned int key_len)
 {
 	struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	int err;
 
-	err = aesti_expand_key(ctx, in_key, key_len);
-	if (err)
-		return err;
-
-	/*
-	 * In order to force the compiler to emit data independent Sbox lookups
-	 * at the start of each block, xor the first round key with values at
-	 * fixed indexes in the Sbox. This will need to be repeated each time
-	 * the key is used, which will pull the entire Sbox into the D-cache
-	 * before any data dependent Sbox lookups are performed.
-	 */
-	ctx->key_enc[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128];
-	ctx->key_enc[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160];
-	ctx->key_enc[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192];
-	ctx->key_enc[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224];
-
-	ctx->key_dec[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128];
-	ctx->key_dec[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160];
-	ctx->key_dec[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192];
-	ctx->key_dec[3] ^= __aesti_inv_sbox[96] ^ __aesti_inv_sbox[224];
-
-	return 0;
+	return aes_expandkey(ctx, in_key, key_len);
 }
 
 static void aesti_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	const u32 *rkp = ctx->key_enc + 4;
-	int rounds = 6 + ctx->key_length / 4;
-	u32 st0[4], st1[4];
 	unsigned long flags;
-	int round;
-
-	st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
-	st0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4);
-	st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
-	st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);
 
 	/*
 	 * Temporarily disable interrupts to avoid races where cachelines are
@@ -280,30 +28,7 @@
 	 */
 	local_irq_save(flags);
 
-	st0[0] ^= __aesti_sbox[ 0] ^ __aesti_sbox[128];
-	st0[1] ^= __aesti_sbox[32] ^ __aesti_sbox[160];
-	st0[2] ^= __aesti_sbox[64] ^ __aesti_sbox[192];
-	st0[3] ^= __aesti_sbox[96] ^ __aesti_sbox[224];
-
-	for (round = 0;; round += 2, rkp += 8) {
-		st1[0] = mix_columns(subshift(st0, 0)) ^ rkp[0];
-		st1[1] = mix_columns(subshift(st0, 1)) ^ rkp[1];
-		st1[2] = mix_columns(subshift(st0, 2)) ^ rkp[2];
-		st1[3] = mix_columns(subshift(st0, 3)) ^ rkp[3];
-
-		if (round == rounds - 2)
-			break;
-
-		st0[0] = mix_columns(subshift(st1, 0)) ^ rkp[4];
-		st0[1] = mix_columns(subshift(st1, 1)) ^ rkp[5];
-		st0[2] = mix_columns(subshift(st1, 2)) ^ rkp[6];
-		st0[3] = mix_columns(subshift(st1, 3)) ^ rkp[7];
-	}
-
-	put_unaligned_le32(subshift(st1, 0) ^ rkp[4], out);
-	put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4);
-	put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8);
-	put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12);
+	aes_encrypt(ctx, out, in);
 
 	local_irq_restore(flags);
 }
@@ -311,16 +36,7 @@
 static void aesti_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	const struct crypto_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	const u32 *rkp = ctx->key_dec + 4;
-	int rounds = 6 + ctx->key_length / 4;
-	u32 st0[4], st1[4];
 	unsigned long flags;
-	int round;
-
-	st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
-	st0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4);
-	st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
-	st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);
 
 	/*
 	 * Temporarily disable interrupts to avoid races where cachelines are
@@ -328,30 +44,7 @@
 	 */
 	local_irq_save(flags);
 
-	st0[0] ^= __aesti_inv_sbox[ 0] ^ __aesti_inv_sbox[128];
-	st0[1] ^= __aesti_inv_sbox[32] ^ __aesti_inv_sbox[160];
-	st0[2] ^= __aesti_inv_sbox[64] ^ __aesti_inv_sbox[192];
-	st0[3] ^= __aesti_inv_sbox[96] ^ __aesti_inv_sbox[224];
-
-	for (round = 0;; round += 2, rkp += 8) {
-		st1[0] = inv_mix_columns(inv_subshift(st0, 0)) ^ rkp[0];
-		st1[1] = inv_mix_columns(inv_subshift(st0, 1)) ^ rkp[1];
-		st1[2] = inv_mix_columns(inv_subshift(st0, 2)) ^ rkp[2];
-		st1[3] = inv_mix_columns(inv_subshift(st0, 3)) ^ rkp[3];
-
-		if (round == rounds - 2)
-			break;
-
-		st0[0] = inv_mix_columns(inv_subshift(st1, 0)) ^ rkp[4];
-		st0[1] = inv_mix_columns(inv_subshift(st1, 1)) ^ rkp[5];
-		st0[2] = inv_mix_columns(inv_subshift(st1, 2)) ^ rkp[6];
-		st0[3] = inv_mix_columns(inv_subshift(st1, 3)) ^ rkp[7];
-	}
-
-	put_unaligned_le32(inv_subshift(st1, 0) ^ rkp[4], out);
-	put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4);
-	put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8);
-	put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12);
+	aes_decrypt(ctx, out, in);
 
 	local_irq_restore(flags);
 }
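
The mul_by_x()/mix_columns() helpers deleted above operate on one AES column packed into a 32-bit word (first column byte in the least significant position) and document MixColumns as a GF(2^8) matrix multiplication. Below is a self-contained user-space sketch of those two helpers, checked against the widely published MixColumns test column db 13 53 45 -> 8e 4d a1 bc; it is illustration only, since the shared AES library that aesti_encrypt()/aesti_decrypt() now call carries the real implementation, including the interrupt-disabled S-box prefetch described in the removed comments.

	#include <assert.h>
	#include <stdint.h>

	static uint32_t ror32(uint32_t x, unsigned int r)
	{
		return (x >> r) | (x << (32 - r));
	}

	/* Multiply each byte of the packed column by the polynomial 'x' in GF(2^8). */
	static uint32_t mul_by_x(uint32_t w)
	{
		uint32_t x = w & 0x7f7f7f7f;
		uint32_t y = w & 0x80808080;

		return (x << 1) ^ (y >> 7) * 0x1b;
	}

	/* MixColumns on one column packed as 0x<b3><b2><b1><b0>, b0 in the low byte. */
	static uint32_t mix_columns(uint32_t x)
	{
		uint32_t y = mul_by_x(x) ^ ror32(x, 16);

		return y ^ ror32(x ^ y, 8);
	}

	int main(void)
	{
		/* standard test column: db 13 53 45 must map to 8e 4d a1 bc */
		assert(mix_columns(0x455313db) == 0xbca14d8e);
		return 0;
	}
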
diff --git a/crypto/cryptd.c b/crypto/cryptd.c
index 3748f9b..927760b 100644
--- a/crypto/cryptd.c
+++ b/crypto/cryptd.c
@@ -16,7 +16,7 @@
 #include <crypto/internal/aead.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/cryptd.h>
-#include <linux/atomic.h>
+#include <linux/refcount.h>
 #include <linux/err.h>
 #include <linux/init.h>
 #include <linux/kernel.h>
@@ -63,7 +63,7 @@
 };
 
 struct cryptd_skcipher_ctx {
-	atomic_t refcnt;
+	refcount_t refcnt;
 	struct crypto_sync_skcipher *child;
 };
 
@@ -72,7 +72,7 @@
 };
 
 struct cryptd_hash_ctx {
-	atomic_t refcnt;
+	refcount_t refcnt;
 	struct crypto_shash *child;
 };
 
@@ -82,7 +82,7 @@
 };
 
 struct cryptd_aead_ctx {
-	atomic_t refcnt;
+	refcount_t refcnt;
 	struct crypto_aead *child;
 };
 
@@ -127,7 +127,7 @@
 {
 	int cpu, err;
 	struct cryptd_cpu_queue *cpu_queue;
-	atomic_t *refcnt;
+	refcount_t *refcnt;
 
 	cpu = get_cpu();
 	cpu_queue = this_cpu_ptr(queue->cpu_queue);
@@ -140,10 +140,10 @@
 
 	queue_work_on(cpu, cryptd_wq, &cpu_queue->work);
 
-	if (!atomic_read(refcnt))
+	if (!refcount_read(refcnt))
 		goto out_put_cpu;
 
-	atomic_inc(refcnt);
+	refcount_inc(refcnt);
 
 out_put_cpu:
 	put_cpu();
@@ -270,13 +270,13 @@
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(tfm);
 	struct cryptd_skcipher_request_ctx *rctx = skcipher_request_ctx(req);
-	int refcnt = atomic_read(&ctx->refcnt);
+	int refcnt = refcount_read(&ctx->refcnt);
 
 	local_bh_disable();
 	rctx->complete(&req->base, err);
 	local_bh_enable();
 
-	if (err != -EINPROGRESS && refcnt && atomic_dec_and_test(&ctx->refcnt))
+	if (err != -EINPROGRESS && refcnt && refcount_dec_and_test(&ctx->refcnt))
 		crypto_free_skcipher(tfm);
 }
 
@@ -521,13 +521,13 @@
 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
 	struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(tfm);
 	struct cryptd_hash_request_ctx *rctx = ahash_request_ctx(req);
-	int refcnt = atomic_read(&ctx->refcnt);
+	int refcnt = refcount_read(&ctx->refcnt);
 
 	local_bh_disable();
 	rctx->complete(&req->base, err);
 	local_bh_enable();
 
-	if (err != -EINPROGRESS && refcnt && atomic_dec_and_test(&ctx->refcnt))
+	if (err != -EINPROGRESS && refcnt && refcount_dec_and_test(&ctx->refcnt))
 		crypto_free_ahash(tfm);
 }
 
@@ -772,13 +772,13 @@
 
 out:
 	ctx = crypto_aead_ctx(tfm);
-	refcnt = atomic_read(&ctx->refcnt);
+	refcnt = refcount_read(&ctx->refcnt);
 
 	local_bh_disable();
 	compl(&req->base, err);
 	local_bh_enable();
 
-	if (err != -EINPROGRESS && refcnt && atomic_dec_and_test(&ctx->refcnt))
+	if (err != -EINPROGRESS && refcnt && refcount_dec_and_test(&ctx->refcnt))
 		crypto_free_aead(tfm);
 }
 
@@ -979,7 +979,7 @@
 	}
 
 	ctx = crypto_skcipher_ctx(tfm);
-	atomic_set(&ctx->refcnt, 1);
+	refcount_set(&ctx->refcnt, 1);
 
 	return container_of(tfm, struct cryptd_skcipher, base);
 }
@@ -997,7 +997,7 @@
 {
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
 
-	return atomic_read(&ctx->refcnt) - 1;
+	return refcount_read(&ctx->refcnt) - 1;
 }
 EXPORT_SYMBOL_GPL(cryptd_skcipher_queued);
 
@@ -1005,7 +1005,7 @@
 {
 	struct cryptd_skcipher_ctx *ctx = crypto_skcipher_ctx(&tfm->base);
 
-	if (atomic_dec_and_test(&ctx->refcnt))
+	if (refcount_dec_and_test(&ctx->refcnt))
 		crypto_free_skcipher(&tfm->base);
 }
 EXPORT_SYMBOL_GPL(cryptd_free_skcipher);
@@ -1029,7 +1029,7 @@
 	}
 
 	ctx = crypto_ahash_ctx(tfm);
-	atomic_set(&ctx->refcnt, 1);
+	refcount_set(&ctx->refcnt, 1);
 
 	return __cryptd_ahash_cast(tfm);
 }
@@ -1054,7 +1054,7 @@
 {
 	struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base);
 
-	return atomic_read(&ctx->refcnt) - 1;
+	return refcount_read(&ctx->refcnt) - 1;
 }
 EXPORT_SYMBOL_GPL(cryptd_ahash_queued);
 
@@ -1062,7 +1062,7 @@
 {
 	struct cryptd_hash_ctx *ctx = crypto_ahash_ctx(&tfm->base);
 
-	if (atomic_dec_and_test(&ctx->refcnt))
+	if (refcount_dec_and_test(&ctx->refcnt))
 		crypto_free_ahash(&tfm->base);
 }
 EXPORT_SYMBOL_GPL(cryptd_free_ahash);
@@ -1086,7 +1086,7 @@
 	}
 
 	ctx = crypto_aead_ctx(tfm);
-	atomic_set(&ctx->refcnt, 1);
+	refcount_set(&ctx->refcnt, 1);
 
 	return __cryptd_aead_cast(tfm);
 }
@@ -1104,7 +1104,7 @@
 {
 	struct cryptd_aead_ctx *ctx = crypto_aead_ctx(&tfm->base);
 
-	return atomic_read(&ctx->refcnt) - 1;
+	return refcount_read(&ctx->refcnt) - 1;
 }
 EXPORT_SYMBOL_GPL(cryptd_aead_queued);
 
@@ -1112,7 +1112,7 @@
 {
 	struct cryptd_aead_ctx *ctx = crypto_aead_ctx(&tfm->base);
 
-	if (atomic_dec_and_test(&ctx->refcnt))
+	if (refcount_dec_and_test(&ctx->refcnt))
 		crypto_free_aead(&tfm->base);
 }
 EXPORT_SYMBOL_GPL(cryptd_free_aead);
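
The atomic_t -> refcount_t conversion above does not change cryptd's lifetime scheme: the count starts at 1 when the user allocates the cryptd tfm, an extra reference is taken only while a request is queued (and only if the user still holds theirs), and whoever drops the count to zero frees the tfm. refcount_t simply saturates and warns instead of silently wrapping on misuse. A rough single-threaded user-space analogue of that pattern, using C11 atomics purely for illustration (the kernel type has stronger overflow/underflow protection):

	#include <assert.h>
	#include <stdatomic.h>
	#include <stdlib.h>

	struct tfm {
		atomic_int refcnt;	/* stands in for refcount_t */
		/* ... algorithm state ... */
	};

	static struct tfm *tfm_alloc(void)
	{
		struct tfm *t = malloc(sizeof(*t));

		if (t)
			atomic_init(&t->refcnt, 1);	/* refcount_set(&ctx->refcnt, 1) */
		return t;
	}

	/* Queue side: pin the tfm only if user-visible references still exist. */
	static void tfm_queue_request(struct tfm *t)
	{
		if (atomic_load(&t->refcnt))		/* refcount_read() */
			atomic_fetch_add(&t->refcnt, 1);	/* refcount_inc() */
	}

	/* Completion / final put: the last reference frees the object. */
	static void tfm_put(struct tfm *t)
	{
		if (atomic_fetch_sub(&t->refcnt, 1) == 1)	/* refcount_dec_and_test() */
			free(t);
	}

	int main(void)
	{
		struct tfm *t = tfm_alloc();

		assert(t);
		tfm_queue_request(t);	/* request in flight */
		tfm_put(t);		/* completion drops the queue reference */
		tfm_put(t);		/* user's final put frees the tfm */
		return 0;
	}
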
diff --git a/crypto/crypto_engine.c b/crypto/crypto_engine.c
index d7502ec..055d179 100644
--- a/crypto/crypto_engine.c
+++ b/crypto/crypto_engine.c
@@ -425,7 +425,7 @@
  */
 struct crypto_engine *crypto_engine_alloc_init(struct device *dev, bool rt)
 {
-	struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
+	struct sched_param param = { .sched_priority = MAX_RT_PRIO / 2 };
 	struct crypto_engine *engine;
 
 	if (!dev)
diff --git a/crypto/crypto_user_base.c b/crypto/crypto_user_base.c
index c65e390..910e0b4 100644
--- a/crypto/crypto_user_base.c
+++ b/crypto/crypto_user_base.c
@@ -10,9 +10,10 @@
 #include <linux/crypto.h>
 #include <linux/cryptouser.h>
 #include <linux/sched.h>
-#include <net/netlink.h>
 #include <linux/security.h>
+#include <net/netlink.h>
 #include <net/net_namespace.h>
+#include <net/sock.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/internal/rng.h>
 #include <crypto/akcipher.h>
@@ -25,9 +26,6 @@
 
 static DEFINE_MUTEX(crypto_cfg_mutex);
 
-/* The crypto netlink socket */
-struct sock *crypto_nlsk;
-
 struct crypto_dump_info {
 	struct sk_buff *in_skb;
 	struct sk_buff *out_skb;
@@ -186,6 +184,7 @@
 static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
 			 struct nlattr **attrs)
 {
+	struct net *net = sock_net(in_skb->sk);
 	struct crypto_user_alg *p = nlmsg_data(in_nlh);
 	struct crypto_alg *alg;
 	struct sk_buff *skb;
@@ -217,7 +216,7 @@
 	if (err)
 		return err;
 
-	return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
+	return nlmsg_unicast(net->crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
 }
 
 static int crypto_dump_report(struct sk_buff *skb, struct netlink_callback *cb)
@@ -420,6 +419,7 @@
 static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh,
 			       struct netlink_ext_ack *extack)
 {
+	struct net *net = sock_net(skb->sk);
 	struct nlattr *attrs[CRYPTOCFGA_MAX+1];
 	const struct crypto_link *link;
 	int type, err;
@@ -450,7 +450,7 @@
 				.done = link->done,
 				.min_dump_alloc = min(dump_alloc, 65535UL),
 			};
-			err = netlink_dump_start(crypto_nlsk, skb, nlh, &c);
+			err = netlink_dump_start(net->crypto_nlsk, skb, nlh, &c);
 		}
 
 		return err;
@@ -474,22 +474,35 @@
 	mutex_unlock(&crypto_cfg_mutex);
 }
 
-static int __init crypto_user_init(void)
+static int __net_init crypto_netlink_init(struct net *net)
 {
 	struct netlink_kernel_cfg cfg = {
 		.input	= crypto_netlink_rcv,
 	};
 
-	crypto_nlsk = netlink_kernel_create(&init_net, NETLINK_CRYPTO, &cfg);
-	if (!crypto_nlsk)
-		return -ENOMEM;
+	net->crypto_nlsk = netlink_kernel_create(net, NETLINK_CRYPTO, &cfg);
+	return net->crypto_nlsk == NULL ? -ENOMEM : 0;
+}
 
-	return 0;
+static void __net_exit crypto_netlink_exit(struct net *net)
+{
+	netlink_kernel_release(net->crypto_nlsk);
+	net->crypto_nlsk = NULL;
+}
+
+static struct pernet_operations crypto_netlink_net_ops = {
+	.init = crypto_netlink_init,
+	.exit = crypto_netlink_exit,
+};
+
+static int __init crypto_user_init(void)
+{
+	return register_pernet_subsys(&crypto_netlink_net_ops);
 }
 
 static void __exit crypto_user_exit(void)
 {
-	netlink_kernel_release(crypto_nlsk);
+	unregister_pernet_subsys(&crypto_netlink_net_ops);
 }
 
 module_init(crypto_user_init);
diff --git a/crypto/crypto_user_stat.c b/crypto/crypto_user_stat.c
index a03f326..8bad884 100644
--- a/crypto/crypto_user_stat.c
+++ b/crypto/crypto_user_stat.c
@@ -10,6 +10,7 @@
 #include <linux/cryptouser.h>
 #include <linux/sched.h>
 #include <net/netlink.h>
+#include <net/sock.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/internal/rng.h>
 #include <crypto/akcipher.h>
@@ -298,6 +299,7 @@
 int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh,
 		      struct nlattr **attrs)
 {
+	struct net *net = sock_net(in_skb->sk);
 	struct crypto_user_alg *p = nlmsg_data(in_nlh);
 	struct crypto_alg *alg;
 	struct sk_buff *skb;
@@ -329,7 +331,7 @@
 	if (err)
 		return err;
 
-	return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
+	return nlmsg_unicast(net->crypto_nlsk, skb, NETLINK_CB(in_skb).portid);
 }
 
 MODULE_LICENSE("GPL");
diff --git a/crypto/des_generic.c b/crypto/des_generic.c
index dc085514..6e13a4a 100644
--- a/crypto/des_generic.c
+++ b/crypto/des_generic.c
@@ -13,934 +13,79 @@
 #include <linux/module.h>
 #include <linux/errno.h>
 #include <linux/crypto.h>
-#include <linux/types.h>
 
-#include <crypto/des.h>
-
-#define ROL(x, r) ((x) = rol32((x), (r)))
-#define ROR(x, r) ((x) = ror32((x), (r)))
-
-struct des_ctx {
-	u32 expkey[DES_EXPKEY_WORDS];
-};
-
-struct des3_ede_ctx {
-	u32 expkey[DES3_EDE_EXPKEY_WORDS];
-};
-
-/* Lookup tables for key expansion */
-
-static const u8 pc1[256] = {
-	0x00, 0x00, 0x40, 0x04, 0x10, 0x10, 0x50, 0x14,
-	0x04, 0x40, 0x44, 0x44, 0x14, 0x50, 0x54, 0x54,
-	0x02, 0x02, 0x42, 0x06, 0x12, 0x12, 0x52, 0x16,
-	0x06, 0x42, 0x46, 0x46, 0x16, 0x52, 0x56, 0x56,
-	0x80, 0x08, 0xc0, 0x0c, 0x90, 0x18, 0xd0, 0x1c,
-	0x84, 0x48, 0xc4, 0x4c, 0x94, 0x58, 0xd4, 0x5c,
-	0x82, 0x0a, 0xc2, 0x0e, 0x92, 0x1a, 0xd2, 0x1e,
-	0x86, 0x4a, 0xc6, 0x4e, 0x96, 0x5a, 0xd6, 0x5e,
-	0x20, 0x20, 0x60, 0x24, 0x30, 0x30, 0x70, 0x34,
-	0x24, 0x60, 0x64, 0x64, 0x34, 0x70, 0x74, 0x74,
-	0x22, 0x22, 0x62, 0x26, 0x32, 0x32, 0x72, 0x36,
-	0x26, 0x62, 0x66, 0x66, 0x36, 0x72, 0x76, 0x76,
-	0xa0, 0x28, 0xe0, 0x2c, 0xb0, 0x38, 0xf0, 0x3c,
-	0xa4, 0x68, 0xe4, 0x6c, 0xb4, 0x78, 0xf4, 0x7c,
-	0xa2, 0x2a, 0xe2, 0x2e, 0xb2, 0x3a, 0xf2, 0x3e,
-	0xa6, 0x6a, 0xe6, 0x6e, 0xb6, 0x7a, 0xf6, 0x7e,
-	0x08, 0x80, 0x48, 0x84, 0x18, 0x90, 0x58, 0x94,
-	0x0c, 0xc0, 0x4c, 0xc4, 0x1c, 0xd0, 0x5c, 0xd4,
-	0x0a, 0x82, 0x4a, 0x86, 0x1a, 0x92, 0x5a, 0x96,
-	0x0e, 0xc2, 0x4e, 0xc6, 0x1e, 0xd2, 0x5e, 0xd6,
-	0x88, 0x88, 0xc8, 0x8c, 0x98, 0x98, 0xd8, 0x9c,
-	0x8c, 0xc8, 0xcc, 0xcc, 0x9c, 0xd8, 0xdc, 0xdc,
-	0x8a, 0x8a, 0xca, 0x8e, 0x9a, 0x9a, 0xda, 0x9e,
-	0x8e, 0xca, 0xce, 0xce, 0x9e, 0xda, 0xde, 0xde,
-	0x28, 0xa0, 0x68, 0xa4, 0x38, 0xb0, 0x78, 0xb4,
-	0x2c, 0xe0, 0x6c, 0xe4, 0x3c, 0xf0, 0x7c, 0xf4,
-	0x2a, 0xa2, 0x6a, 0xa6, 0x3a, 0xb2, 0x7a, 0xb6,
-	0x2e, 0xe2, 0x6e, 0xe6, 0x3e, 0xf2, 0x7e, 0xf6,
-	0xa8, 0xa8, 0xe8, 0xac, 0xb8, 0xb8, 0xf8, 0xbc,
-	0xac, 0xe8, 0xec, 0xec, 0xbc, 0xf8, 0xfc, 0xfc,
-	0xaa, 0xaa, 0xea, 0xae, 0xba, 0xba, 0xfa, 0xbe,
-	0xae, 0xea, 0xee, 0xee, 0xbe, 0xfa, 0xfe, 0xfe
-};
-
-static const u8 rs[256] = {
-	0x00, 0x00, 0x80, 0x80, 0x02, 0x02, 0x82, 0x82,
-	0x04, 0x04, 0x84, 0x84, 0x06, 0x06, 0x86, 0x86,
-	0x08, 0x08, 0x88, 0x88, 0x0a, 0x0a, 0x8a, 0x8a,
-	0x0c, 0x0c, 0x8c, 0x8c, 0x0e, 0x0e, 0x8e, 0x8e,
-	0x10, 0x10, 0x90, 0x90, 0x12, 0x12, 0x92, 0x92,
-	0x14, 0x14, 0x94, 0x94, 0x16, 0x16, 0x96, 0x96,
-	0x18, 0x18, 0x98, 0x98, 0x1a, 0x1a, 0x9a, 0x9a,
-	0x1c, 0x1c, 0x9c, 0x9c, 0x1e, 0x1e, 0x9e, 0x9e,
-	0x20, 0x20, 0xa0, 0xa0, 0x22, 0x22, 0xa2, 0xa2,
-	0x24, 0x24, 0xa4, 0xa4, 0x26, 0x26, 0xa6, 0xa6,
-	0x28, 0x28, 0xa8, 0xa8, 0x2a, 0x2a, 0xaa, 0xaa,
-	0x2c, 0x2c, 0xac, 0xac, 0x2e, 0x2e, 0xae, 0xae,
-	0x30, 0x30, 0xb0, 0xb0, 0x32, 0x32, 0xb2, 0xb2,
-	0x34, 0x34, 0xb4, 0xb4, 0x36, 0x36, 0xb6, 0xb6,
-	0x38, 0x38, 0xb8, 0xb8, 0x3a, 0x3a, 0xba, 0xba,
-	0x3c, 0x3c, 0xbc, 0xbc, 0x3e, 0x3e, 0xbe, 0xbe,
-	0x40, 0x40, 0xc0, 0xc0, 0x42, 0x42, 0xc2, 0xc2,
-	0x44, 0x44, 0xc4, 0xc4, 0x46, 0x46, 0xc6, 0xc6,
-	0x48, 0x48, 0xc8, 0xc8, 0x4a, 0x4a, 0xca, 0xca,
-	0x4c, 0x4c, 0xcc, 0xcc, 0x4e, 0x4e, 0xce, 0xce,
-	0x50, 0x50, 0xd0, 0xd0, 0x52, 0x52, 0xd2, 0xd2,
-	0x54, 0x54, 0xd4, 0xd4, 0x56, 0x56, 0xd6, 0xd6,
-	0x58, 0x58, 0xd8, 0xd8, 0x5a, 0x5a, 0xda, 0xda,
-	0x5c, 0x5c, 0xdc, 0xdc, 0x5e, 0x5e, 0xde, 0xde,
-	0x60, 0x60, 0xe0, 0xe0, 0x62, 0x62, 0xe2, 0xe2,
-	0x64, 0x64, 0xe4, 0xe4, 0x66, 0x66, 0xe6, 0xe6,
-	0x68, 0x68, 0xe8, 0xe8, 0x6a, 0x6a, 0xea, 0xea,
-	0x6c, 0x6c, 0xec, 0xec, 0x6e, 0x6e, 0xee, 0xee,
-	0x70, 0x70, 0xf0, 0xf0, 0x72, 0x72, 0xf2, 0xf2,
-	0x74, 0x74, 0xf4, 0xf4, 0x76, 0x76, 0xf6, 0xf6,
-	0x78, 0x78, 0xf8, 0xf8, 0x7a, 0x7a, 0xfa, 0xfa,
-	0x7c, 0x7c, 0xfc, 0xfc, 0x7e, 0x7e, 0xfe, 0xfe
-};
-
-static const u32 pc2[1024] = {
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
-	0x00040000, 0x00000000, 0x04000000, 0x00100000,
-	0x00400000, 0x00000008, 0x00000800, 0x40000000,
-	0x00440000, 0x00000008, 0x04000800, 0x40100000,
-	0x00000400, 0x00000020, 0x08000000, 0x00000100,
-	0x00040400, 0x00000020, 0x0c000000, 0x00100100,
-	0x00400400, 0x00000028, 0x08000800, 0x40000100,
-	0x00440400, 0x00000028, 0x0c000800, 0x40100100,
-	0x80000000, 0x00000010, 0x00000000, 0x00800000,
-	0x80040000, 0x00000010, 0x04000000, 0x00900000,
-	0x80400000, 0x00000018, 0x00000800, 0x40800000,
-	0x80440000, 0x00000018, 0x04000800, 0x40900000,
-	0x80000400, 0x00000030, 0x08000000, 0x00800100,
-	0x80040400, 0x00000030, 0x0c000000, 0x00900100,
-	0x80400400, 0x00000038, 0x08000800, 0x40800100,
-	0x80440400, 0x00000038, 0x0c000800, 0x40900100,
-	0x10000000, 0x00000000, 0x00200000, 0x00001000,
-	0x10040000, 0x00000000, 0x04200000, 0x00101000,
-	0x10400000, 0x00000008, 0x00200800, 0x40001000,
-	0x10440000, 0x00000008, 0x04200800, 0x40101000,
-	0x10000400, 0x00000020, 0x08200000, 0x00001100,
-	0x10040400, 0x00000020, 0x0c200000, 0x00101100,
-	0x10400400, 0x00000028, 0x08200800, 0x40001100,
-	0x10440400, 0x00000028, 0x0c200800, 0x40101100,
-	0x90000000, 0x00000010, 0x00200000, 0x00801000,
-	0x90040000, 0x00000010, 0x04200000, 0x00901000,
-	0x90400000, 0x00000018, 0x00200800, 0x40801000,
-	0x90440000, 0x00000018, 0x04200800, 0x40901000,
-	0x90000400, 0x00000030, 0x08200000, 0x00801100,
-	0x90040400, 0x00000030, 0x0c200000, 0x00901100,
-	0x90400400, 0x00000038, 0x08200800, 0x40801100,
-	0x90440400, 0x00000038, 0x0c200800, 0x40901100,
-	0x00000200, 0x00080000, 0x00000000, 0x00000004,
-	0x00040200, 0x00080000, 0x04000000, 0x00100004,
-	0x00400200, 0x00080008, 0x00000800, 0x40000004,
-	0x00440200, 0x00080008, 0x04000800, 0x40100004,
-	0x00000600, 0x00080020, 0x08000000, 0x00000104,
-	0x00040600, 0x00080020, 0x0c000000, 0x00100104,
-	0x00400600, 0x00080028, 0x08000800, 0x40000104,
-	0x00440600, 0x00080028, 0x0c000800, 0x40100104,
-	0x80000200, 0x00080010, 0x00000000, 0x00800004,
-	0x80040200, 0x00080010, 0x04000000, 0x00900004,
-	0x80400200, 0x00080018, 0x00000800, 0x40800004,
-	0x80440200, 0x00080018, 0x04000800, 0x40900004,
-	0x80000600, 0x00080030, 0x08000000, 0x00800104,
-	0x80040600, 0x00080030, 0x0c000000, 0x00900104,
-	0x80400600, 0x00080038, 0x08000800, 0x40800104,
-	0x80440600, 0x00080038, 0x0c000800, 0x40900104,
-	0x10000200, 0x00080000, 0x00200000, 0x00001004,
-	0x10040200, 0x00080000, 0x04200000, 0x00101004,
-	0x10400200, 0x00080008, 0x00200800, 0x40001004,
-	0x10440200, 0x00080008, 0x04200800, 0x40101004,
-	0x10000600, 0x00080020, 0x08200000, 0x00001104,
-	0x10040600, 0x00080020, 0x0c200000, 0x00101104,
-	0x10400600, 0x00080028, 0x08200800, 0x40001104,
-	0x10440600, 0x00080028, 0x0c200800, 0x40101104,
-	0x90000200, 0x00080010, 0x00200000, 0x00801004,
-	0x90040200, 0x00080010, 0x04200000, 0x00901004,
-	0x90400200, 0x00080018, 0x00200800, 0x40801004,
-	0x90440200, 0x00080018, 0x04200800, 0x40901004,
-	0x90000600, 0x00080030, 0x08200000, 0x00801104,
-	0x90040600, 0x00080030, 0x0c200000, 0x00901104,
-	0x90400600, 0x00080038, 0x08200800, 0x40801104,
-	0x90440600, 0x00080038, 0x0c200800, 0x40901104,
-	0x00000002, 0x00002000, 0x20000000, 0x00000001,
-	0x00040002, 0x00002000, 0x24000000, 0x00100001,
-	0x00400002, 0x00002008, 0x20000800, 0x40000001,
-	0x00440002, 0x00002008, 0x24000800, 0x40100001,
-	0x00000402, 0x00002020, 0x28000000, 0x00000101,
-	0x00040402, 0x00002020, 0x2c000000, 0x00100101,
-	0x00400402, 0x00002028, 0x28000800, 0x40000101,
-	0x00440402, 0x00002028, 0x2c000800, 0x40100101,
-	0x80000002, 0x00002010, 0x20000000, 0x00800001,
-	0x80040002, 0x00002010, 0x24000000, 0x00900001,
-	0x80400002, 0x00002018, 0x20000800, 0x40800001,
-	0x80440002, 0x00002018, 0x24000800, 0x40900001,
-	0x80000402, 0x00002030, 0x28000000, 0x00800101,
-	0x80040402, 0x00002030, 0x2c000000, 0x00900101,
-	0x80400402, 0x00002038, 0x28000800, 0x40800101,
-	0x80440402, 0x00002038, 0x2c000800, 0x40900101,
-	0x10000002, 0x00002000, 0x20200000, 0x00001001,
-	0x10040002, 0x00002000, 0x24200000, 0x00101001,
-	0x10400002, 0x00002008, 0x20200800, 0x40001001,
-	0x10440002, 0x00002008, 0x24200800, 0x40101001,
-	0x10000402, 0x00002020, 0x28200000, 0x00001101,
-	0x10040402, 0x00002020, 0x2c200000, 0x00101101,
-	0x10400402, 0x00002028, 0x28200800, 0x40001101,
-	0x10440402, 0x00002028, 0x2c200800, 0x40101101,
-	0x90000002, 0x00002010, 0x20200000, 0x00801001,
-	0x90040002, 0x00002010, 0x24200000, 0x00901001,
-	0x90400002, 0x00002018, 0x20200800, 0x40801001,
-	0x90440002, 0x00002018, 0x24200800, 0x40901001,
-	0x90000402, 0x00002030, 0x28200000, 0x00801101,
-	0x90040402, 0x00002030, 0x2c200000, 0x00901101,
-	0x90400402, 0x00002038, 0x28200800, 0x40801101,
-	0x90440402, 0x00002038, 0x2c200800, 0x40901101,
-	0x00000202, 0x00082000, 0x20000000, 0x00000005,
-	0x00040202, 0x00082000, 0x24000000, 0x00100005,
-	0x00400202, 0x00082008, 0x20000800, 0x40000005,
-	0x00440202, 0x00082008, 0x24000800, 0x40100005,
-	0x00000602, 0x00082020, 0x28000000, 0x00000105,
-	0x00040602, 0x00082020, 0x2c000000, 0x00100105,
-	0x00400602, 0x00082028, 0x28000800, 0x40000105,
-	0x00440602, 0x00082028, 0x2c000800, 0x40100105,
-	0x80000202, 0x00082010, 0x20000000, 0x00800005,
-	0x80040202, 0x00082010, 0x24000000, 0x00900005,
-	0x80400202, 0x00082018, 0x20000800, 0x40800005,
-	0x80440202, 0x00082018, 0x24000800, 0x40900005,
-	0x80000602, 0x00082030, 0x28000000, 0x00800105,
-	0x80040602, 0x00082030, 0x2c000000, 0x00900105,
-	0x80400602, 0x00082038, 0x28000800, 0x40800105,
-	0x80440602, 0x00082038, 0x2c000800, 0x40900105,
-	0x10000202, 0x00082000, 0x20200000, 0x00001005,
-	0x10040202, 0x00082000, 0x24200000, 0x00101005,
-	0x10400202, 0x00082008, 0x20200800, 0x40001005,
-	0x10440202, 0x00082008, 0x24200800, 0x40101005,
-	0x10000602, 0x00082020, 0x28200000, 0x00001105,
-	0x10040602, 0x00082020, 0x2c200000, 0x00101105,
-	0x10400602, 0x00082028, 0x28200800, 0x40001105,
-	0x10440602, 0x00082028, 0x2c200800, 0x40101105,
-	0x90000202, 0x00082010, 0x20200000, 0x00801005,
-	0x90040202, 0x00082010, 0x24200000, 0x00901005,
-	0x90400202, 0x00082018, 0x20200800, 0x40801005,
-	0x90440202, 0x00082018, 0x24200800, 0x40901005,
-	0x90000602, 0x00082030, 0x28200000, 0x00801105,
-	0x90040602, 0x00082030, 0x2c200000, 0x00901105,
-	0x90400602, 0x00082038, 0x28200800, 0x40801105,
-	0x90440602, 0x00082038, 0x2c200800, 0x40901105,
-
-	0x00000000, 0x00000000, 0x00000000, 0x00000000,
-	0x00000000, 0x00000008, 0x00080000, 0x10000000,
-	0x02000000, 0x00000000, 0x00000080, 0x00001000,
-	0x02000000, 0x00000008, 0x00080080, 0x10001000,
-	0x00004000, 0x00000000, 0x00000040, 0x00040000,
-	0x00004000, 0x00000008, 0x00080040, 0x10040000,
-	0x02004000, 0x00000000, 0x000000c0, 0x00041000,
-	0x02004000, 0x00000008, 0x000800c0, 0x10041000,
-	0x00020000, 0x00008000, 0x08000000, 0x00200000,
-	0x00020000, 0x00008008, 0x08080000, 0x10200000,
-	0x02020000, 0x00008000, 0x08000080, 0x00201000,
-	0x02020000, 0x00008008, 0x08080080, 0x10201000,
-	0x00024000, 0x00008000, 0x08000040, 0x00240000,
-	0x00024000, 0x00008008, 0x08080040, 0x10240000,
-	0x02024000, 0x00008000, 0x080000c0, 0x00241000,
-	0x02024000, 0x00008008, 0x080800c0, 0x10241000,
-	0x00000000, 0x01000000, 0x00002000, 0x00000020,
-	0x00000000, 0x01000008, 0x00082000, 0x10000020,
-	0x02000000, 0x01000000, 0x00002080, 0x00001020,
-	0x02000000, 0x01000008, 0x00082080, 0x10001020,
-	0x00004000, 0x01000000, 0x00002040, 0x00040020,
-	0x00004000, 0x01000008, 0x00082040, 0x10040020,
-	0x02004000, 0x01000000, 0x000020c0, 0x00041020,
-	0x02004000, 0x01000008, 0x000820c0, 0x10041020,
-	0x00020000, 0x01008000, 0x08002000, 0x00200020,
-	0x00020000, 0x01008008, 0x08082000, 0x10200020,
-	0x02020000, 0x01008000, 0x08002080, 0x00201020,
-	0x02020000, 0x01008008, 0x08082080, 0x10201020,
-	0x00024000, 0x01008000, 0x08002040, 0x00240020,
-	0x00024000, 0x01008008, 0x08082040, 0x10240020,
-	0x02024000, 0x01008000, 0x080020c0, 0x00241020,
-	0x02024000, 0x01008008, 0x080820c0, 0x10241020,
-	0x00000400, 0x04000000, 0x00100000, 0x00000004,
-	0x00000400, 0x04000008, 0x00180000, 0x10000004,
-	0x02000400, 0x04000000, 0x00100080, 0x00001004,
-	0x02000400, 0x04000008, 0x00180080, 0x10001004,
-	0x00004400, 0x04000000, 0x00100040, 0x00040004,
-	0x00004400, 0x04000008, 0x00180040, 0x10040004,
-	0x02004400, 0x04000000, 0x001000c0, 0x00041004,
-	0x02004400, 0x04000008, 0x001800c0, 0x10041004,
-	0x00020400, 0x04008000, 0x08100000, 0x00200004,
-	0x00020400, 0x04008008, 0x08180000, 0x10200004,
-	0x02020400, 0x04008000, 0x08100080, 0x00201004,
-	0x02020400, 0x04008008, 0x08180080, 0x10201004,
-	0x00024400, 0x04008000, 0x08100040, 0x00240004,
-	0x00024400, 0x04008008, 0x08180040, 0x10240004,
-	0x02024400, 0x04008000, 0x081000c0, 0x00241004,
-	0x02024400, 0x04008008, 0x081800c0, 0x10241004,
-	0x00000400, 0x05000000, 0x00102000, 0x00000024,
-	0x00000400, 0x05000008, 0x00182000, 0x10000024,
-	0x02000400, 0x05000000, 0x00102080, 0x00001024,
-	0x02000400, 0x05000008, 0x00182080, 0x10001024,
-	0x00004400, 0x05000000, 0x00102040, 0x00040024,
-	0x00004400, 0x05000008, 0x00182040, 0x10040024,
-	0x02004400, 0x05000000, 0x001020c0, 0x00041024,
-	0x02004400, 0x05000008, 0x001820c0, 0x10041024,
-	0x00020400, 0x05008000, 0x08102000, 0x00200024,
-	0x00020400, 0x05008008, 0x08182000, 0x10200024,
-	0x02020400, 0x05008000, 0x08102080, 0x00201024,
-	0x02020400, 0x05008008, 0x08182080, 0x10201024,
-	0x00024400, 0x05008000, 0x08102040, 0x00240024,
-	0x00024400, 0x05008008, 0x08182040, 0x10240024,
-	0x02024400, 0x05008000, 0x081020c0, 0x00241024,
-	0x02024400, 0x05008008, 0x081820c0, 0x10241024,
-	0x00000800, 0x00010000, 0x20000000, 0x00000010,
-	0x00000800, 0x00010008, 0x20080000, 0x10000010,
-	0x02000800, 0x00010000, 0x20000080, 0x00001010,
-	0x02000800, 0x00010008, 0x20080080, 0x10001010,
-	0x00004800, 0x00010000, 0x20000040, 0x00040010,
-	0x00004800, 0x00010008, 0x20080040, 0x10040010,
-	0x02004800, 0x00010000, 0x200000c0, 0x00041010,
-	0x02004800, 0x00010008, 0x200800c0, 0x10041010,
-	0x00020800, 0x00018000, 0x28000000, 0x00200010,
-	0x00020800, 0x00018008, 0x28080000, 0x10200010,
-	0x02020800, 0x00018000, 0x28000080, 0x00201010,
-	0x02020800, 0x00018008, 0x28080080, 0x10201010,
-	0x00024800, 0x00018000, 0x28000040, 0x00240010,
-	0x00024800, 0x00018008, 0x28080040, 0x10240010,
-	0x02024800, 0x00018000, 0x280000c0, 0x00241010,
-	0x02024800, 0x00018008, 0x280800c0, 0x10241010,
-	0x00000800, 0x01010000, 0x20002000, 0x00000030,
-	0x00000800, 0x01010008, 0x20082000, 0x10000030,
-	0x02000800, 0x01010000, 0x20002080, 0x00001030,
-	0x02000800, 0x01010008, 0x20082080, 0x10001030,
-	0x00004800, 0x01010000, 0x20002040, 0x00040030,
-	0x00004800, 0x01010008, 0x20082040, 0x10040030,
-	0x02004800, 0x01010000, 0x200020c0, 0x00041030,
-	0x02004800, 0x01010008, 0x200820c0, 0x10041030,
-	0x00020800, 0x01018000, 0x28002000, 0x00200030,
-	0x00020800, 0x01018008, 0x28082000, 0x10200030,
-	0x02020800, 0x01018000, 0x28002080, 0x00201030,
-	0x02020800, 0x01018008, 0x28082080, 0x10201030,
-	0x00024800, 0x01018000, 0x28002040, 0x00240030,
-	0x00024800, 0x01018008, 0x28082040, 0x10240030,
-	0x02024800, 0x01018000, 0x280020c0, 0x00241030,
-	0x02024800, 0x01018008, 0x280820c0, 0x10241030,
-	0x00000c00, 0x04010000, 0x20100000, 0x00000014,
-	0x00000c00, 0x04010008, 0x20180000, 0x10000014,
-	0x02000c00, 0x04010000, 0x20100080, 0x00001014,
-	0x02000c00, 0x04010008, 0x20180080, 0x10001014,
-	0x00004c00, 0x04010000, 0x20100040, 0x00040014,
-	0x00004c00, 0x04010008, 0x20180040, 0x10040014,
-	0x02004c00, 0x04010000, 0x201000c0, 0x00041014,
-	0x02004c00, 0x04010008, 0x201800c0, 0x10041014,
-	0x00020c00, 0x04018000, 0x28100000, 0x00200014,
-	0x00020c00, 0x04018008, 0x28180000, 0x10200014,
-	0x02020c00, 0x04018000, 0x28100080, 0x00201014,
-	0x02020c00, 0x04018008, 0x28180080, 0x10201014,
-	0x00024c00, 0x04018000, 0x28100040, 0x00240014,
-	0x00024c00, 0x04018008, 0x28180040, 0x10240014,
-	0x02024c00, 0x04018000, 0x281000c0, 0x00241014,
-	0x02024c00, 0x04018008, 0x281800c0, 0x10241014,
-	0x00000c00, 0x05010000, 0x20102000, 0x00000034,
-	0x00000c00, 0x05010008, 0x20182000, 0x10000034,
-	0x02000c00, 0x05010000, 0x20102080, 0x00001034,
-	0x02000c00, 0x05010008, 0x20182080, 0x10001034,
-	0x00004c00, 0x05010000, 0x20102040, 0x00040034,
-	0x00004c00, 0x05010008, 0x20182040, 0x10040034,
-	0x02004c00, 0x05010000, 0x201020c0, 0x00041034,
-	0x02004c00, 0x05010008, 0x201820c0, 0x10041034,
-	0x00020c00, 0x05018000, 0x28102000, 0x00200034,
-	0x00020c00, 0x05018008, 0x28182000, 0x10200034,
-	0x02020c00, 0x05018000, 0x28102080, 0x00201034,
-	0x02020c00, 0x05018008, 0x28182080, 0x10201034,
-	0x00024c00, 0x05018000, 0x28102040, 0x00240034,
-	0x00024c00, 0x05018008, 0x28182040, 0x10240034,
-	0x02024c00, 0x05018000, 0x281020c0, 0x00241034,
-	0x02024c00, 0x05018008, 0x281820c0, 0x10241034
-};
-
-/* S-box lookup tables */
-
-static const u32 S1[64] = {
-	0x01010400, 0x00000000, 0x00010000, 0x01010404,
-	0x01010004, 0x00010404, 0x00000004, 0x00010000,
-	0x00000400, 0x01010400, 0x01010404, 0x00000400,
-	0x01000404, 0x01010004, 0x01000000, 0x00000004,
-	0x00000404, 0x01000400, 0x01000400, 0x00010400,
-	0x00010400, 0x01010000, 0x01010000, 0x01000404,
-	0x00010004, 0x01000004, 0x01000004, 0x00010004,
-	0x00000000, 0x00000404, 0x00010404, 0x01000000,
-	0x00010000, 0x01010404, 0x00000004, 0x01010000,
-	0x01010400, 0x01000000, 0x01000000, 0x00000400,
-	0x01010004, 0x00010000, 0x00010400, 0x01000004,
-	0x00000400, 0x00000004, 0x01000404, 0x00010404,
-	0x01010404, 0x00010004, 0x01010000, 0x01000404,
-	0x01000004, 0x00000404, 0x00010404, 0x01010400,
-	0x00000404, 0x01000400, 0x01000400, 0x00000000,
-	0x00010004, 0x00010400, 0x00000000, 0x01010004
-};
-
-static const u32 S2[64] = {
-	0x80108020, 0x80008000, 0x00008000, 0x00108020,
-	0x00100000, 0x00000020, 0x80100020, 0x80008020,
-	0x80000020, 0x80108020, 0x80108000, 0x80000000,
-	0x80008000, 0x00100000, 0x00000020, 0x80100020,
-	0x00108000, 0x00100020, 0x80008020, 0x00000000,
-	0x80000000, 0x00008000, 0x00108020, 0x80100000,
-	0x00100020, 0x80000020, 0x00000000, 0x00108000,
-	0x00008020, 0x80108000, 0x80100000, 0x00008020,
-	0x00000000, 0x00108020, 0x80100020, 0x00100000,
-	0x80008020, 0x80100000, 0x80108000, 0x00008000,
-	0x80100000, 0x80008000, 0x00000020, 0x80108020,
-	0x00108020, 0x00000020, 0x00008000, 0x80000000,
-	0x00008020, 0x80108000, 0x00100000, 0x80000020,
-	0x00100020, 0x80008020, 0x80000020, 0x00100020,
-	0x00108000, 0x00000000, 0x80008000, 0x00008020,
-	0x80000000, 0x80100020, 0x80108020, 0x00108000
-};
-
-static const u32 S3[64] = {
-	0x00000208, 0x08020200, 0x00000000, 0x08020008,
-	0x08000200, 0x00000000, 0x00020208, 0x08000200,
-	0x00020008, 0x08000008, 0x08000008, 0x00020000,
-	0x08020208, 0x00020008, 0x08020000, 0x00000208,
-	0x08000000, 0x00000008, 0x08020200, 0x00000200,
-	0x00020200, 0x08020000, 0x08020008, 0x00020208,
-	0x08000208, 0x00020200, 0x00020000, 0x08000208,
-	0x00000008, 0x08020208, 0x00000200, 0x08000000,
-	0x08020200, 0x08000000, 0x00020008, 0x00000208,
-	0x00020000, 0x08020200, 0x08000200, 0x00000000,
-	0x00000200, 0x00020008, 0x08020208, 0x08000200,
-	0x08000008, 0x00000200, 0x00000000, 0x08020008,
-	0x08000208, 0x00020000, 0x08000000, 0x08020208,
-	0x00000008, 0x00020208, 0x00020200, 0x08000008,
-	0x08020000, 0x08000208, 0x00000208, 0x08020000,
-	0x00020208, 0x00000008, 0x08020008, 0x00020200
-};
-
-static const u32 S4[64] = {
-	0x00802001, 0x00002081, 0x00002081, 0x00000080,
-	0x00802080, 0x00800081, 0x00800001, 0x00002001,
-	0x00000000, 0x00802000, 0x00802000, 0x00802081,
-	0x00000081, 0x00000000, 0x00800080, 0x00800001,
-	0x00000001, 0x00002000, 0x00800000, 0x00802001,
-	0x00000080, 0x00800000, 0x00002001, 0x00002080,
-	0x00800081, 0x00000001, 0x00002080, 0x00800080,
-	0x00002000, 0x00802080, 0x00802081, 0x00000081,
-	0x00800080, 0x00800001, 0x00802000, 0x00802081,
-	0x00000081, 0x00000000, 0x00000000, 0x00802000,
-	0x00002080, 0x00800080, 0x00800081, 0x00000001,
-	0x00802001, 0x00002081, 0x00002081, 0x00000080,
-	0x00802081, 0x00000081, 0x00000001, 0x00002000,
-	0x00800001, 0x00002001, 0x00802080, 0x00800081,
-	0x00002001, 0x00002080, 0x00800000, 0x00802001,
-	0x00000080, 0x00800000, 0x00002000, 0x00802080
-};
-
-static const u32 S5[64] = {
-	0x00000100, 0x02080100, 0x02080000, 0x42000100,
-	0x00080000, 0x00000100, 0x40000000, 0x02080000,
-	0x40080100, 0x00080000, 0x02000100, 0x40080100,
-	0x42000100, 0x42080000, 0x00080100, 0x40000000,
-	0x02000000, 0x40080000, 0x40080000, 0x00000000,
-	0x40000100, 0x42080100, 0x42080100, 0x02000100,
-	0x42080000, 0x40000100, 0x00000000, 0x42000000,
-	0x02080100, 0x02000000, 0x42000000, 0x00080100,
-	0x00080000, 0x42000100, 0x00000100, 0x02000000,
-	0x40000000, 0x02080000, 0x42000100, 0x40080100,
-	0x02000100, 0x40000000, 0x42080000, 0x02080100,
-	0x40080100, 0x00000100, 0x02000000, 0x42080000,
-	0x42080100, 0x00080100, 0x42000000, 0x42080100,
-	0x02080000, 0x00000000, 0x40080000, 0x42000000,
-	0x00080100, 0x02000100, 0x40000100, 0x00080000,
-	0x00000000, 0x40080000, 0x02080100, 0x40000100
-};
-
-static const u32 S6[64] = {
-	0x20000010, 0x20400000, 0x00004000, 0x20404010,
-	0x20400000, 0x00000010, 0x20404010, 0x00400000,
-	0x20004000, 0x00404010, 0x00400000, 0x20000010,
-	0x00400010, 0x20004000, 0x20000000, 0x00004010,
-	0x00000000, 0x00400010, 0x20004010, 0x00004000,
-	0x00404000, 0x20004010, 0x00000010, 0x20400010,
-	0x20400010, 0x00000000, 0x00404010, 0x20404000,
-	0x00004010, 0x00404000, 0x20404000, 0x20000000,
-	0x20004000, 0x00000010, 0x20400010, 0x00404000,
-	0x20404010, 0x00400000, 0x00004010, 0x20000010,
-	0x00400000, 0x20004000, 0x20000000, 0x00004010,
-	0x20000010, 0x20404010, 0x00404000, 0x20400000,
-	0x00404010, 0x20404000, 0x00000000, 0x20400010,
-	0x00000010, 0x00004000, 0x20400000, 0x00404010,
-	0x00004000, 0x00400010, 0x20004010, 0x00000000,
-	0x20404000, 0x20000000, 0x00400010, 0x20004010
-};
-
-static const u32 S7[64] = {
-	0x00200000, 0x04200002, 0x04000802, 0x00000000,
-	0x00000800, 0x04000802, 0x00200802, 0x04200800,
-	0x04200802, 0x00200000, 0x00000000, 0x04000002,
-	0x00000002, 0x04000000, 0x04200002, 0x00000802,
-	0x04000800, 0x00200802, 0x00200002, 0x04000800,
-	0x04000002, 0x04200000, 0x04200800, 0x00200002,
-	0x04200000, 0x00000800, 0x00000802, 0x04200802,
-	0x00200800, 0x00000002, 0x04000000, 0x00200800,
-	0x04000000, 0x00200800, 0x00200000, 0x04000802,
-	0x04000802, 0x04200002, 0x04200002, 0x00000002,
-	0x00200002, 0x04000000, 0x04000800, 0x00200000,
-	0x04200800, 0x00000802, 0x00200802, 0x04200800,
-	0x00000802, 0x04000002, 0x04200802, 0x04200000,
-	0x00200800, 0x00000000, 0x00000002, 0x04200802,
-	0x00000000, 0x00200802, 0x04200000, 0x00000800,
-	0x04000002, 0x04000800, 0x00000800, 0x00200002
-};
-
-static const u32 S8[64] = {
-	0x10001040, 0x00001000, 0x00040000, 0x10041040,
-	0x10000000, 0x10001040, 0x00000040, 0x10000000,
-	0x00040040, 0x10040000, 0x10041040, 0x00041000,
-	0x10041000, 0x00041040, 0x00001000, 0x00000040,
-	0x10040000, 0x10000040, 0x10001000, 0x00001040,
-	0x00041000, 0x00040040, 0x10040040, 0x10041000,
-	0x00001040, 0x00000000, 0x00000000, 0x10040040,
-	0x10000040, 0x10001000, 0x00041040, 0x00040000,
-	0x00041040, 0x00040000, 0x10041000, 0x00001000,
-	0x00000040, 0x10040040, 0x00001000, 0x00041040,
-	0x10001000, 0x00000040, 0x10000040, 0x10040000,
-	0x10040040, 0x10000000, 0x00040000, 0x10001040,
-	0x00000000, 0x10041040, 0x00040040, 0x10000040,
-	0x10040000, 0x10001000, 0x10001040, 0x00000000,
-	0x10041040, 0x00041000, 0x00041000, 0x00001040,
-	0x00001040, 0x00040040, 0x10000000, 0x10041000
-};
-
-/* Encryption components: IP, FP, and round function */
-
-#define IP(L, R, T)		\
-	ROL(R, 4);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xf0f0f0f0;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROL(R, 12);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xffff0000;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROR(R, 14);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xcccccccc;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROL(R, 6);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xff00ff00;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROR(R, 7);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xaaaaaaaa;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROL(L, 1);
-
-#define FP(L, R, T)		\
-	ROR(L, 1);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xaaaaaaaa;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROL(R, 7);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xff00ff00;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROR(R, 6);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xcccccccc;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROL(R, 14);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xffff0000;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROR(R, 12);		\
-	T  = L;			\
-	L ^= R;			\
-	L &= 0xf0f0f0f0;	\
-	R ^= L;			\
-	L ^= T;			\
-	ROR(R, 4);
-
-#define ROUND(L, R, A, B, K, d)					\
-	B = K[0];			A = K[1];	K += d;	\
-	B ^= R;				A ^= R;			\
-	B &= 0x3f3f3f3f;		ROR(A, 4);		\
-	L ^= S8[0xff & B];		A &= 0x3f3f3f3f;	\
-	L ^= S6[0xff & (B >> 8)];	B >>= 16;		\
-	L ^= S7[0xff & A];					\
-	L ^= S5[0xff & (A >> 8)];	A >>= 16;		\
-	L ^= S4[0xff & B];					\
-	L ^= S2[0xff & (B >> 8)];				\
-	L ^= S3[0xff & A];					\
-	L ^= S1[0xff & (A >> 8)];
-
-/*
- * PC2 lookup tables are organized as 2 consecutive sets of 4 interleaved
- * tables of 128 elements.  One set is for C_i and the other for D_i, while
- * the 4 interleaved tables correspond to four 7-bit subsets of C_i or D_i.
- *
- * After PC1 each of the variables a,b,c,d contains a 7 bit subset of C_i
- * or D_i in bits 7-1 (bit 0 being the least significant).
- */
-
-#define T1(x) pt[2 * (x) + 0]
-#define T2(x) pt[2 * (x) + 1]
-#define T3(x) pt[2 * (x) + 2]
-#define T4(x) pt[2 * (x) + 3]
-
-#define DES_PC2(a, b, c, d) (T4(d) | T3(c) | T2(b) | T1(a))
-
-/*
- * Encryption key expansion
- *
- * RFC2451: Weak key checks SHOULD be performed.
- *
- * FIPS 74:
- *
- *   Keys having duals are keys which produce all zeros, all ones, or
- *   alternating zero-one patterns in the C and D registers after Permuted
- *   Choice 1 has operated on the key.
- *
- */
-unsigned long des_ekey(u32 *pe, const u8 *k)
-{
-	/* K&R: long is at least 32 bits */
-	unsigned long a, b, c, d, w;
-	const u32 *pt = pc2;
-
-	d = k[4]; d &= 0x0e; d <<= 4; d |= k[0] & 0x1e; d = pc1[d];
-	c = k[5]; c &= 0x0e; c <<= 4; c |= k[1] & 0x1e; c = pc1[c];
-	b = k[6]; b &= 0x0e; b <<= 4; b |= k[2] & 0x1e; b = pc1[b];
-	a = k[7]; a &= 0x0e; a <<= 4; a |= k[3] & 0x1e; a = pc1[a];
-
-	pe[15 * 2 + 0] = DES_PC2(a, b, c, d); d = rs[d];
-	pe[14 * 2 + 0] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[13 * 2 + 0] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[12 * 2 + 0] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[11 * 2 + 0] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[10 * 2 + 0] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[ 9 * 2 + 0] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[ 8 * 2 + 0] = DES_PC2(d, a, b, c); c = rs[c];
-	pe[ 7 * 2 + 0] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[ 6 * 2 + 0] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[ 5 * 2 + 0] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[ 4 * 2 + 0] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[ 3 * 2 + 0] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[ 2 * 2 + 0] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[ 1 * 2 + 0] = DES_PC2(c, d, a, b); b = rs[b];
-	pe[ 0 * 2 + 0] = DES_PC2(b, c, d, a);
-
-	/* Check if first half is weak */
-	w  = (a ^ c) | (b ^ d) | (rs[a] ^ c) | (b ^ rs[d]);
-
-	/* Skip to next table set */
-	pt += 512;
-
-	d = k[0]; d &= 0xe0; d >>= 4; d |= k[4] & 0xf0; d = pc1[d + 1];
-	c = k[1]; c &= 0xe0; c >>= 4; c |= k[5] & 0xf0; c = pc1[c + 1];
-	b = k[2]; b &= 0xe0; b >>= 4; b |= k[6] & 0xf0; b = pc1[b + 1];
-	a = k[3]; a &= 0xe0; a >>= 4; a |= k[7] & 0xf0; a = pc1[a + 1];
-
-	/* Check if second half is weak */
-	w |= (a ^ c) | (b ^ d) | (rs[a] ^ c) | (b ^ rs[d]);
-
-	pe[15 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d];
-	pe[14 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[13 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[12 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[11 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[10 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[ 9 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[ 8 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c];
-	pe[ 7 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[ 6 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[ 5 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[ 4 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[ 3 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[ 2 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[ 1 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b];
-	pe[ 0 * 2 + 1] = DES_PC2(b, c, d, a);
-
-	/* Fixup: 2413 5768 -> 1357 2468 */
-	for (d = 0; d < 16; ++d) {
-		a = pe[2 * d];
-		b = pe[2 * d + 1];
-		c = a ^ b;
-		c &= 0xffff0000;
-		a ^= c;
-		b ^= c;
-		ROL(b, 18);
-		pe[2 * d] = a;
-		pe[2 * d + 1] = b;
-	}
-
-	/* Zero if weak key */
-	return w;
-}
-EXPORT_SYMBOL_GPL(des_ekey);
-
-/*
- * Decryption key expansion
- *
- * No weak key checking is performed, as this is only used by triple DES
- *
- */
-static void dkey(u32 *pe, const u8 *k)
-{
-	/* K&R: long is at least 32 bits */
-	unsigned long a, b, c, d;
-	const u32 *pt = pc2;
-
-	d = k[4]; d &= 0x0e; d <<= 4; d |= k[0] & 0x1e; d = pc1[d];
-	c = k[5]; c &= 0x0e; c <<= 4; c |= k[1] & 0x1e; c = pc1[c];
-	b = k[6]; b &= 0x0e; b <<= 4; b |= k[2] & 0x1e; b = pc1[b];
-	a = k[7]; a &= 0x0e; a <<= 4; a |= k[3] & 0x1e; a = pc1[a];
-
-	pe[ 0 * 2] = DES_PC2(a, b, c, d); d = rs[d];
-	pe[ 1 * 2] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[ 2 * 2] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[ 3 * 2] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[ 4 * 2] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[ 5 * 2] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[ 6 * 2] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[ 7 * 2] = DES_PC2(d, a, b, c); c = rs[c];
-	pe[ 8 * 2] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[ 9 * 2] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[10 * 2] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[11 * 2] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[12 * 2] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[13 * 2] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[14 * 2] = DES_PC2(c, d, a, b); b = rs[b];
-	pe[15 * 2] = DES_PC2(b, c, d, a);
-
-	/* Skip to next table set */
-	pt += 512;
-
-	d = k[0]; d &= 0xe0; d >>= 4; d |= k[4] & 0xf0; d = pc1[d + 1];
-	c = k[1]; c &= 0xe0; c >>= 4; c |= k[5] & 0xf0; c = pc1[c + 1];
-	b = k[2]; b &= 0xe0; b >>= 4; b |= k[6] & 0xf0; b = pc1[b + 1];
-	a = k[3]; a &= 0xe0; a >>= 4; a |= k[7] & 0xf0; a = pc1[a + 1];
-
-	pe[ 0 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d];
-	pe[ 1 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[ 2 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[ 3 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[ 4 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[ 5 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
-	pe[ 6 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
-	pe[ 7 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c];
-	pe[ 8 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[ 9 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[10 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[11 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[12 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
-	pe[13 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
-	pe[14 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b];
-	pe[15 * 2 + 1] = DES_PC2(b, c, d, a);
-
-	/* Fixup: 2413 5768 -> 1357 2468 */
-	for (d = 0; d < 16; ++d) {
-		a = pe[2 * d];
-		b = pe[2 * d + 1];
-		c = a ^ b;
-		c &= 0xffff0000;
-		a ^= c;
-		b ^= c;
-		ROL(b, 18);
-		pe[2 * d] = a;
-		pe[2 * d + 1] = b;
-	}
-}
+#include <crypto/internal/des.h>
 
 static int des_setkey(struct crypto_tfm *tfm, const u8 *key,
 		      unsigned int keylen)
 {
 	struct des_ctx *dctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-	u32 tmp[DES_EXPKEY_WORDS];
-	int ret;
-
-	/* Expand to tmp */
-	ret = des_ekey(tmp, key);
-
-	if (unlikely(ret == 0) && (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
-		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
-		return -EINVAL;
-	}
-
-	/* Copy to output */
-	memcpy(dctx->expkey, tmp, sizeof(dctx->expkey));
-
-	return 0;
-}
-
-static void des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-	struct des_ctx *ctx = crypto_tfm_ctx(tfm);
-	const u32 *K = ctx->expkey;
-	const __le32 *s = (const __le32 *)src;
-	__le32 *d = (__le32 *)dst;
-	u32 L, R, A, B;
-	int i;
-
-	L = le32_to_cpu(s[0]);
-	R = le32_to_cpu(s[1]);
-
-	IP(L, R, A);
-	for (i = 0; i < 8; i++) {
-		ROUND(L, R, A, B, K, 2);
-		ROUND(R, L, A, B, K, 2);
-	}
-	FP(R, L, A);
-
-	d[0] = cpu_to_le32(R);
-	d[1] = cpu_to_le32(L);
-}
-
-static void des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
-{
-	struct des_ctx *ctx = crypto_tfm_ctx(tfm);
-	const u32 *K = ctx->expkey + DES_EXPKEY_WORDS - 2;
-	const __le32 *s = (const __le32 *)src;
-	__le32 *d = (__le32 *)dst;
-	u32 L, R, A, B;
-	int i;
-
-	L = le32_to_cpu(s[0]);
-	R = le32_to_cpu(s[1]);
-
-	IP(L, R, A);
-	for (i = 0; i < 8; i++) {
-		ROUND(L, R, A, B, K, -2);
-		ROUND(R, L, A, B, K, -2);
-	}
-	FP(R, L, A);
-
-	d[0] = cpu_to_le32(R);
-	d[1] = cpu_to_le32(L);
-}
-
-/*
- * RFC2451:
- *
- *   For DES-EDE3, there is no known need to reject weak or
- *   complementation keys.  Any weakness is obviated by the use of
- *   multiple keys.
- *
- *   However, if the first two or last two independent 64-bit keys are
- *   equal (k1 == k2 or k2 == k3), then the DES3 operation is simply the
- *   same as DES.  Implementers MUST reject keys that exhibit this
- *   property.
- *
- */
-int __des3_ede_setkey(u32 *expkey, u32 *flags, const u8 *key,
-		      unsigned int keylen)
-{
 	int err;
 
-	err = __des3_verify_key(flags, key);
-	if (unlikely(err))
-		return err;
+	err = des_expand_key(dctx, key, keylen);
+	if (err == -ENOKEY) {
+		if (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)
+			err = -EINVAL;
+		else
+			err = 0;
+	}
 
-	des_ekey(expkey, key); expkey += DES_EXPKEY_WORDS; key += DES_KEY_SIZE;
-	dkey(expkey, key); expkey += DES_EXPKEY_WORDS; key += DES_KEY_SIZE;
-	des_ekey(expkey, key);
-
-	return 0;
+	if (err) {
+		memset(dctx, 0, sizeof(*dctx));
+		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
+	}
+	return err;
 }
-EXPORT_SYMBOL_GPL(__des3_ede_setkey);
+
+static void crypto_des_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	const struct des_ctx *dctx = crypto_tfm_ctx(tfm);
+
+	des_encrypt(dctx, dst, src);
+}
+
+static void crypto_des_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+{
+	const struct des_ctx *dctx = crypto_tfm_ctx(tfm);
+
+	des_decrypt(dctx, dst, src);
+}
 
 static int des3_ede_setkey(struct crypto_tfm *tfm, const u8 *key,
 			   unsigned int keylen)
 {
 	struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-	u32 *expkey = dctx->expkey;
+	int err;
 
-	return __des3_ede_setkey(expkey, flags, key, keylen);
+	err = des3_ede_expand_key(dctx, key, keylen);
+	if (err == -ENOKEY) {
+		if (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)
+			err = -EINVAL;
+		else
+			err = 0;
+	}
+
+	if (err) {
+		memset(dctx, 0, sizeof(*dctx));
+		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
+	}
+	return err;
 }
 
-static void des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void crypto_des3_ede_encrypt(struct crypto_tfm *tfm, u8 *dst,
+				    const u8 *src)
 {
-	struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm);
-	const u32 *K = dctx->expkey;
-	const __le32 *s = (const __le32 *)src;
-	__le32 *d = (__le32 *)dst;
-	u32 L, R, A, B;
-	int i;
+	const struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm);
 
-	L = le32_to_cpu(s[0]);
-	R = le32_to_cpu(s[1]);
-
-	IP(L, R, A);
-	for (i = 0; i < 8; i++) {
-		ROUND(L, R, A, B, K, 2);
-		ROUND(R, L, A, B, K, 2);
-	}
-	for (i = 0; i < 8; i++) {
-		ROUND(R, L, A, B, K, 2);
-		ROUND(L, R, A, B, K, 2);
-	}
-	for (i = 0; i < 8; i++) {
-		ROUND(L, R, A, B, K, 2);
-		ROUND(R, L, A, B, K, 2);
-	}
-	FP(R, L, A);
-
-	d[0] = cpu_to_le32(R);
-	d[1] = cpu_to_le32(L);
+	des3_ede_encrypt(dctx, dst, src);
 }
 
-static void des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
+static void crypto_des3_ede_decrypt(struct crypto_tfm *tfm, u8 *dst,
+				    const u8 *src)
 {
-	struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm);
-	const u32 *K = dctx->expkey + DES3_EDE_EXPKEY_WORDS - 2;
-	const __le32 *s = (const __le32 *)src;
-	__le32 *d = (__le32 *)dst;
-	u32 L, R, A, B;
-	int i;
+	const struct des3_ede_ctx *dctx = crypto_tfm_ctx(tfm);
 
-	L = le32_to_cpu(s[0]);
-	R = le32_to_cpu(s[1]);
-
-	IP(L, R, A);
-	for (i = 0; i < 8; i++) {
-		ROUND(L, R, A, B, K, -2);
-		ROUND(R, L, A, B, K, -2);
-	}
-	for (i = 0; i < 8; i++) {
-		ROUND(R, L, A, B, K, -2);
-		ROUND(L, R, A, B, K, -2);
-	}
-	for (i = 0; i < 8; i++) {
-		ROUND(L, R, A, B, K, -2);
-		ROUND(R, L, A, B, K, -2);
-	}
-	FP(R, L, A);
-
-	d[0] = cpu_to_le32(R);
-	d[1] = cpu_to_le32(L);
+	des3_ede_decrypt(dctx, dst, src);
 }
 
 static struct crypto_alg des_algs[2] = { {
@@ -951,13 +96,12 @@
 	.cra_blocksize		=	DES_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct des_ctx),
 	.cra_module		=	THIS_MODULE,
-	.cra_alignmask		=	3,
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	DES_KEY_SIZE,
 	.cia_max_keysize	=	DES_KEY_SIZE,
 	.cia_setkey		=	des_setkey,
-	.cia_encrypt		=	des_encrypt,
-	.cia_decrypt		=	des_decrypt } }
+	.cia_encrypt		=	crypto_des_encrypt,
+	.cia_decrypt		=	crypto_des_decrypt } }
 }, {
 	.cra_name		=	"des3_ede",
 	.cra_driver_name	=	"des3_ede-generic",
@@ -966,13 +110,12 @@
 	.cra_blocksize		=	DES3_EDE_BLOCK_SIZE,
 	.cra_ctxsize		=	sizeof(struct des3_ede_ctx),
 	.cra_module		=	THIS_MODULE,
-	.cra_alignmask		=	3,
 	.cra_u			=	{ .cipher = {
 	.cia_min_keysize	=	DES3_EDE_KEY_SIZE,
 	.cia_max_keysize	=	DES3_EDE_KEY_SIZE,
 	.cia_setkey		=	des3_ede_setkey,
-	.cia_encrypt		=	des3_ede_encrypt,
-	.cia_decrypt		=	des3_ede_decrypt } }
+	.cia_encrypt		=	crypto_des3_ede_encrypt,
+	.cia_decrypt		=	crypto_des3_ede_decrypt } }
 } };
 
 static int __init des_generic_mod_init(void)
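
The rewritten des_setkey() above pushes the actual key expansion into the new DES
library helper and keeps only the policy decision on its weak-key result. Below is
a minimal userspace sketch of that policy mapping, assuming a stub expander in place
of the kernel's des_expand_key()/-ENOKEY; the stub name and the sentinel value are
illustrative only.

    #include <errno.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define WEAK_KEY (-1000)        /* stand-in for the kernel's -ENOKEY */

    static int weak_key_expand(bool key_is_weak)
    {
        return key_is_weak ? WEAK_KEY : 0;      /* stub expander */
    }

    /*
     * Mirrors the policy in des_setkey(): a weak key only fails the setkey
     * when the user explicitly asked for weak keys to be rejected.
     */
    static int setkey_policy(bool key_is_weak, bool forbid_weak_keys)
    {
        int err = weak_key_expand(key_is_weak);

        if (err == WEAK_KEY)
            err = forbid_weak_keys ? -EINVAL : 0;
        return err;
    }

    int main(void)
    {
        printf("weak key, weak keys allowed:   %d\n", setkey_policy(true, false));
        printf("weak key, weak keys forbidden: %d\n", setkey_policy(true, true));
        printf("good key:                      %d\n", setkey_policy(false, false));
        return 0;
    }
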
diff --git a/crypto/fips.c b/crypto/fips.c
index c0b3a3c..7b1d8ca 100644
--- a/crypto/fips.c
+++ b/crypto/fips.c
@@ -11,10 +11,14 @@
 #include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/sysctl.h>
+#include <linux/notifier.h>
 
 int fips_enabled;
 EXPORT_SYMBOL_GPL(fips_enabled);
 
+ATOMIC_NOTIFIER_HEAD(fips_fail_notif_chain);
+EXPORT_SYMBOL_GPL(fips_fail_notif_chain);
+
 /* Process kernel command-line parameter at boot time. fips=0 or fips=1 */
 static int fips_enable(char *str)
 {
@@ -58,6 +62,13 @@
 	unregister_sysctl_table(crypto_sysctls);
 }
 
+void fips_fail_notify(void)
+{
+	if (fips_enabled)
+		atomic_notifier_call_chain(&fips_fail_notif_chain, 0, NULL);
+}
+EXPORT_SYMBOL_GPL(fips_fail_notify);
+
 static int __init fips_init(void)
 {
 	crypto_proc_fips_init();
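
fips_fail_notif_chain above is an atomic notifier head, so in-kernel code that needs
to react when fips_fail_notify() fires can register a callback on it. A module-style
sketch under that assumption follows; the handler, its message and the module
boilerplate are placeholders, and only the chain itself plus the standard
atomic_notifier_chain_register()/unregister() calls are existing interfaces.

    #include <linux/module.h>
    #include <linux/notifier.h>

    /* Exported above from crypto/fips.c; redeclared here for the sketch. */
    extern struct atomic_notifier_head fips_fail_notif_chain;

    static int example_fips_fail(struct notifier_block *nb,
                                 unsigned long action, void *data)
    {
        pr_emerg("FIPS self-test failure reported\n");
        return NOTIFY_OK;
    }

    static struct notifier_block example_fips_nb = {
        .notifier_call = example_fips_fail,
    };

    static int __init example_init(void)
    {
        return atomic_notifier_chain_register(&fips_fail_notif_chain,
                                              &example_fips_nb);
    }

    static void __exit example_exit(void)
    {
        atomic_notifier_chain_unregister(&fips_fail_notif_chain,
                                         &example_fips_nb);
    }

    module_init(example_init);
    module_exit(example_exit);
    MODULE_LICENSE("GPL");
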
diff --git a/crypto/gcm.c b/crypto/gcm.c
index f254e2d..7388420 100644
--- a/crypto/gcm.c
+++ b/crypto/gcm.c
@@ -152,20 +152,7 @@
 static int crypto_gcm_setauthsize(struct crypto_aead *tfm,
 				  unsigned int authsize)
 {
-	switch (authsize) {
-	case 4:
-	case 8:
-	case 12:
-	case 13:
-	case 14:
-	case 15:
-	case 16:
-		break;
-	default:
-		return -EINVAL;
-	}
-
-	return 0;
+	return crypto_gcm_check_authsize(authsize);
 }
 
 static void crypto_gcm_init_common(struct aead_request *req)
@@ -762,15 +749,11 @@
 				      unsigned int authsize)
 {
 	struct crypto_rfc4106_ctx *ctx = crypto_aead_ctx(parent);
+	int err;
 
-	switch (authsize) {
-	case 8:
-	case 12:
-	case 16:
-		break;
-	default:
-		return -EINVAL;
-	}
+	err = crypto_rfc4106_check_authsize(authsize);
+	if (err)
+		return err;
 
 	return crypto_aead_setauthsize(ctx->child, authsize);
 }
@@ -818,8 +801,11 @@
 
 static int crypto_rfc4106_encrypt(struct aead_request *req)
 {
-	if (req->assoclen != 16 && req->assoclen != 20)
-		return -EINVAL;
+	int err;
+
+	err = crypto_ipsec_check_assoclen(req->assoclen);
+	if (err)
+		return err;
 
 	req = crypto_rfc4106_crypt(req);
 
@@ -828,8 +814,11 @@
 
 static int crypto_rfc4106_decrypt(struct aead_request *req)
 {
-	if (req->assoclen != 16 && req->assoclen != 20)
-		return -EINVAL;
+	int err;
+
+	err = crypto_ipsec_check_assoclen(req->assoclen);
+	if (err)
+		return err;
 
 	req = crypto_rfc4106_crypt(req);
 
@@ -1045,12 +1034,14 @@
 
 static int crypto_rfc4543_encrypt(struct aead_request *req)
 {
-	return crypto_rfc4543_crypt(req, true);
+	return crypto_ipsec_check_assoclen(req->assoclen) ?:
+	       crypto_rfc4543_crypt(req, true);
 }
 
 static int crypto_rfc4543_decrypt(struct aead_request *req)
 {
-	return crypto_rfc4543_crypt(req, false);
+	return crypto_ipsec_check_assoclen(req->assoclen) ?:
+	       crypto_rfc4543_crypt(req, false);
 }
 
 static int crypto_rfc4543_init_tfm(struct crypto_aead *tfm)
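
The switch statements deleted above spell out what the new helpers have to enforce:
plain GCM accepts tag sizes of 4, 8 and 12..16 bytes, RFC 4106 narrows that to 8, 12
or 16, and the IPsec wrappers require an assoclen of 16 or 20 bytes. A standalone
sketch of equivalent checks, on the assumption that the new helpers implement exactly
the removed logic:

    #include <errno.h>

    static int gcm_check_authsize(unsigned int authsize)
    {
        switch (authsize) {
        case 4:
        case 8:
        case 12:
        case 13:
        case 14:
        case 15:
        case 16:
            return 0;
        default:
            return -EINVAL;
        }
    }

    static int rfc4106_check_authsize(unsigned int authsize)
    {
        return (authsize == 8 || authsize == 12 || authsize == 16) ? 0 : -EINVAL;
    }

    static int ipsec_check_assoclen(unsigned int assoclen)
    {
        return (assoclen == 16 || assoclen == 20) ? 0 : -EINVAL;
    }

    int main(void)
    {
        /* All three checks return 0 for these valid inputs. */
        return gcm_check_authsize(13) | rfc4106_check_authsize(12) |
               ipsec_check_assoclen(20);
    }
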
diff --git a/crypto/ghash-generic.c b/crypto/ghash-generic.c
index dad9e1f..5027b34 100644
--- a/crypto/ghash-generic.c
+++ b/crypto/ghash-generic.c
@@ -1,12 +1,37 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * GHASH: digest algorithm for GCM (Galois/Counter Mode).
+ * GHASH: hash function for GCM (Galois/Counter Mode).
  *
  * Copyright (c) 2007 Nokia Siemens Networks - Mikko Herranen <mh1@iki.fi>
  * Copyright (c) 2009 Intel Corp.
  *   Author: Huang Ying <ying.huang@intel.com>
+ */
+
+/*
+ * GHASH is a keyed hash function used in GCM authentication tag generation.
  *
- * The algorithm implementation is copied from gcm.c.
+ * The original GCM paper [1] presents GHASH as a function GHASH(H, A, C) which
+ * takes a 16-byte hash key H, additional authenticated data A, and a ciphertext
+ * C.  It formats A and C into a single byte string X, interprets X as a
+ * polynomial over GF(2^128), and evaluates this polynomial at the point H.
+ *
+ * However, the NIST standard for GCM [2] presents GHASH as GHASH(H, X) where X
+ * is the already-formatted byte string containing both A and C.
+ *
+ * "ghash" in the Linux crypto API uses the 'X' (pre-formatted) convention,
+ * since the API supports only a single data stream per hash.  Thus, the
+ * formatting of 'A' and 'C' is done in the "gcm" template, not in "ghash".
+ *
+ * The reason "ghash" is separate from "gcm" is to allow "gcm" to use an
+ * accelerated "ghash" when a standalone accelerated "gcm(aes)" is unavailable.
+ * It is generally inappropriate to use "ghash" for other purposes, since it is
+ * an "ε-almost-XOR-universal hash function", not a cryptographic hash function.
+ * It can only be used securely in crypto modes specially designed to use it.
+ *
+ * [1] The Galois/Counter Mode of Operation (GCM)
+ *     (http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.694.695&rep=rep1&type=pdf)
+ * [2] Recommendation for Block Cipher Modes of Operation: Galois/Counter Mode (GCM) and GMAC
+ *     (https://csrc.nist.gov/publications/detail/sp/800-38d/final)
  */
 
 #include <crypto/algapi.h>
@@ -156,6 +181,6 @@
 module_exit(ghash_mod_exit);
 
 MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("GHASH Message Digest Algorithm");
+MODULE_DESCRIPTION("GHASH hash function");
 MODULE_ALIAS_CRYPTO("ghash");
 MODULE_ALIAS_CRYPTO("ghash-generic");
diff --git a/crypto/morus1280.c b/crypto/morus1280.c
deleted file mode 100644
index f8734c6..0000000
--- a/crypto/morus1280.c
+++ /dev/null
@@ -1,542 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The MORUS-1280 Authenticated-Encryption Algorithm
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <asm/unaligned.h>
-#include <crypto/algapi.h>
-#include <crypto/internal/aead.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/morus_common.h>
-#include <crypto/scatterwalk.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/scatterlist.h>
-
-#define MORUS1280_WORD_SIZE 8
-#define MORUS1280_BLOCK_SIZE (MORUS_BLOCK_WORDS * MORUS1280_WORD_SIZE)
-#define MORUS1280_BLOCK_ALIGN (__alignof__(__le64))
-#define MORUS1280_ALIGNED(p) IS_ALIGNED((uintptr_t)p, MORUS1280_BLOCK_ALIGN)
-
-struct morus1280_block {
-	u64 words[MORUS_BLOCK_WORDS];
-};
-
-union morus1280_block_in {
-	__le64 words[MORUS_BLOCK_WORDS];
-	u8 bytes[MORUS1280_BLOCK_SIZE];
-};
-
-struct morus1280_state {
-	struct morus1280_block s[MORUS_STATE_BLOCKS];
-};
-
-struct morus1280_ctx {
-	struct morus1280_block key;
-};
-
-struct morus1280_ops {
-	int (*skcipher_walk_init)(struct skcipher_walk *walk,
-				  struct aead_request *req, bool atomic);
-
-	void (*crypt_chunk)(struct morus1280_state *state,
-			    u8 *dst, const u8 *src, unsigned int size);
-};
-
-static const struct morus1280_block crypto_morus1280_const[1] = {
-	{ .words = {
-		U64_C(0x0d08050302010100),
-		U64_C(0x6279e99059372215),
-		U64_C(0xf12fc26d55183ddb),
-		U64_C(0xdd28b57342311120),
-	} },
-};
-
-static void crypto_morus1280_round(struct morus1280_block *b0,
-				   struct morus1280_block *b1,
-				   struct morus1280_block *b2,
-				   struct morus1280_block *b3,
-				   struct morus1280_block *b4,
-				   const struct morus1280_block *m,
-				   unsigned int b, unsigned int w)
-{
-	unsigned int i;
-	struct morus1280_block tmp;
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		b0->words[i] ^= b1->words[i] & b2->words[i];
-		b0->words[i] ^= b3->words[i];
-		b0->words[i] ^= m->words[i];
-		b0->words[i] = rol64(b0->words[i], b);
-	}
-
-	tmp = *b3;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		b3->words[(i + w) % MORUS_BLOCK_WORDS] = tmp.words[i];
-}
-
-static void crypto_morus1280_update(struct morus1280_state *state,
-				    const struct morus1280_block *m)
-{
-	static const struct morus1280_block z = {};
-
-	struct morus1280_block *s = state->s;
-
-	crypto_morus1280_round(&s[0], &s[1], &s[2], &s[3], &s[4], &z, 13, 1);
-	crypto_morus1280_round(&s[1], &s[2], &s[3], &s[4], &s[0], m,  46, 2);
-	crypto_morus1280_round(&s[2], &s[3], &s[4], &s[0], &s[1], m,  38, 3);
-	crypto_morus1280_round(&s[3], &s[4], &s[0], &s[1], &s[2], m,   7, 2);
-	crypto_morus1280_round(&s[4], &s[0], &s[1], &s[2], &s[3], m,   4, 1);
-}
-
-static void crypto_morus1280_load_a(struct morus1280_block *dst, const u8 *src)
-{
-	unsigned int i;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		dst->words[i] = le64_to_cpu(*(const __le64 *)src);
-		src += MORUS1280_WORD_SIZE;
-	}
-}
-
-static void crypto_morus1280_load_u(struct morus1280_block *dst, const u8 *src)
-{
-	unsigned int i;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		dst->words[i] = get_unaligned_le64(src);
-		src += MORUS1280_WORD_SIZE;
-	}
-}
-
-static void crypto_morus1280_load(struct morus1280_block *dst, const u8 *src)
-{
-	if (MORUS1280_ALIGNED(src))
-		crypto_morus1280_load_a(dst, src);
-	else
-		crypto_morus1280_load_u(dst, src);
-}
-
-static void crypto_morus1280_store_a(u8 *dst, const struct morus1280_block *src)
-{
-	unsigned int i;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		*(__le64 *)dst = cpu_to_le64(src->words[i]);
-		dst += MORUS1280_WORD_SIZE;
-	}
-}
-
-static void crypto_morus1280_store_u(u8 *dst, const struct morus1280_block *src)
-{
-	unsigned int i;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		put_unaligned_le64(src->words[i], dst);
-		dst += MORUS1280_WORD_SIZE;
-	}
-}
-
-static void crypto_morus1280_store(u8 *dst, const struct morus1280_block *src)
-{
-	if (MORUS1280_ALIGNED(dst))
-		crypto_morus1280_store_a(dst, src);
-	else
-		crypto_morus1280_store_u(dst, src);
-}
-
-static void crypto_morus1280_ad(struct morus1280_state *state, const u8 *src,
-				unsigned int size)
-{
-	struct morus1280_block m;
-
-	if (MORUS1280_ALIGNED(src)) {
-		while (size >= MORUS1280_BLOCK_SIZE) {
-			crypto_morus1280_load_a(&m, src);
-			crypto_morus1280_update(state, &m);
-
-			size -= MORUS1280_BLOCK_SIZE;
-			src += MORUS1280_BLOCK_SIZE;
-		}
-	} else {
-		while (size >= MORUS1280_BLOCK_SIZE) {
-			crypto_morus1280_load_u(&m, src);
-			crypto_morus1280_update(state, &m);
-
-			size -= MORUS1280_BLOCK_SIZE;
-			src += MORUS1280_BLOCK_SIZE;
-		}
-	}
-}
-
-static void crypto_morus1280_core(const struct morus1280_state *state,
-				  struct morus1280_block *blk)
-{
-	unsigned int i;
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		blk->words[(i + 3) % MORUS_BLOCK_WORDS] ^= state->s[1].words[i];
-
-        for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		blk->words[i] ^= state->s[0].words[i];
-		blk->words[i] ^= state->s[2].words[i] & state->s[3].words[i];
-	}
-}
-
-static void crypto_morus1280_encrypt_chunk(struct morus1280_state *state,
-					   u8 *dst, const u8 *src,
-					   unsigned int size)
-{
-	struct morus1280_block c, m;
-
-	if (MORUS1280_ALIGNED(src) && MORUS1280_ALIGNED(dst)) {
-		while (size >= MORUS1280_BLOCK_SIZE) {
-			crypto_morus1280_load_a(&m, src);
-			c = m;
-			crypto_morus1280_core(state, &c);
-			crypto_morus1280_store_a(dst, &c);
-			crypto_morus1280_update(state, &m);
-
-			src += MORUS1280_BLOCK_SIZE;
-			dst += MORUS1280_BLOCK_SIZE;
-			size -= MORUS1280_BLOCK_SIZE;
-		}
-	} else {
-		while (size >= MORUS1280_BLOCK_SIZE) {
-			crypto_morus1280_load_u(&m, src);
-			c = m;
-			crypto_morus1280_core(state, &c);
-			crypto_morus1280_store_u(dst, &c);
-			crypto_morus1280_update(state, &m);
-
-			src += MORUS1280_BLOCK_SIZE;
-			dst += MORUS1280_BLOCK_SIZE;
-			size -= MORUS1280_BLOCK_SIZE;
-		}
-	}
-
-	if (size > 0) {
-		union morus1280_block_in tail;
-
-		memcpy(tail.bytes, src, size);
-		memset(tail.bytes + size, 0, MORUS1280_BLOCK_SIZE - size);
-
-		crypto_morus1280_load_a(&m, tail.bytes);
-		c = m;
-		crypto_morus1280_core(state, &c);
-		crypto_morus1280_store_a(tail.bytes, &c);
-		crypto_morus1280_update(state, &m);
-
-		memcpy(dst, tail.bytes, size);
-	}
-}
-
-static void crypto_morus1280_decrypt_chunk(struct morus1280_state *state,
-					   u8 *dst, const u8 *src,
-					   unsigned int size)
-{
-	struct morus1280_block m;
-
-	if (MORUS1280_ALIGNED(src) && MORUS1280_ALIGNED(dst)) {
-		while (size >= MORUS1280_BLOCK_SIZE) {
-			crypto_morus1280_load_a(&m, src);
-			crypto_morus1280_core(state, &m);
-			crypto_morus1280_store_a(dst, &m);
-			crypto_morus1280_update(state, &m);
-
-			src += MORUS1280_BLOCK_SIZE;
-			dst += MORUS1280_BLOCK_SIZE;
-			size -= MORUS1280_BLOCK_SIZE;
-		}
-	} else {
-		while (size >= MORUS1280_BLOCK_SIZE) {
-			crypto_morus1280_load_u(&m, src);
-			crypto_morus1280_core(state, &m);
-			crypto_morus1280_store_u(dst, &m);
-			crypto_morus1280_update(state, &m);
-
-			src += MORUS1280_BLOCK_SIZE;
-			dst += MORUS1280_BLOCK_SIZE;
-			size -= MORUS1280_BLOCK_SIZE;
-		}
-	}
-
-	if (size > 0) {
-		union morus1280_block_in tail;
-
-		memcpy(tail.bytes, src, size);
-		memset(tail.bytes + size, 0, MORUS1280_BLOCK_SIZE - size);
-
-		crypto_morus1280_load_a(&m, tail.bytes);
-		crypto_morus1280_core(state, &m);
-		crypto_morus1280_store_a(tail.bytes, &m);
-		memset(tail.bytes + size, 0, MORUS1280_BLOCK_SIZE - size);
-		crypto_morus1280_load_a(&m, tail.bytes);
-		crypto_morus1280_update(state, &m);
-
-		memcpy(dst, tail.bytes, size);
-	}
-}
-
-static void crypto_morus1280_init(struct morus1280_state *state,
-				  const struct morus1280_block *key,
-				  const u8 *iv)
-{
-	static const struct morus1280_block z = {};
-
-	union morus1280_block_in tmp;
-	unsigned int i;
-
-	memcpy(tmp.bytes, iv, MORUS_NONCE_SIZE);
-	memset(tmp.bytes + MORUS_NONCE_SIZE, 0,
-	       MORUS1280_BLOCK_SIZE - MORUS_NONCE_SIZE);
-
-	crypto_morus1280_load(&state->s[0], tmp.bytes);
-	state->s[1] = *key;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		state->s[2].words[i] = U64_C(0xFFFFFFFFFFFFFFFF);
-	state->s[3] = z;
-	state->s[4] = crypto_morus1280_const[0];
-
-	for (i = 0; i < 16; i++)
-		crypto_morus1280_update(state, &z);
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		state->s[1].words[i] ^= key->words[i];
-}
-
-static void crypto_morus1280_process_ad(struct morus1280_state *state,
-					struct scatterlist *sg_src,
-					unsigned int assoclen)
-{
-	struct scatter_walk walk;
-	struct morus1280_block m;
-	union morus1280_block_in buf;
-	unsigned int pos = 0;
-
-	scatterwalk_start(&walk, sg_src);
-	while (assoclen != 0) {
-		unsigned int size = scatterwalk_clamp(&walk, assoclen);
-		unsigned int left = size;
-		void *mapped = scatterwalk_map(&walk);
-		const u8 *src = (const u8 *)mapped;
-
-		if (pos + size >= MORUS1280_BLOCK_SIZE) {
-			if (pos > 0) {
-				unsigned int fill = MORUS1280_BLOCK_SIZE - pos;
-				memcpy(buf.bytes + pos, src, fill);
-
-				crypto_morus1280_load_a(&m, buf.bytes);
-				crypto_morus1280_update(state, &m);
-
-				pos = 0;
-				left -= fill;
-				src += fill;
-			}
-
-			crypto_morus1280_ad(state, src, left);
-			src += left & ~(MORUS1280_BLOCK_SIZE - 1);
-			left &= MORUS1280_BLOCK_SIZE - 1;
-		}
-
-		memcpy(buf.bytes + pos, src, left);
-
-		pos += left;
-		assoclen -= size;
-		scatterwalk_unmap(mapped);
-		scatterwalk_advance(&walk, size);
-		scatterwalk_done(&walk, 0, assoclen);
-	}
-
-	if (pos > 0) {
-		memset(buf.bytes + pos, 0, MORUS1280_BLOCK_SIZE - pos);
-
-		crypto_morus1280_load_a(&m, buf.bytes);
-		crypto_morus1280_update(state, &m);
-	}
-}
-
-static void crypto_morus1280_process_crypt(struct morus1280_state *state,
-					   struct aead_request *req,
-					   const struct morus1280_ops *ops)
-{
-	struct skcipher_walk walk;
-
-	ops->skcipher_walk_init(&walk, req, false);
-
-	while (walk.nbytes) {
-		unsigned int nbytes = walk.nbytes;
-
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
-
-		ops->crypt_chunk(state, walk.dst.virt.addr, walk.src.virt.addr,
-				 nbytes);
-
-		skcipher_walk_done(&walk, walk.nbytes - nbytes);
-	}
-}
-
-static void crypto_morus1280_final(struct morus1280_state *state,
-				   struct morus1280_block *tag_xor,
-				   u64 assoclen, u64 cryptlen)
-{
-	struct morus1280_block tmp;
-	unsigned int i;
-
-	tmp.words[0] = assoclen * 8;
-	tmp.words[1] = cryptlen * 8;
-	tmp.words[2] = 0;
-	tmp.words[3] = 0;
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		state->s[4].words[i] ^= state->s[0].words[i];
-
-	for (i = 0; i < 10; i++)
-		crypto_morus1280_update(state, &tmp);
-
-	crypto_morus1280_core(state, tag_xor);
-}
-
-static int crypto_morus1280_setkey(struct crypto_aead *aead, const u8 *key,
-				   unsigned int keylen)
-{
-	struct morus1280_ctx *ctx = crypto_aead_ctx(aead);
-	union morus1280_block_in tmp;
-
-	if (keylen == MORUS1280_BLOCK_SIZE)
-		crypto_morus1280_load(&ctx->key, key);
-	else if (keylen == MORUS1280_BLOCK_SIZE / 2) {
-		memcpy(tmp.bytes, key, keylen);
-		memcpy(tmp.bytes + keylen, key, keylen);
-
-		crypto_morus1280_load(&ctx->key, tmp.bytes);
-	} else {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-static int crypto_morus1280_setauthsize(struct crypto_aead *tfm,
-					unsigned int authsize)
-{
-	return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
-}
-
-static void crypto_morus1280_crypt(struct aead_request *req,
-				   struct morus1280_block *tag_xor,
-				   unsigned int cryptlen,
-				   const struct morus1280_ops *ops)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus1280_ctx *ctx = crypto_aead_ctx(tfm);
-	struct morus1280_state state;
-
-	crypto_morus1280_init(&state, &ctx->key, req->iv);
-	crypto_morus1280_process_ad(&state, req->src, req->assoclen);
-	crypto_morus1280_process_crypt(&state, req, ops);
-	crypto_morus1280_final(&state, tag_xor, req->assoclen, cryptlen);
-}
-
-static int crypto_morus1280_encrypt(struct aead_request *req)
-{
-	static const struct morus1280_ops ops = {
-		.skcipher_walk_init = skcipher_walk_aead_encrypt,
-		.crypt_chunk = crypto_morus1280_encrypt_chunk,
-	};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus1280_block tag = {};
-	union morus1280_block_in tag_out;
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen;
-
-	crypto_morus1280_crypt(req, &tag, cryptlen, &ops);
-	crypto_morus1280_store(tag_out.bytes, &tag);
-
-	scatterwalk_map_and_copy(tag_out.bytes, req->dst,
-				 req->assoclen + cryptlen, authsize, 1);
-	return 0;
-}
-
-static int crypto_morus1280_decrypt(struct aead_request *req)
-{
-	static const struct morus1280_ops ops = {
-		.skcipher_walk_init = skcipher_walk_aead_decrypt,
-		.crypt_chunk = crypto_morus1280_decrypt_chunk,
-	};
-	static const u8 zeros[MORUS1280_BLOCK_SIZE] = {};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	union morus1280_block_in tag_in;
-	struct morus1280_block tag;
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen - authsize;
-
-	scatterwalk_map_and_copy(tag_in.bytes, req->src,
-				 req->assoclen + cryptlen, authsize, 0);
-
-	crypto_morus1280_load(&tag, tag_in.bytes);
-	crypto_morus1280_crypt(req, &tag, cryptlen, &ops);
-	crypto_morus1280_store(tag_in.bytes, &tag);
-
-	return crypto_memneq(tag_in.bytes, zeros, authsize) ? -EBADMSG : 0;
-}
-
-static int crypto_morus1280_init_tfm(struct crypto_aead *tfm)
-{
-	return 0;
-}
-
-static void crypto_morus1280_exit_tfm(struct crypto_aead *tfm)
-{
-}
-
-static struct aead_alg crypto_morus1280_alg = {
-	.setkey = crypto_morus1280_setkey,
-	.setauthsize = crypto_morus1280_setauthsize,
-	.encrypt = crypto_morus1280_encrypt,
-	.decrypt = crypto_morus1280_decrypt,
-	.init = crypto_morus1280_init_tfm,
-	.exit = crypto_morus1280_exit_tfm,
-
-	.ivsize = MORUS_NONCE_SIZE,
-	.maxauthsize = MORUS_MAX_AUTH_SIZE,
-	.chunksize = MORUS1280_BLOCK_SIZE,
-
-	.base = {
-		.cra_blocksize = 1,
-		.cra_ctxsize = sizeof(struct morus1280_ctx),
-		.cra_alignmask = 0,
-
-		.cra_priority = 100,
-
-		.cra_name = "morus1280",
-		.cra_driver_name = "morus1280-generic",
-
-		.cra_module = THIS_MODULE,
-	}
-};
-
-
-static int __init crypto_morus1280_module_init(void)
-{
-	return crypto_register_aead(&crypto_morus1280_alg);
-}
-
-static void __exit crypto_morus1280_module_exit(void)
-{
-	crypto_unregister_aead(&crypto_morus1280_alg);
-}
-
-subsys_initcall(crypto_morus1280_module_init);
-module_exit(crypto_morus1280_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm");
-MODULE_ALIAS_CRYPTO("morus1280");
-MODULE_ALIAS_CRYPTO("morus1280-generic");
diff --git a/crypto/morus640.c b/crypto/morus640.c
deleted file mode 100644
index ae5aa94..0000000
--- a/crypto/morus640.c
+++ /dev/null
@@ -1,533 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * The MORUS-640 Authenticated-Encryption Algorithm
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#include <asm/unaligned.h>
-#include <crypto/algapi.h>
-#include <crypto/internal/aead.h>
-#include <crypto/internal/skcipher.h>
-#include <crypto/morus_common.h>
-#include <crypto/scatterwalk.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/scatterlist.h>
-
-#define MORUS640_WORD_SIZE 4
-#define MORUS640_BLOCK_SIZE (MORUS_BLOCK_WORDS * MORUS640_WORD_SIZE)
-#define MORUS640_BLOCK_ALIGN (__alignof__(__le32))
-#define MORUS640_ALIGNED(p) IS_ALIGNED((uintptr_t)p, MORUS640_BLOCK_ALIGN)
-
-struct morus640_block {
-	u32 words[MORUS_BLOCK_WORDS];
-};
-
-union morus640_block_in {
-	__le32 words[MORUS_BLOCK_WORDS];
-	u8 bytes[MORUS640_BLOCK_SIZE];
-};
-
-struct morus640_state {
-	struct morus640_block s[MORUS_STATE_BLOCKS];
-};
-
-struct morus640_ctx {
-	struct morus640_block key;
-};
-
-struct morus640_ops {
-	int (*skcipher_walk_init)(struct skcipher_walk *walk,
-				  struct aead_request *req, bool atomic);
-
-	void (*crypt_chunk)(struct morus640_state *state,
-			    u8 *dst, const u8 *src, unsigned int size);
-};
-
-static const struct morus640_block crypto_morus640_const[2] = {
-	{ .words = {
-		U32_C(0x02010100),
-		U32_C(0x0d080503),
-		U32_C(0x59372215),
-		U32_C(0x6279e990),
-	} },
-	{ .words = {
-		U32_C(0x55183ddb),
-		U32_C(0xf12fc26d),
-		U32_C(0x42311120),
-		U32_C(0xdd28b573),
-	} },
-};
-
-static void crypto_morus640_round(struct morus640_block *b0,
-				  struct morus640_block *b1,
-				  struct morus640_block *b2,
-				  struct morus640_block *b3,
-				  struct morus640_block *b4,
-				  const struct morus640_block *m,
-				  unsigned int b, unsigned int w)
-{
-	unsigned int i;
-	struct morus640_block tmp;
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		b0->words[i] ^= b1->words[i] & b2->words[i];
-		b0->words[i] ^= b3->words[i];
-		b0->words[i] ^= m->words[i];
-		b0->words[i] = rol32(b0->words[i], b);
-	}
-
-	tmp = *b3;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		b3->words[(i + w) % MORUS_BLOCK_WORDS] = tmp.words[i];
-}
-
-static void crypto_morus640_update(struct morus640_state *state,
-				   const struct morus640_block *m)
-{
-	static const struct morus640_block z = {};
-
-	struct morus640_block *s = state->s;
-
-	crypto_morus640_round(&s[0], &s[1], &s[2], &s[3], &s[4], &z,  5, 1);
-	crypto_morus640_round(&s[1], &s[2], &s[3], &s[4], &s[0], m,  31, 2);
-	crypto_morus640_round(&s[2], &s[3], &s[4], &s[0], &s[1], m,   7, 3);
-	crypto_morus640_round(&s[3], &s[4], &s[0], &s[1], &s[2], m,  22, 2);
-	crypto_morus640_round(&s[4], &s[0], &s[1], &s[2], &s[3], m,  13, 1);
-}
-
-static void crypto_morus640_load_a(struct morus640_block *dst, const u8 *src)
-{
-	unsigned int i;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		dst->words[i] = le32_to_cpu(*(const __le32 *)src);
-		src += MORUS640_WORD_SIZE;
-	}
-}
-
-static void crypto_morus640_load_u(struct morus640_block *dst, const u8 *src)
-{
-	unsigned int i;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		dst->words[i] = get_unaligned_le32(src);
-		src += MORUS640_WORD_SIZE;
-	}
-}
-
-static void crypto_morus640_load(struct morus640_block *dst, const u8 *src)
-{
-	if (MORUS640_ALIGNED(src))
-		crypto_morus640_load_a(dst, src);
-	else
-		crypto_morus640_load_u(dst, src);
-}
-
-static void crypto_morus640_store_a(u8 *dst, const struct morus640_block *src)
-{
-	unsigned int i;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		*(__le32 *)dst = cpu_to_le32(src->words[i]);
-		dst += MORUS640_WORD_SIZE;
-	}
-}
-
-static void crypto_morus640_store_u(u8 *dst, const struct morus640_block *src)
-{
-	unsigned int i;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		put_unaligned_le32(src->words[i], dst);
-		dst += MORUS640_WORD_SIZE;
-	}
-}
-
-static void crypto_morus640_store(u8 *dst, const struct morus640_block *src)
-{
-	if (MORUS640_ALIGNED(dst))
-		crypto_morus640_store_a(dst, src);
-	else
-		crypto_morus640_store_u(dst, src);
-}
-
-static void crypto_morus640_ad(struct morus640_state *state, const u8 *src,
-			       unsigned int size)
-{
-	struct morus640_block m;
-
-	if (MORUS640_ALIGNED(src)) {
-		while (size >= MORUS640_BLOCK_SIZE) {
-			crypto_morus640_load_a(&m, src);
-			crypto_morus640_update(state, &m);
-
-			size -= MORUS640_BLOCK_SIZE;
-			src += MORUS640_BLOCK_SIZE;
-		}
-	} else {
-		while (size >= MORUS640_BLOCK_SIZE) {
-			crypto_morus640_load_u(&m, src);
-			crypto_morus640_update(state, &m);
-
-			size -= MORUS640_BLOCK_SIZE;
-			src += MORUS640_BLOCK_SIZE;
-		}
-	}
-}
-
-static void crypto_morus640_core(const struct morus640_state *state,
-				 struct morus640_block *blk)
-{
-	unsigned int i;
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		blk->words[(i + 3) % MORUS_BLOCK_WORDS] ^= state->s[1].words[i];
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++) {
-		blk->words[i] ^= state->s[0].words[i];
-		blk->words[i] ^= state->s[2].words[i] & state->s[3].words[i];
-	}
-}
-
-static void crypto_morus640_encrypt_chunk(struct morus640_state *state, u8 *dst,
-					  const u8 *src, unsigned int size)
-{
-	struct morus640_block c, m;
-
-	if (MORUS640_ALIGNED(src) && MORUS640_ALIGNED(dst)) {
-		while (size >= MORUS640_BLOCK_SIZE) {
-			crypto_morus640_load_a(&m, src);
-			c = m;
-			crypto_morus640_core(state, &c);
-			crypto_morus640_store_a(dst, &c);
-			crypto_morus640_update(state, &m);
-
-			src += MORUS640_BLOCK_SIZE;
-			dst += MORUS640_BLOCK_SIZE;
-			size -= MORUS640_BLOCK_SIZE;
-		}
-	} else {
-		while (size >= MORUS640_BLOCK_SIZE) {
-			crypto_morus640_load_u(&m, src);
-			c = m;
-			crypto_morus640_core(state, &c);
-			crypto_morus640_store_u(dst, &c);
-			crypto_morus640_update(state, &m);
-
-			src += MORUS640_BLOCK_SIZE;
-			dst += MORUS640_BLOCK_SIZE;
-			size -= MORUS640_BLOCK_SIZE;
-		}
-	}
-
-	if (size > 0) {
-		union morus640_block_in tail;
-
-		memcpy(tail.bytes, src, size);
-		memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);
-
-		crypto_morus640_load_a(&m, tail.bytes);
-		c = m;
-		crypto_morus640_core(state, &c);
-		crypto_morus640_store_a(tail.bytes, &c);
-		crypto_morus640_update(state, &m);
-
-		memcpy(dst, tail.bytes, size);
-	}
-}
-
-static void crypto_morus640_decrypt_chunk(struct morus640_state *state, u8 *dst,
-					  const u8 *src, unsigned int size)
-{
-	struct morus640_block m;
-
-	if (MORUS640_ALIGNED(src) && MORUS640_ALIGNED(dst)) {
-		while (size >= MORUS640_BLOCK_SIZE) {
-			crypto_morus640_load_a(&m, src);
-			crypto_morus640_core(state, &m);
-			crypto_morus640_store_a(dst, &m);
-			crypto_morus640_update(state, &m);
-
-			src += MORUS640_BLOCK_SIZE;
-			dst += MORUS640_BLOCK_SIZE;
-			size -= MORUS640_BLOCK_SIZE;
-		}
-	} else {
-		while (size >= MORUS640_BLOCK_SIZE) {
-			crypto_morus640_load_u(&m, src);
-			crypto_morus640_core(state, &m);
-			crypto_morus640_store_u(dst, &m);
-			crypto_morus640_update(state, &m);
-
-			src += MORUS640_BLOCK_SIZE;
-			dst += MORUS640_BLOCK_SIZE;
-			size -= MORUS640_BLOCK_SIZE;
-		}
-	}
-
-	if (size > 0) {
-		union morus640_block_in tail;
-
-		memcpy(tail.bytes, src, size);
-		memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);
-
-		crypto_morus640_load_a(&m, tail.bytes);
-		crypto_morus640_core(state, &m);
-		crypto_morus640_store_a(tail.bytes, &m);
-		memset(tail.bytes + size, 0, MORUS640_BLOCK_SIZE - size);
-		crypto_morus640_load_a(&m, tail.bytes);
-		crypto_morus640_update(state, &m);
-
-		memcpy(dst, tail.bytes, size);
-	}
-}
-
-static void crypto_morus640_init(struct morus640_state *state,
-				 const struct morus640_block *key,
-				 const u8 *iv)
-{
-	static const struct morus640_block z = {};
-
-	unsigned int i;
-
-	crypto_morus640_load(&state->s[0], iv);
-	state->s[1] = *key;
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		state->s[2].words[i] = U32_C(0xFFFFFFFF);
-	state->s[3] = crypto_morus640_const[0];
-	state->s[4] = crypto_morus640_const[1];
-
-	for (i = 0; i < 16; i++)
-		crypto_morus640_update(state, &z);
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		state->s[1].words[i] ^= key->words[i];
-}
-
-static void crypto_morus640_process_ad(struct morus640_state *state,
-				       struct scatterlist *sg_src,
-				       unsigned int assoclen)
-{
-	struct scatter_walk walk;
-	struct morus640_block m;
-	union morus640_block_in buf;
-	unsigned int pos = 0;
-
-	scatterwalk_start(&walk, sg_src);
-	while (assoclen != 0) {
-		unsigned int size = scatterwalk_clamp(&walk, assoclen);
-		unsigned int left = size;
-		void *mapped = scatterwalk_map(&walk);
-		const u8 *src = (const u8 *)mapped;
-
-		if (pos + size >= MORUS640_BLOCK_SIZE) {
-			if (pos > 0) {
-				unsigned int fill = MORUS640_BLOCK_SIZE - pos;
-				memcpy(buf.bytes + pos, src, fill);
-
-				crypto_morus640_load_a(&m, buf.bytes);
-				crypto_morus640_update(state, &m);
-
-				pos = 0;
-				left -= fill;
-				src += fill;
-			}
-
-			crypto_morus640_ad(state, src, left);
-			src += left & ~(MORUS640_BLOCK_SIZE - 1);
-			left &= MORUS640_BLOCK_SIZE - 1;
-		}
-
-		memcpy(buf.bytes + pos, src, left);
-
-		pos += left;
-		assoclen -= size;
-		scatterwalk_unmap(mapped);
-		scatterwalk_advance(&walk, size);
-		scatterwalk_done(&walk, 0, assoclen);
-	}
-
-	if (pos > 0) {
-		memset(buf.bytes + pos, 0, MORUS640_BLOCK_SIZE - pos);
-
-		crypto_morus640_load_a(&m, buf.bytes);
-		crypto_morus640_update(state, &m);
-	}
-}
-
-static void crypto_morus640_process_crypt(struct morus640_state *state,
-					  struct aead_request *req,
-					  const struct morus640_ops *ops)
-{
-	struct skcipher_walk walk;
-
-	ops->skcipher_walk_init(&walk, req, false);
-
-	while (walk.nbytes) {
-		unsigned int nbytes = walk.nbytes;
-
-		if (nbytes < walk.total)
-			nbytes = round_down(nbytes, walk.stride);
-
-		ops->crypt_chunk(state, walk.dst.virt.addr, walk.src.virt.addr,
-				 nbytes);
-
-		skcipher_walk_done(&walk, walk.nbytes - nbytes);
-	}
-}
-
-static void crypto_morus640_final(struct morus640_state *state,
-				  struct morus640_block *tag_xor,
-				  u64 assoclen, u64 cryptlen)
-{
-	struct morus640_block tmp;
-	unsigned int i;
-
-	tmp.words[0] = lower_32_bits(assoclen * 8);
-	tmp.words[1] = upper_32_bits(assoclen * 8);
-	tmp.words[2] = lower_32_bits(cryptlen * 8);
-	tmp.words[3] = upper_32_bits(cryptlen * 8);
-
-	for (i = 0; i < MORUS_BLOCK_WORDS; i++)
-		state->s[4].words[i] ^= state->s[0].words[i];
-
-	for (i = 0; i < 10; i++)
-		crypto_morus640_update(state, &tmp);
-
-	crypto_morus640_core(state, tag_xor);
-}
-
-static int crypto_morus640_setkey(struct crypto_aead *aead, const u8 *key,
-				  unsigned int keylen)
-{
-	struct morus640_ctx *ctx = crypto_aead_ctx(aead);
-
-	if (keylen != MORUS640_BLOCK_SIZE) {
-		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	crypto_morus640_load(&ctx->key, key);
-	return 0;
-}
-
-static int crypto_morus640_setauthsize(struct crypto_aead *tfm,
-				       unsigned int authsize)
-{
-	return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL;
-}
-
-static void crypto_morus640_crypt(struct aead_request *req,
-				  struct morus640_block *tag_xor,
-				  unsigned int cryptlen,
-				  const struct morus640_ops *ops)
-{
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus640_ctx *ctx = crypto_aead_ctx(tfm);
-	struct morus640_state state;
-
-	crypto_morus640_init(&state, &ctx->key, req->iv);
-	crypto_morus640_process_ad(&state, req->src, req->assoclen);
-	crypto_morus640_process_crypt(&state, req, ops);
-	crypto_morus640_final(&state, tag_xor, req->assoclen, cryptlen);
-}
-
-static int crypto_morus640_encrypt(struct aead_request *req)
-{
-	static const struct morus640_ops ops = {
-		.skcipher_walk_init = skcipher_walk_aead_encrypt,
-		.crypt_chunk = crypto_morus640_encrypt_chunk,
-	};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct morus640_block tag = {};
-	union morus640_block_in tag_out;
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen;
-
-	crypto_morus640_crypt(req, &tag, cryptlen, &ops);
-	crypto_morus640_store(tag_out.bytes, &tag);
-
-	scatterwalk_map_and_copy(tag_out.bytes, req->dst,
-				 req->assoclen + cryptlen, authsize, 1);
-	return 0;
-}
-
-static int crypto_morus640_decrypt(struct aead_request *req)
-{
-	static const struct morus640_ops ops = {
-		.skcipher_walk_init = skcipher_walk_aead_decrypt,
-		.crypt_chunk = crypto_morus640_decrypt_chunk,
-	};
-	static const u8 zeros[MORUS640_BLOCK_SIZE] = {};
-
-	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	union morus640_block_in tag_in;
-	struct morus640_block tag;
-	unsigned int authsize = crypto_aead_authsize(tfm);
-	unsigned int cryptlen = req->cryptlen - authsize;
-
-	scatterwalk_map_and_copy(tag_in.bytes, req->src,
-				 req->assoclen + cryptlen, authsize, 0);
-
-	crypto_morus640_load(&tag, tag_in.bytes);
-	crypto_morus640_crypt(req, &tag, cryptlen, &ops);
-	crypto_morus640_store(tag_in.bytes, &tag);
-
-	return crypto_memneq(tag_in.bytes, zeros, authsize) ? -EBADMSG : 0;
-}
-
-static int crypto_morus640_init_tfm(struct crypto_aead *tfm)
-{
-	return 0;
-}
-
-static void crypto_morus640_exit_tfm(struct crypto_aead *tfm)
-{
-}
-
-static struct aead_alg crypto_morus640_alg = {
-	.setkey = crypto_morus640_setkey,
-	.setauthsize = crypto_morus640_setauthsize,
-	.encrypt = crypto_morus640_encrypt,
-	.decrypt = crypto_morus640_decrypt,
-	.init = crypto_morus640_init_tfm,
-	.exit = crypto_morus640_exit_tfm,
-
-	.ivsize = MORUS_NONCE_SIZE,
-	.maxauthsize = MORUS_MAX_AUTH_SIZE,
-	.chunksize = MORUS640_BLOCK_SIZE,
-
-	.base = {
-		.cra_blocksize = 1,
-		.cra_ctxsize = sizeof(struct morus640_ctx),
-		.cra_alignmask = 0,
-
-		.cra_priority = 100,
-
-		.cra_name = "morus640",
-		.cra_driver_name = "morus640-generic",
-
-		.cra_module = THIS_MODULE,
-	}
-};
-
-static int __init crypto_morus640_module_init(void)
-{
-	return crypto_register_aead(&crypto_morus640_alg);
-}
-
-static void __exit crypto_morus640_module_exit(void)
-{
-	crypto_unregister_aead(&crypto_morus640_alg);
-}
-
-subsys_initcall(crypto_morus640_module_init);
-module_exit(crypto_morus640_module_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>");
-MODULE_DESCRIPTION("MORUS-640 AEAD algorithm");
-MODULE_ALIAS_CRYPTO("morus640");
-MODULE_ALIAS_CRYPTO("morus640-generic");
diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c
index 0edf5b5..543792e 100644
--- a/crypto/pcrypt.c
+++ b/crypto/pcrypt.c
@@ -18,34 +18,8 @@
 #include <linux/cpu.h>
 #include <crypto/pcrypt.h>
 
-struct padata_pcrypt {
-	struct padata_instance *pinst;
-	struct workqueue_struct *wq;
-
-	/*
-	 * Cpumask for callback CPUs. It should be
-	 * equal to serial cpumask of corresponding padata instance,
-	 * so it is updated when padata notifies us about serial
-	 * cpumask change.
-	 *
-	 * cb_cpumask is protected by RCU. This fact prevents us from
-	 * using cpumask_var_t directly because the actual type of
-	 * cpumsak_var_t depends on kernel configuration(particularly on
-	 * CONFIG_CPUMASK_OFFSTACK macro). Depending on the configuration
-	 * cpumask_var_t may be either a pointer to the struct cpumask
-	 * or a variable allocated on the stack. Thus we can not safely use
-	 * cpumask_var_t with RCU operations such as rcu_assign_pointer or
-	 * rcu_dereference. So cpumask_var_t is wrapped with struct
-	 * pcrypt_cpumask which makes possible to use it with RCU.
-	 */
-	struct pcrypt_cpumask {
-		cpumask_var_t mask;
-	} *cb_cpumask;
-	struct notifier_block nblock;
-};
-
-static struct padata_pcrypt pencrypt;
-static struct padata_pcrypt pdecrypt;
+static struct padata_instance *pencrypt;
+static struct padata_instance *pdecrypt;
 static struct kset           *pcrypt_kset;
 
 struct pcrypt_instance_ctx {
@@ -58,35 +32,6 @@
 	unsigned int cb_cpu;
 };
 
-static int pcrypt_do_parallel(struct padata_priv *padata, unsigned int *cb_cpu,
-			      struct padata_pcrypt *pcrypt)
-{
-	unsigned int cpu_index, cpu, i;
-	struct pcrypt_cpumask *cpumask;
-
-	cpu = *cb_cpu;
-
-	rcu_read_lock_bh();
-	cpumask = rcu_dereference_bh(pcrypt->cb_cpumask);
-	if (cpumask_test_cpu(cpu, cpumask->mask))
-			goto out;
-
-	if (!cpumask_weight(cpumask->mask))
-			goto out;
-
-	cpu_index = cpu % cpumask_weight(cpumask->mask);
-
-	cpu = cpumask_first(cpumask->mask);
-	for (i = 0; i < cpu_index; i++)
-		cpu = cpumask_next(cpu, cpumask->mask);
-
-	*cb_cpu = cpu;
-
-out:
-	rcu_read_unlock_bh();
-	return padata_do_parallel(pcrypt->pinst, padata, cpu);
-}
-
 static int pcrypt_aead_setkey(struct crypto_aead *parent,
 			      const u8 *key, unsigned int keylen)
 {
@@ -158,7 +103,7 @@
 			       req->cryptlen, req->iv);
 	aead_request_set_ad(creq, req->assoclen);
 
-	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, &pencrypt);
+	err = padata_do_parallel(pencrypt, padata, &ctx->cb_cpu);
 	if (!err)
 		return -EINPROGRESS;
 
@@ -200,7 +145,7 @@
 			       req->cryptlen, req->iv);
 	aead_request_set_ad(creq, req->assoclen);
 
-	err = pcrypt_do_parallel(padata, &ctx->cb_cpu, &pdecrypt);
+	err = padata_do_parallel(pdecrypt, padata, &ctx->cb_cpu);
 	if (!err)
 		return -EINPROGRESS;
 
@@ -347,36 +292,6 @@
 	return -EINVAL;
 }
 
-static int pcrypt_cpumask_change_notify(struct notifier_block *self,
-					unsigned long val, void *data)
-{
-	struct padata_pcrypt *pcrypt;
-	struct pcrypt_cpumask *new_mask, *old_mask;
-	struct padata_cpumask *cpumask = (struct padata_cpumask *)data;
-
-	if (!(val & PADATA_CPU_SERIAL))
-		return 0;
-
-	pcrypt = container_of(self, struct padata_pcrypt, nblock);
-	new_mask = kmalloc(sizeof(*new_mask), GFP_KERNEL);
-	if (!new_mask)
-		return -ENOMEM;
-	if (!alloc_cpumask_var(&new_mask->mask, GFP_KERNEL)) {
-		kfree(new_mask);
-		return -ENOMEM;
-	}
-
-	old_mask = pcrypt->cb_cpumask;
-
-	cpumask_copy(new_mask->mask, cpumask->cbcpu);
-	rcu_assign_pointer(pcrypt->cb_cpumask, new_mask);
-	synchronize_rcu();
-
-	free_cpumask_var(old_mask->mask);
-	kfree(old_mask);
-	return 0;
-}
-
 static int pcrypt_sysfs_add(struct padata_instance *pinst, const char *name)
 {
 	int ret;
@@ -389,71 +304,25 @@
 	return ret;
 }
 
-static int pcrypt_init_padata(struct padata_pcrypt *pcrypt,
-			      const char *name)
+static int pcrypt_init_padata(struct padata_instance **pinst, const char *name)
 {
 	int ret = -ENOMEM;
-	struct pcrypt_cpumask *mask;
 
-	get_online_cpus();
+	*pinst = padata_alloc_possible(name);
+	if (!*pinst)
+		return ret;
 
-	pcrypt->wq = alloc_workqueue("%s", WQ_MEM_RECLAIM | WQ_CPU_INTENSIVE,
-				     1, name);
-	if (!pcrypt->wq)
-		goto err;
-
-	pcrypt->pinst = padata_alloc_possible(pcrypt->wq);
-	if (!pcrypt->pinst)
-		goto err_destroy_workqueue;
-
-	mask = kmalloc(sizeof(*mask), GFP_KERNEL);
-	if (!mask)
-		goto err_free_padata;
-	if (!alloc_cpumask_var(&mask->mask, GFP_KERNEL)) {
-		kfree(mask);
-		goto err_free_padata;
-	}
-
-	cpumask_and(mask->mask, cpu_possible_mask, cpu_online_mask);
-	rcu_assign_pointer(pcrypt->cb_cpumask, mask);
-
-	pcrypt->nblock.notifier_call = pcrypt_cpumask_change_notify;
-	ret = padata_register_cpumask_notifier(pcrypt->pinst, &pcrypt->nblock);
+	ret = pcrypt_sysfs_add(*pinst, name);
 	if (ret)
-		goto err_free_cpumask;
-
-	ret = pcrypt_sysfs_add(pcrypt->pinst, name);
-	if (ret)
-		goto err_unregister_notifier;
-
-	put_online_cpus();
-
-	return ret;
-
-err_unregister_notifier:
-	padata_unregister_cpumask_notifier(pcrypt->pinst, &pcrypt->nblock);
-err_free_cpumask:
-	free_cpumask_var(mask->mask);
-	kfree(mask);
-err_free_padata:
-	padata_free(pcrypt->pinst);
-err_destroy_workqueue:
-	destroy_workqueue(pcrypt->wq);
-err:
-	put_online_cpus();
+		padata_free(*pinst);
 
 	return ret;
 }
 
-static void pcrypt_fini_padata(struct padata_pcrypt *pcrypt)
+static void pcrypt_fini_padata(struct padata_instance *pinst)
 {
-	free_cpumask_var(pcrypt->cb_cpumask->mask);
-	kfree(pcrypt->cb_cpumask);
-
-	padata_stop(pcrypt->pinst);
-	padata_unregister_cpumask_notifier(pcrypt->pinst, &pcrypt->nblock);
-	destroy_workqueue(pcrypt->wq);
-	padata_free(pcrypt->pinst);
+	padata_stop(pinst);
+	padata_free(pinst);
 }
 
 static struct crypto_template pcrypt_tmpl = {
@@ -478,13 +347,13 @@
 	if (err)
 		goto err_deinit_pencrypt;
 
-	padata_start(pencrypt.pinst);
-	padata_start(pdecrypt.pinst);
+	padata_start(pencrypt);
+	padata_start(pdecrypt);
 
 	return crypto_register_template(&pcrypt_tmpl);
 
 err_deinit_pencrypt:
-	pcrypt_fini_padata(&pencrypt);
+	pcrypt_fini_padata(pencrypt);
 err_unreg_kset:
 	kset_unregister(pcrypt_kset);
 err:
@@ -493,8 +362,8 @@
 
 static void __exit pcrypt_exit(void)
 {
-	pcrypt_fini_padata(&pencrypt);
-	pcrypt_fini_padata(&pdecrypt);
+	pcrypt_fini_padata(pencrypt);
+	pcrypt_fini_padata(pdecrypt);
 
 	kset_unregister(pcrypt_kset);
 	crypto_unregister_template(&pcrypt_tmpl);
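
With the cpumask notifier and the private workqueue gone, a pcrypt-style padata user
boils down to a handful of calls. The sketch below is assembled from the padata
interfaces used in this patch plus the standard parallel/serial completion callbacks;
the callback bodies and names are placeholders and the job setup is abbreviated.

    #include <linux/errno.h>
    #include <linux/padata.h>

    static struct padata_instance *pinst;

    static void example_parallel(struct padata_priv *padata)
    {
        /* CPU-intensive work runs here, then the job is handed back. */
        padata_do_serial(padata);
    }

    static void example_serial(struct padata_priv *padata)
    {
        /* Completion runs here, in the original submission order. */
    }

    static int example_setup(void)
    {
        pinst = padata_alloc_possible("example");
        if (!pinst)
            return -ENOMEM;
        return padata_start(pinst);
    }

    static int example_submit(struct padata_priv *padata, unsigned int *cb_cpu)
    {
        padata->parallel = example_parallel;
        padata->serial = example_serial;
        return padata_do_parallel(pinst, padata, cb_cpu);
    }

    static void example_teardown(void)
    {
        padata_stop(pinst);
        padata_free(pinst);
    }
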
diff --git a/crypto/sha256_generic.c b/crypto/sha256_generic.c
index b7502a9..f2d7095 100644
--- a/crypto/sha256_generic.c
+++ b/crypto/sha256_generic.c
@@ -1,11 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * Cryptographic API.
- *
- * SHA-256, as specified in
- * http://csrc.nist.gov/groups/STM/cavp/documents/shs/sha256-384-512.pdf
- *
- * SHA-256 code by Jean-Luc Cooke <jlcooke@certainkey.com>.
+ * Crypto API wrapper for the generic SHA256 code from lib/crypto/sha256.c
  *
  * Copyright (c) Jean-Luc Cooke <jlcooke@certainkey.com>
  * Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
@@ -38,229 +33,44 @@
 };
 EXPORT_SYMBOL_GPL(sha256_zero_message_hash);
 
-static inline u32 Ch(u32 x, u32 y, u32 z)
+static int crypto_sha256_init(struct shash_desc *desc)
 {
-	return z ^ (x & (y ^ z));
+	return sha256_init(shash_desc_ctx(desc));
 }
 
-static inline u32 Maj(u32 x, u32 y, u32 z)
+static int crypto_sha224_init(struct shash_desc *desc)
 {
-	return (x & y) | (z & (x | y));
-}
-
-#define e0(x)       (ror32(x, 2) ^ ror32(x,13) ^ ror32(x,22))
-#define e1(x)       (ror32(x, 6) ^ ror32(x,11) ^ ror32(x,25))
-#define s0(x)       (ror32(x, 7) ^ ror32(x,18) ^ (x >> 3))
-#define s1(x)       (ror32(x,17) ^ ror32(x,19) ^ (x >> 10))
-
-static inline void LOAD_OP(int I, u32 *W, const u8 *input)
-{
-	W[I] = get_unaligned_be32((__u32 *)input + I);
-}
-
-static inline void BLEND_OP(int I, u32 *W)
-{
-	W[I] = s1(W[I-2]) + W[I-7] + s0(W[I-15]) + W[I-16];
-}
-
-static void sha256_transform(u32 *state, const u8 *input)
-{
-	u32 a, b, c, d, e, f, g, h, t1, t2;
-	u32 W[64];
-	int i;
-
-	/* load the input */
-	for (i = 0; i < 16; i++)
-		LOAD_OP(i, W, input);
-
-	/* now blend */
-	for (i = 16; i < 64; i++)
-		BLEND_OP(i, W);
-
-	/* load the state into our registers */
-	a=state[0];  b=state[1];  c=state[2];  d=state[3];
-	e=state[4];  f=state[5];  g=state[6];  h=state[7];
-
-	/* now iterate */
-	t1 = h + e1(e) + Ch(e,f,g) + 0x428a2f98 + W[ 0];
-	t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
-	t1 = g + e1(d) + Ch(d,e,f) + 0x71374491 + W[ 1];
-	t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
-	t1 = f + e1(c) + Ch(c,d,e) + 0xb5c0fbcf + W[ 2];
-	t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
-	t1 = e + e1(b) + Ch(b,c,d) + 0xe9b5dba5 + W[ 3];
-	t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
-	t1 = d + e1(a) + Ch(a,b,c) + 0x3956c25b + W[ 4];
-	t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
-	t1 = c + e1(h) + Ch(h,a,b) + 0x59f111f1 + W[ 5];
-	t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
-	t1 = b + e1(g) + Ch(g,h,a) + 0x923f82a4 + W[ 6];
-	t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
-	t1 = a + e1(f) + Ch(f,g,h) + 0xab1c5ed5 + W[ 7];
-	t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
-
-	t1 = h + e1(e) + Ch(e,f,g) + 0xd807aa98 + W[ 8];
-	t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
-	t1 = g + e1(d) + Ch(d,e,f) + 0x12835b01 + W[ 9];
-	t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
-	t1 = f + e1(c) + Ch(c,d,e) + 0x243185be + W[10];
-	t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
-	t1 = e + e1(b) + Ch(b,c,d) + 0x550c7dc3 + W[11];
-	t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
-	t1 = d + e1(a) + Ch(a,b,c) + 0x72be5d74 + W[12];
-	t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
-	t1 = c + e1(h) + Ch(h,a,b) + 0x80deb1fe + W[13];
-	t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
-	t1 = b + e1(g) + Ch(g,h,a) + 0x9bdc06a7 + W[14];
-	t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
-	t1 = a + e1(f) + Ch(f,g,h) + 0xc19bf174 + W[15];
-	t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
-
-	t1 = h + e1(e) + Ch(e,f,g) + 0xe49b69c1 + W[16];
-	t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
-	t1 = g + e1(d) + Ch(d,e,f) + 0xefbe4786 + W[17];
-	t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
-	t1 = f + e1(c) + Ch(c,d,e) + 0x0fc19dc6 + W[18];
-	t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
-	t1 = e + e1(b) + Ch(b,c,d) + 0x240ca1cc + W[19];
-	t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
-	t1 = d + e1(a) + Ch(a,b,c) + 0x2de92c6f + W[20];
-	t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
-	t1 = c + e1(h) + Ch(h,a,b) + 0x4a7484aa + W[21];
-	t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
-	t1 = b + e1(g) + Ch(g,h,a) + 0x5cb0a9dc + W[22];
-	t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
-	t1 = a + e1(f) + Ch(f,g,h) + 0x76f988da + W[23];
-	t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
-
-	t1 = h + e1(e) + Ch(e,f,g) + 0x983e5152 + W[24];
-	t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
-	t1 = g + e1(d) + Ch(d,e,f) + 0xa831c66d + W[25];
-	t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
-	t1 = f + e1(c) + Ch(c,d,e) + 0xb00327c8 + W[26];
-	t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
-	t1 = e + e1(b) + Ch(b,c,d) + 0xbf597fc7 + W[27];
-	t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
-	t1 = d + e1(a) + Ch(a,b,c) + 0xc6e00bf3 + W[28];
-	t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
-	t1 = c + e1(h) + Ch(h,a,b) + 0xd5a79147 + W[29];
-	t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
-	t1 = b + e1(g) + Ch(g,h,a) + 0x06ca6351 + W[30];
-	t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
-	t1 = a + e1(f) + Ch(f,g,h) + 0x14292967 + W[31];
-	t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
-
-	t1 = h + e1(e) + Ch(e,f,g) + 0x27b70a85 + W[32];
-	t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
-	t1 = g + e1(d) + Ch(d,e,f) + 0x2e1b2138 + W[33];
-	t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
-	t1 = f + e1(c) + Ch(c,d,e) + 0x4d2c6dfc + W[34];
-	t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
-	t1 = e + e1(b) + Ch(b,c,d) + 0x53380d13 + W[35];
-	t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
-	t1 = d + e1(a) + Ch(a,b,c) + 0x650a7354 + W[36];
-	t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
-	t1 = c + e1(h) + Ch(h,a,b) + 0x766a0abb + W[37];
-	t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
-	t1 = b + e1(g) + Ch(g,h,a) + 0x81c2c92e + W[38];
-	t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
-	t1 = a + e1(f) + Ch(f,g,h) + 0x92722c85 + W[39];
-	t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
-
-	t1 = h + e1(e) + Ch(e,f,g) + 0xa2bfe8a1 + W[40];
-	t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
-	t1 = g + e1(d) + Ch(d,e,f) + 0xa81a664b + W[41];
-	t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
-	t1 = f + e1(c) + Ch(c,d,e) + 0xc24b8b70 + W[42];
-	t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
-	t1 = e + e1(b) + Ch(b,c,d) + 0xc76c51a3 + W[43];
-	t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
-	t1 = d + e1(a) + Ch(a,b,c) + 0xd192e819 + W[44];
-	t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
-	t1 = c + e1(h) + Ch(h,a,b) + 0xd6990624 + W[45];
-	t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
-	t1 = b + e1(g) + Ch(g,h,a) + 0xf40e3585 + W[46];
-	t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
-	t1 = a + e1(f) + Ch(f,g,h) + 0x106aa070 + W[47];
-	t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
-
-	t1 = h + e1(e) + Ch(e,f,g) + 0x19a4c116 + W[48];
-	t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
-	t1 = g + e1(d) + Ch(d,e,f) + 0x1e376c08 + W[49];
-	t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
-	t1 = f + e1(c) + Ch(c,d,e) + 0x2748774c + W[50];
-	t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
-	t1 = e + e1(b) + Ch(b,c,d) + 0x34b0bcb5 + W[51];
-	t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
-	t1 = d + e1(a) + Ch(a,b,c) + 0x391c0cb3 + W[52];
-	t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
-	t1 = c + e1(h) + Ch(h,a,b) + 0x4ed8aa4a + W[53];
-	t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
-	t1 = b + e1(g) + Ch(g,h,a) + 0x5b9cca4f + W[54];
-	t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
-	t1 = a + e1(f) + Ch(f,g,h) + 0x682e6ff3 + W[55];
-	t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
-
-	t1 = h + e1(e) + Ch(e,f,g) + 0x748f82ee + W[56];
-	t2 = e0(a) + Maj(a,b,c);    d+=t1;    h=t1+t2;
-	t1 = g + e1(d) + Ch(d,e,f) + 0x78a5636f + W[57];
-	t2 = e0(h) + Maj(h,a,b);    c+=t1;    g=t1+t2;
-	t1 = f + e1(c) + Ch(c,d,e) + 0x84c87814 + W[58];
-	t2 = e0(g) + Maj(g,h,a);    b+=t1;    f=t1+t2;
-	t1 = e + e1(b) + Ch(b,c,d) + 0x8cc70208 + W[59];
-	t2 = e0(f) + Maj(f,g,h);    a+=t1;    e=t1+t2;
-	t1 = d + e1(a) + Ch(a,b,c) + 0x90befffa + W[60];
-	t2 = e0(e) + Maj(e,f,g);    h+=t1;    d=t1+t2;
-	t1 = c + e1(h) + Ch(h,a,b) + 0xa4506ceb + W[61];
-	t2 = e0(d) + Maj(d,e,f);    g+=t1;    c=t1+t2;
-	t1 = b + e1(g) + Ch(g,h,a) + 0xbef9a3f7 + W[62];
-	t2 = e0(c) + Maj(c,d,e);    f+=t1;    b=t1+t2;
-	t1 = a + e1(f) + Ch(f,g,h) + 0xc67178f2 + W[63];
-	t2 = e0(b) + Maj(b,c,d);    e+=t1;    a=t1+t2;
-
-	state[0] += a; state[1] += b; state[2] += c; state[3] += d;
-	state[4] += e; state[5] += f; state[6] += g; state[7] += h;
-
-	/* clear any sensitive info... */
-	a = b = c = d = e = f = g = h = t1 = t2 = 0;
-	memzero_explicit(W, 64 * sizeof(u32));
-}
-
-static void sha256_generic_block_fn(struct sha256_state *sst, u8 const *src,
-				    int blocks)
-{
-	while (blocks--) {
-		sha256_transform(sst->state, src);
-		src += SHA256_BLOCK_SIZE;
-	}
+	return sha224_init(shash_desc_ctx(desc));
 }
 
 int crypto_sha256_update(struct shash_desc *desc, const u8 *data,
 			  unsigned int len)
 {
-	return sha256_base_do_update(desc, data, len, sha256_generic_block_fn);
+	return sha256_update(shash_desc_ctx(desc), data, len);
 }
 EXPORT_SYMBOL(crypto_sha256_update);
 
-static int sha256_final(struct shash_desc *desc, u8 *out)
+static int crypto_sha256_final(struct shash_desc *desc, u8 *out)
 {
-	sha256_base_do_finalize(desc, sha256_generic_block_fn);
-	return sha256_base_finish(desc, out);
+	if (crypto_shash_digestsize(desc->tfm) == SHA224_DIGEST_SIZE)
+		return sha224_final(shash_desc_ctx(desc), out);
+	else
+		return sha256_final(shash_desc_ctx(desc), out);
 }
 
 int crypto_sha256_finup(struct shash_desc *desc, const u8 *data,
 			unsigned int len, u8 *hash)
 {
-	sha256_base_do_update(desc, data, len, sha256_generic_block_fn);
-	return sha256_final(desc, hash);
+	sha256_update(shash_desc_ctx(desc), data, len);
+	return crypto_sha256_final(desc, hash);
 }
 EXPORT_SYMBOL(crypto_sha256_finup);
 
 static struct shash_alg sha256_algs[2] = { {
 	.digestsize	=	SHA256_DIGEST_SIZE,
-	.init		=	sha256_base_init,
+	.init		=	crypto_sha256_init,
 	.update		=	crypto_sha256_update,
-	.final		=	sha256_final,
+	.final		=	crypto_sha256_final,
 	.finup		=	crypto_sha256_finup,
 	.descsize	=	sizeof(struct sha256_state),
 	.base		=	{
@@ -272,9 +82,9 @@
 	}
 }, {
 	.digestsize	=	SHA224_DIGEST_SIZE,
-	.init		=	sha224_base_init,
+	.init		=	crypto_sha224_init,
 	.update		=	crypto_sha256_update,
-	.final		=	sha256_final,
+	.final		=	crypto_sha256_final,
 	.finup		=	crypto_sha256_finup,
 	.descsize	=	sizeof(struct sha256_state),
 	.base		=	{
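
The rewritten sha256_generic.c above delegates to the new lib/crypto SHA-256 helpers mentioned in the pull summary. A minimal sketch of calling those helpers directly from kernel code, assuming the sha256_init()/sha256_update()/sha256_final() names visible in the hunk and the crypto/sha.h header of this series:

	#include <crypto/sha.h>		/* struct sha256_state, SHA256_DIGEST_SIZE */

	static void demo_sha256_digest(const u8 *data, unsigned int len,
				       u8 digest[SHA256_DIGEST_SIZE])
	{
		struct sha256_state state;

		/* One-shot hash over a virtually contiguous buffer; no shash
		 * descriptor or tfm allocation is needed for the library API.
		 */
		sha256_init(&state);
		sha256_update(&state, data, len);
		sha256_final(&state, digest);
	}

The same pattern applies to SHA-224 via sha224_init()/sha224_final(), which is what crypto_sha256_final() above dispatches on by digest size.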
diff --git a/crypto/skcipher.c b/crypto/skcipher.c
index 5d836fc..22753c1 100644
--- a/crypto/skcipher.c
+++ b/crypto/skcipher.c
@@ -90,7 +90,7 @@
 	return max(start, end_page);
 }
 
-static void skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize)
+static int skcipher_done_slow(struct skcipher_walk *walk, unsigned int bsize)
 {
 	u8 *addr;
 
@@ -98,19 +98,21 @@
 	addr = skcipher_get_spot(addr, bsize);
 	scatterwalk_copychunks(addr, &walk->out, bsize,
 			       (walk->flags & SKCIPHER_WALK_PHYS) ? 2 : 1);
+	return 0;
 }
 
 int skcipher_walk_done(struct skcipher_walk *walk, int err)
 {
-	unsigned int n; /* bytes processed */
-	bool more;
+	unsigned int n = walk->nbytes;
+	unsigned int nbytes = 0;
 
-	if (unlikely(err < 0))
+	if (!n)
 		goto finish;
 
-	n = walk->nbytes - err;
-	walk->total -= n;
-	more = (walk->total != 0);
+	if (likely(err >= 0)) {
+		n -= err;
+		nbytes = walk->total - n;
+	}
 
 	if (likely(!(walk->flags & (SKCIPHER_WALK_PHYS |
 				    SKCIPHER_WALK_SLOW |
@@ -126,7 +128,7 @@
 		memcpy(walk->dst.virt.addr, walk->page, n);
 		skcipher_unmap_dst(walk);
 	} else if (unlikely(walk->flags & SKCIPHER_WALK_SLOW)) {
-		if (err) {
+		if (err > 0) {
 			/*
 			 * Didn't process all bytes.  Either the algorithm is
 			 * broken, or this was the last step and it turned out
@@ -134,27 +136,29 @@
 			 * the algorithm requires it.
 			 */
 			err = -EINVAL;
-			goto finish;
-		}
-		skcipher_done_slow(walk, n);
-		goto already_advanced;
+			nbytes = 0;
+		} else
+			n = skcipher_done_slow(walk, n);
 	}
 
+	if (err > 0)
+		err = 0;
+
+	walk->total = nbytes;
+	walk->nbytes = 0;
+
 	scatterwalk_advance(&walk->in, n);
 	scatterwalk_advance(&walk->out, n);
-already_advanced:
-	scatterwalk_done(&walk->in, 0, more);
-	scatterwalk_done(&walk->out, 1, more);
+	scatterwalk_done(&walk->in, 0, nbytes);
+	scatterwalk_done(&walk->out, 1, nbytes);
 
-	if (more) {
+	if (nbytes) {
 		crypto_yield(walk->flags & SKCIPHER_WALK_SLEEP ?
 			     CRYPTO_TFM_REQ_MAY_SLEEP : 0);
 		return skcipher_walk_next(walk);
 	}
-	err = 0;
-finish:
-	walk->nbytes = 0;
 
+finish:
 	/* Short-circuit for the common/fast path. */
 	if (!((unsigned long)walk->buffer | (unsigned long)walk->page))
 		goto out;
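
The reworked skcipher_walk_done() above folds the error and "more data" bookkeeping into a single byte count and lets callers pass a negative err to abort the walk (the new API ability noted in the pull summary). A hedged sketch of the caller-side pattern this serves; the cipher step is a placeholder memcpy rather than any real transform:

	#include <crypto/internal/skcipher.h>
	#include <linux/string.h>

	static int demo_walk_encrypt(struct skcipher_request *req)
	{
		struct skcipher_walk walk;
		int err;

		err = skcipher_walk_virt(&walk, req, false);
		while (walk.nbytes) {
			unsigned int n = walk.nbytes;

			/* Placeholder for the real per-step cipher work on
			 * walk.src.virt.addr -> walk.dst.virt.addr.
			 */
			memcpy(walk.dst.virt.addr, walk.src.virt.addr, n);

			/* The second argument is the number of bytes left
			 * unprocessed in this step (0 here); a negative value
			 * aborts the walk and becomes the final return value.
			 */
			err = skcipher_walk_done(&walk, walk.nbytes - n);
		}
		return err;
	}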
diff --git a/crypto/streebog_generic.c b/crypto/streebog_generic.c
index 63663c3..dc625ff 100644
--- a/crypto/streebog_generic.c
+++ b/crypto/streebog_generic.c
@@ -148,52 +148,6 @@
 	 } }
 };
 
-static const u8 Tau[64] = {
-	0,   8,  16,  24,  32,  40,  48,  56,
-	1,   9,  17,  25,  33,  41,  49,  57,
-	2,  10,  18,  26,  34,  42,  50,  58,
-	3,  11,  19,  27,  35,  43,  51,  59,
-	4,  12,  20,  28,  36,  44,  52,  60,
-	5,  13,  21,  29,  37,  45,  53,  61,
-	6,  14,  22,  30,  38,  46,  54,  62,
-	7,  15,  23,  31,  39,  47,  55,  63
-};
-
-static const u8 Pi[256] = {
-	252, 238, 221,  17, 207, 110,  49,  22,
-	251, 196, 250, 218,  35, 197,   4,  77,
-	233, 119, 240, 219, 147,  46, 153, 186,
-	 23,  54, 241, 187,  20, 205,  95, 193,
-	249,  24, 101,  90, 226,  92, 239,  33,
-	129,  28,  60,  66, 139,   1, 142,  79,
-	  5, 132,   2, 174, 227, 106, 143, 160,
-	  6,  11, 237, 152, 127, 212, 211,  31,
-	235,  52,  44,  81, 234, 200,  72, 171,
-	242,  42, 104, 162, 253,  58, 206, 204,
-	181, 112,  14,  86,   8,  12, 118,  18,
-	191, 114,  19,  71, 156, 183,  93, 135,
-	 21, 161, 150,  41,  16, 123, 154, 199,
-	243, 145, 120, 111, 157, 158, 178, 177,
-	 50, 117,  25,  61, 255,  53, 138, 126,
-	109,  84, 198, 128, 195, 189,  13,  87,
-	223, 245,  36, 169,  62, 168,  67, 201,
-	215, 121, 214, 246, 124,  34, 185,   3,
-	224,  15, 236, 222, 122, 148, 176, 188,
-	220, 232,  40,  80,  78,  51,  10,  74,
-	167, 151,  96, 115,  30,   0,  98,  68,
-	 26, 184,  56, 130, 100, 159,  38,  65,
-	173,  69,  70, 146,  39,  94,  85,  47,
-	140, 163, 165, 125, 105, 213, 149,  59,
-	  7,  88, 179,  64, 134, 172,  29, 247,
-	 48,  55, 107, 228, 136, 217, 231, 137,
-	225,  27, 131,  73,  76,  63, 248, 254,
-	141,  83, 170, 144, 202, 216, 133,  97,
-	 32, 113, 103, 164,  45,  43,   9,  91,
-	203, 155,  37, 208, 190, 229, 108,  82,
-	 89, 166, 116, 210, 230, 244, 180, 192,
-	209, 102, 175, 194,  57,  75,  99, 182
-};
-
 static const unsigned long long Ax[8][256] = {
 	{
 	0xd01f715b5c7ef8e6ULL, 0x16fa240980778325ULL, 0xa8a42e857ee049c8ULL,
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index ad78ab5..83ad0b1 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -2327,6 +2327,22 @@
 				  0, speed_template_32);
 		break;
 
+	case 220:
+		test_acipher_speed("essiv(cbc(aes),sha256)",
+				  ENCRYPT, sec, NULL, 0,
+				  speed_template_16_24_32);
+		test_acipher_speed("essiv(cbc(aes),sha256)",
+				  DECRYPT, sec, NULL, 0,
+				  speed_template_16_24_32);
+		break;
+
+	case 221:
+		test_aead_speed("aegis128", ENCRYPT, sec,
+				NULL, 0, 16, 8, speed_template_16);
+		test_aead_speed("aegis128", DECRYPT, sec,
+				NULL, 0, 16, 8, speed_template_16);
+		break;
+
 	case 300:
 		if (alg) {
 			test_hash_speed(alg, sec, generic_hash_speed_template);
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index d0b5b33..c39e39e 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -3887,18 +3887,6 @@
 			.aead = __VECS(aegis128_tv_template)
 		}
 	}, {
-		.alg = "aegis128l",
-		.test = alg_test_aead,
-		.suite = {
-			.aead = __VECS(aegis128l_tv_template)
-		}
-	}, {
-		.alg = "aegis256",
-		.test = alg_test_aead,
-		.suite = {
-			.aead = __VECS(aegis256_tv_template)
-		}
-	}, {
 		.alg = "ansi_cprng",
 		.test = alg_test_cprng,
 		.suite = {
@@ -4557,6 +4545,20 @@
 			.akcipher = __VECS(ecrdsa_tv_template)
 		}
 	}, {
+		.alg = "essiv(authenc(hmac(sha256),cbc(aes)),sha256)",
+		.test = alg_test_aead,
+		.fips_allowed = 1,
+		.suite = {
+			.aead = __VECS(essiv_hmac_sha256_aes_cbc_tv_temp)
+		}
+	}, {
+		.alg = "essiv(cbc(aes),sha256)",
+		.test = alg_test_skcipher,
+		.fips_allowed = 1,
+		.suite = {
+			.cipher = __VECS(essiv_aes_cbc_tv_template)
+		}
+	}, {
 		.alg = "gcm(aes)",
 		.generic_driver = "gcm_base(ctr(aes-generic),ghash-generic)",
 		.test = alg_test_aead,
@@ -4741,6 +4743,16 @@
 			}
 		}
 	}, {
+		.alg = "lzo-rle",
+		.test = alg_test_comp,
+		.fips_allowed = 1,
+		.suite = {
+			.comp = {
+				.comp = __VECS(lzorle_comp_tv_template),
+				.decomp = __VECS(lzorle_decomp_tv_template)
+			}
+		}
+	}, {
 		.alg = "md4",
 		.test = alg_test_hash,
 		.suite = {
@@ -4759,18 +4771,6 @@
 			.hash = __VECS(michael_mic_tv_template)
 		}
 	}, {
-		.alg = "morus1280",
-		.test = alg_test_aead,
-		.suite = {
-			.aead = __VECS(morus1280_tv_template)
-		}
-	}, {
-		.alg = "morus640",
-		.test = alg_test_aead,
-		.suite = {
-			.aead = __VECS(morus640_tv_template)
-		}
-	}, {
 		.alg = "nhpoly1305",
 		.test = alg_test_hash,
 		.suite = {
@@ -5240,9 +5240,11 @@
 					     type, mask);
 
 test_done:
-	if (rc && (fips_enabled || panic_on_fail))
+	if (rc && (fips_enabled || panic_on_fail)) {
+		fips_fail_notify();
 		panic("alg: self-tests for %s (%s) failed in %s mode!\n",
 		      driver, alg, fips_enabled ? "fips" : "panic_on_fail");
+	}
 
 	if (fips_enabled && !rc)
 		pr_info("alg: self-tests for %s (%s) passed\n", driver, alg);
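
With the ESSIV templates wired into testmgr above (and marked fips_allowed), users such as dm-crypt can request the generator by name. A small sketch of allocating the skcipher flavour, assuming only the standard crypto_alloc_skcipher()/crypto_free_skcipher() API:

	#include <crypto/skcipher.h>
	#include <linux/err.h>

	static int demo_get_essiv(void)
	{
		struct crypto_skcipher *tfm;

		/* Same algorithm string as the new testmgr entry. */
		tfm = crypto_alloc_skcipher("essiv(cbc(aes),sha256)", 0, 0);
		if (IS_ERR(tfm))
			return PTR_ERR(tfm);

		/* ... set the key and issue requests as with any skcipher ... */

		crypto_free_skcipher(tfm);
		return 0;
	}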
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 073bd2e..ef7d21f 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -19489,2697 +19489,6 @@
 };
 
 /*
- * AEGIS-128L test vectors - generated via reference implementation from
- * SUPERCOP (https://bench.cr.yp.to/supercop.html):
- *
- *   https://bench.cr.yp.to/supercop/supercop-20170228.tar.xz
- *   (see crypto_aead/aegis128l/)
- */
-static const struct aead_testvec aegis128l_tv_template[] = {
-	{
-		.key	= "\x0f\xc9\x8e\x67\x44\x9e\xaa\x86"
-			  "\x20\x36\x2c\x24\xfe\xc9\x30\x81",
-		.klen	= 16,
-		.iv	= "\x1e\x92\x1c\xcf\x88\x3d\x54\x0d"
-			  "\x40\x6d\x59\x48\xfc\x92\x61\x03",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x30\x4f\xf3\xe9\xb1\xfa\x81\xa6"
-			  "\x20\x72\x78\xdd\x93\xc8\x57\xef",
-		.clen	= 16,
-	}, {
-		.key	= "\x4b\xed\xc8\x07\x54\x1a\x52\xa2"
-			  "\xa1\x10\xde\xb5\xf8\xed\xf3\x87",
-		.klen	= 16,
-		.iv	= "\x5a\xb7\x56\x6e\x98\xb9\xfd\x29"
-			  "\xc1\x47\x0b\xda\xf6\xb6\x23\x09",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x79",
-		.plen	= 1,
-		.ctext	= "\xa9\x24\xa0\xb6\x2d\xdd\x29\xdb"
-			  "\x40\xb3\x71\xc5\x22\x58\x31\x77"
-			  "\x6d",
-		.clen	= 17,
-	}, {
-		.key	= "\x88\x12\x01\xa6\x64\x96\xfb\xbe"
-			  "\x22\xea\x90\x47\xf2\x11\xb5\x8e",
-		.klen	= 16,
-		.iv	= "\x97\xdb\x90\x0e\xa8\x35\xa5\x45"
-			  "\x42\x21\xbd\x6b\xf0\xda\xe6\x0f",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\xb5\x6e\xad\xdd\x30\x72\xfa\x53"
-			  "\x82\x8e\x16\xb4\xed\x6d\x47",
-		.plen	= 15,
-		.ctext	= "\xbb\x0a\x53\xc4\xaa\x7e\xa4\x03"
-			  "\x2b\xee\x62\x99\x7b\x98\x13\x1f"
-			  "\xe0\x76\x4c\x2e\x53\x99\x4f\xbe"
-			  "\xe1\xa8\x04\x7f\xe1\x71\xbe",
-		.clen	= 31,
-	}, {
-		.key	= "\xc4\x37\x3b\x45\x74\x11\xa4\xda"
-			  "\xa2\xc5\x42\xd8\xec\x36\x78\x94",
-		.klen	= 16,
-		.iv	= "\xd3\x00\xc9\xad\xb8\xb0\x4e\x61"
-			  "\xc3\xfb\x6f\xfd\xea\xff\xa9\x15",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\xf2\x92\xe6\x7d\x40\xee\xa3\x6f"
-			  "\x03\x68\xc8\x45\xe7\x91\x0a\x18",
-		.plen	= 16,
-		.ctext	= "\x66\xdf\x6e\x71\xc0\x6e\xa4\x4c"
-			  "\x9d\xb7\x8c\x9a\xdb\x1f\xd2\x2e"
-			  "\x23\xb6\xa4\xfb\xd3\x86\xdd\xbb"
-			  "\xde\x54\x9b\xf5\x92\x8b\x93\xc5",
-		.clen	= 32,
-	}, {
-		.key	= "\x01\x5c\x75\xe5\x84\x8d\x4d\xf6"
-			  "\x23\x9f\xf4\x6a\xe6\x5a\x3b\x9a",
-		.klen	= 16,
-		.iv	= "\x10\x25\x03\x4c\xc8\x2c\xf7\x7d"
-			  "\x44\xd5\x21\x8e\xe4\x23\x6b\x1c",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x2e\xb7\x20\x1c\x50\x6a\x4b\x8b"
-			  "\x84\x42\x7a\xd7\xe1\xb5\xcd\x1f"
-			  "\xd3",
-		.plen	= 17,
-		.ctext	= "\x4f\xc3\x69\xb6\xd3\xa4\x64\x8b"
-			  "\x71\xc3\x8a\x91\x22\x4f\x1b\xd2"
-			  "\x33\x6d\x86\xbc\xf8\x2f\x06\xf9"
-			  "\x82\x64\xc7\x72\x00\x30\xfc\xf0"
-			  "\xf8",
-		.clen	= 33,
-	}, {
-		.key	= "\x3d\x80\xae\x84\x94\x09\xf6\x12"
-			  "\xa4\x79\xa6\xfb\xe0\x7f\xfd\xa0",
-		.klen	= 16,
-		.iv	= "\x4c\x49\x3d\xec\xd8\xa8\xa0\x98"
-			  "\xc5\xb0\xd3\x1f\xde\x48\x2e\x22",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x6b\xdc\x5a\xbb\x60\xe5\xf4\xa6"
-			  "\x05\x1d\x2c\x68\xdb\xda\x8f\x25"
-			  "\xfe\x8d\x45\x19\x1e\xc0\x0b\x99"
-			  "\x88\x11\x39\x12\x1c\x3a\xbb",
-		.plen	= 31,
-		.ctext	= "\xe3\x93\x15\xae\x5f\x9d\x3c\xb5"
-			  "\xd6\x9d\xee\xee\xcf\xaa\xaf\xe1"
-			  "\x45\x10\x96\xe0\xbf\x55\x0f\x4c"
-			  "\x1a\xfd\xf4\xda\x4e\x10\xde\xc9"
-			  "\x0e\x6f\xc7\x3c\x49\x94\x41\xfc"
-			  "\x59\x28\x88\x3c\x79\x10\x6b",
-		.clen	= 47,
-	}, {
-		.key	= "\x7a\xa5\xe8\x23\xa4\x84\x9e\x2d"
-			  "\x25\x53\x58\x8c\xda\xa3\xc0\xa6",
-		.klen	= 16,
-		.iv	= "\x89\x6e\x77\x8b\xe8\x23\x49\xb4"
-			  "\x45\x8a\x85\xb1\xd8\x6c\xf1\x28",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\xa7\x00\x93\x5b\x70\x61\x9d\xc2"
-			  "\x86\xf7\xde\xfa\xd5\xfe\x52\x2b"
-			  "\x28\x50\x51\x9d\x24\x60\x8d\xb3"
-			  "\x49\x3e\x17\xea\xf6\x99\x5a\xdd",
-		.plen	= 32,
-		.ctext	= "\x1c\x8e\x22\x34\xfd\xab\xe6\x0d"
-			  "\x1c\x9f\x06\x54\x8b\x0b\xb4\x40"
-			  "\xde\x11\x59\x3e\xfd\x74\xf6\x42"
-			  "\x97\x17\xf7\x24\xb6\x7e\xc4\xc6"
-			  "\x06\xa3\x94\xda\x3d\x7f\x55\x0a"
-			  "\x92\x07\x2f\xa6\xf3\x6b\x2c\xfc",
-		.clen	= 48,
-	}, {
-		.key	= "\xb6\xca\x22\xc3\xb4\x00\x47\x49"
-			  "\xa6\x2d\x0a\x1e\xd4\xc7\x83\xad",
-		.klen	= 16,
-		.iv	= "\xc5\x93\xb0\x2a\xf8\x9f\xf1\xd0"
-			  "\xc6\x64\x37\x42\xd2\x90\xb3\x2e",
-		.assoc	= "\xd5",
-		.alen	= 1,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xa0\x2a\xb4\x9a\x91\x00\x15\xb8"
-			  "\x0f\x9a\x15\x60\x0e\x9b\x13\x8f",
-		.clen	= 16,
-	}, {
-		.key	= "\xf3\xee\x5c\x62\xc4\x7c\xf0\x65"
-			  "\x27\x08\xbd\xaf\xce\xec\x45\xb3",
-		.klen	= 16,
-		.iv	= "\x02\xb8\xea\xca\x09\x1b\x9a\xec"
-			  "\x47\x3e\xe9\xd4\xcc\xb5\x76\x34",
-		.assoc	= "\x11\x81\x78\x32\x4d\xb9\x44\x73"
-			  "\x68\x75\x16\xf8\xcb\x7e\xa7",
-		.alen	= 15,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x4c\x26\xad\x9c\x14\xfd\x9c\x8c"
-			  "\x84\xfb\x26\xfb\xd5\xca\x62\x39",
-		.clen	= 16,
-	}, {
-		.key	= "\x2f\x13\x95\x01\xd5\xf7\x99\x81"
-			  "\xa8\xe2\x6f\x41\xc8\x10\x08\xb9",
-		.klen	= 16,
-		.iv	= "\x3f\xdc\x24\x69\x19\x96\x43\x08"
-			  "\xc8\x18\x9b\x65\xc6\xd9\x39\x3b",
-		.assoc	= "\x4e\xa5\xb2\xd1\x5d\x35\xed\x8f"
-			  "\xe8\x4f\xc8\x89\xc5\xa2\x69\xbc",
-		.alen	= 16,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x45\x85\x0e\x0f\xf4\xae\x96\xa1"
-			  "\x99\x4d\x6d\xb4\x67\x32\xb0\x3a",
-		.clen	= 16,
-	}, {
-		.key	= "\x6c\x38\xcf\xa1\xe5\x73\x41\x9d"
-			  "\x29\xbc\x21\xd2\xc2\x35\xcb\xbf",
-		.klen	= 16,
-		.iv	= "\x7b\x01\x5d\x08\x29\x12\xec\x24"
-			  "\x49\xf3\x4d\xf7\xc0\xfe\xfb\x41",
-		.assoc	= "\x8a\xca\xec\x70\x6d\xb1\x96\xab"
-			  "\x69\x29\x7a\x1b\xbf\xc7\x2c\xc2"
-			  "\x07",
-		.alen	= 17,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x33\xb1\x42\x97\x8e\x16\x7b\x63"
-			  "\x06\xba\x5b\xcb\xae\x6d\x8b\x56",
-		.clen	= 16,
-	}, {
-		.key	= "\xa8\x5c\x09\x40\xf5\xef\xea\xb8"
-			  "\xaa\x96\xd3\x64\xbc\x59\x8d\xc6",
-		.klen	= 16,
-		.iv	= "\xb8\x26\x97\xa8\x39\x8e\x94\x3f"
-			  "\xca\xcd\xff\x88\xba\x22\xbe\x47",
-		.assoc	= "\xc7\xef\x26\x10\x7d\x2c\x3f\xc6"
-			  "\xea\x03\x2c\xac\xb9\xeb\xef\xc9"
-			  "\x31\x6b\x08\x12\xfc\xd8\x37\x2d"
-			  "\xe0\x17\x3a\x2e\x83\x5c\x8f",
-		.alen	= 31,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xda\x44\x08\x8c\x2a\xa5\x07\x35"
-			  "\x0b\x54\x4e\x6d\xe3\xfd\xc4\x5f",
-		.clen	= 16,
-	}, {
-		.key	= "\xe5\x81\x42\xdf\x05\x6a\x93\xd4"
-			  "\x2b\x70\x85\xf5\xb6\x7d\x50\xcc",
-		.klen	= 16,
-		.iv	= "\xf4\x4a\xd1\x47\x49\x09\x3d\x5b"
-			  "\x4b\xa7\xb1\x19\xb4\x46\x81\x4d",
-		.assoc	= "\x03\x14\x5f\xaf\x8d\xa8\xe7\xe2"
-			  "\x6b\xde\xde\x3e\xb3\x10\xb1\xcf"
-			  "\x5c\x2d\x14\x96\x01\x78\xb9\x47"
-			  "\xa1\x44\x19\x06\x5d\xbb\x2e\x2f",
-		.alen	= 32,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x1b\xb1\xf1\xa8\x9e\xc2\xb2\x88"
-			  "\x40\x7f\x7b\x19\x7a\x52\x8c\xf0",
-		.clen	= 16,
-	}, {
-		.key	= "\x22\xa6\x7c\x7f\x15\xe6\x3c\xf0"
-			  "\xac\x4b\x37\x86\xb0\xa2\x13\xd2",
-		.klen	= 16,
-		.iv	= "\x31\x6f\x0b\xe6\x59\x85\xe6\x77"
-			  "\xcc\x81\x63\xab\xae\x6b\x43\x54",
-		.assoc	= "\x40",
-		.alen	= 1,
-		.ptext	= "\x4f",
-		.plen	= 1,
-		.ctext	= "\x6e\xc8\xfb\x15\x9d\x98\x49\xc9"
-			  "\xa0\x98\x09\x85\xbe\x56\x8e\x79"
-			  "\xf4",
-		.clen	= 17,
-	}, {
-		.key	= "\x5e\xcb\xb6\x1e\x25\x62\xe4\x0c"
-			  "\x2d\x25\xe9\x18\xaa\xc6\xd5\xd8",
-		.klen	= 16,
-		.iv	= "\x6d\x94\x44\x86\x69\x00\x8f\x93"
-			  "\x4d\x5b\x15\x3c\xa8\x8f\x06\x5a",
-		.assoc	= "\x7c\x5d\xd3\xee\xad\x9f\x39\x1a"
-			  "\x6d\x92\x42\x61\xa7\x58\x37",
-		.alen	= 15,
-		.ptext	= "\x8b\x26\x61\x55\xf1\x3e\xe3\xa1"
-			  "\x8d\xc8\x6e\x85\xa5\x21\x67",
-		.plen	= 15,
-		.ctext	= "\x99\x2e\x84\x50\x64\x5c\xab\x29"
-			  "\x20\xba\xb9\x2f\x62\x3a\xce\x2a"
-			  "\x75\x25\x3b\xe3\x40\xe0\x1d\xfc"
-			  "\x20\x63\x0b\x49\x7e\x97\x08",
-		.clen	= 31,
-	}, {
-		.key	= "\x9b\xef\xf0\xbd\x35\xdd\x8d\x28"
-			  "\xad\xff\x9b\xa9\xa4\xeb\x98\xdf",
-		.klen	= 16,
-		.iv	= "\xaa\xb8\x7e\x25\x79\x7c\x37\xaf"
-			  "\xce\x36\xc7\xce\xa2\xb4\xc9\x60",
-		.assoc	= "\xb9\x82\x0c\x8d\xbd\x1b\xe2\x36"
-			  "\xee\x6c\xf4\xf2\xa1\x7d\xf9\xe2",
-		.alen	= 16,
-		.ptext	= "\xc8\x4b\x9b\xf5\x01\xba\x8c\xbd"
-			  "\x0e\xa3\x21\x16\x9f\x46\x2a\x63",
-		.plen	= 16,
-		.ctext	= "\xd9\x8e\xfd\x50\x8f\x02\x9f\xee"
-			  "\x78\x08\x12\xec\x09\xaf\x53\x14"
-			  "\x90\x3e\x3d\x76\xad\x71\x21\x08"
-			  "\x77\xe5\x4b\x15\xc2\xe6\xbc\xdb",
-		.clen	= 32,
-	}, {
-		.key	= "\xd7\x14\x29\x5d\x45\x59\x36\x44"
-			  "\x2e\xd9\x4d\x3b\x9e\x0f\x5b\xe5",
-		.klen	= 16,
-		.iv	= "\xe6\xdd\xb8\xc4\x89\xf8\xe0\xca"
-			  "\x4f\x10\x7a\x5f\x9c\xd8\x8b\x66",
-		.assoc	= "\xf5\xa6\x46\x2c\xce\x97\x8a\x51"
-			  "\x6f\x46\xa6\x83\x9b\xa1\xbc\xe8"
-			  "\x05",
-		.alen	= 17,
-		.ptext	= "\x05\x70\xd5\x94\x12\x36\x35\xd8"
-			  "\x8f\x7d\xd3\xa8\x99\x6a\xed\x69"
-			  "\xd0",
-		.plen	= 17,
-		.ctext	= "\xf3\xe7\x95\x86\xcf\x34\x95\x96"
-			  "\x17\xfe\x1b\xae\x1b\x31\xf2\x1a"
-			  "\xbd\xbc\xc9\x4e\x11\x29\x09\x5c"
-			  "\x05\xd3\xb4\x2e\x4a\x74\x59\x49"
-			  "\x7d",
-		.clen	= 33,
-	}, {
-		.key	= "\x14\x39\x63\xfc\x56\xd5\xdf\x5f"
-			  "\xaf\xb3\xff\xcc\x98\x33\x1d\xeb",
-		.klen	= 16,
-		.iv	= "\x23\x02\xf1\x64\x9a\x73\x89\xe6"
-			  "\xd0\xea\x2c\xf1\x96\xfc\x4e\x6d",
-		.assoc	= "\x32\xcb\x80\xcc\xde\x12\x33\x6d"
-			  "\xf0\x20\x58\x15\x95\xc6\x7f\xee"
-			  "\x2f\xf9\x4e\x2c\x1b\x98\x43\xc7"
-			  "\x68\x28\x73\x40\x9f\x96\x4a",
-		.alen	= 31,
-		.ptext	= "\x41\x94\x0e\x33\x22\xb1\xdd\xf4"
-			  "\x10\x57\x85\x39\x93\x8f\xaf\x70"
-			  "\xfa\xa9\xd0\x4d\x5c\x40\x23\xcd"
-			  "\x98\x34\xab\x37\x56\xae\x32",
-		.plen	= 31,
-		.ctext	= "\x06\x96\xb2\xbf\x63\xf4\x1e\x24"
-			  "\x0d\x19\x15\x61\x65\x3b\x06\x26"
-			  "\x71\xe8\x7e\x16\xdb\x96\x01\x01"
-			  "\x52\xcd\x49\x5b\x07\x33\x4e\xe7"
-			  "\xaa\x91\xf5\xd5\xc6\xfe\x41\xb5"
-			  "\xed\x90\xce\xb9\xcd\xcc\xa1",
-		.clen	= 47,
-	}, {
-		.key	= "\x50\x5d\x9d\x9b\x66\x50\x88\x7b"
-			  "\x30\x8e\xb1\x5e\x92\x58\xe0\xf1",
-		.klen	= 16,
-		.iv	= "\x5f\x27\x2b\x03\xaa\xef\x32\x02"
-			  "\x50\xc4\xde\x82\x90\x21\x11\x73",
-		.assoc	= "\x6e\xf0\xba\x6b\xee\x8e\xdc\x89"
-			  "\x71\xfb\x0a\xa6\x8f\xea\x41\xf4"
-			  "\x5a\xbb\x59\xb0\x20\x38\xc5\xe0"
-			  "\x29\x56\x52\x19\x79\xf5\xe9\x37",
-		.alen	= 32,
-		.ptext	= "\x7e\xb9\x48\xd3\x32\x2d\x86\x10"
-			  "\x91\x31\x37\xcb\x8d\xb3\x72\x76"
-			  "\x24\x6b\xdc\xd1\x61\xe0\xa5\xe7"
-			  "\x5a\x61\x8a\x0f\x30\x0d\xd1\xec",
-		.plen	= 32,
-		.ctext	= "\xf9\xd7\xee\x17\xfd\x24\xcd\xf1"
-			  "\xbc\x0f\x35\x97\x97\x0c\x4b\x18"
-			  "\xce\x58\xc8\x3b\xd4\x85\x93\x79"
-			  "\xcc\x9c\xea\xc1\x73\x13\x0b\x4c"
-			  "\xcc\x6f\x28\xf8\xa4\x4e\xb8\x56"
-			  "\x64\x4e\x47\xce\xb2\xb4\x92\xb4",
-		.clen	= 48,
-	}, {
-		.key	= "\x8d\x82\xd6\x3b\x76\xcc\x30\x97"
-			  "\xb1\x68\x63\xef\x8c\x7c\xa3\xf7",
-		.klen	= 16,
-		.iv	= "\x9c\x4b\x65\xa2\xba\x6b\xdb\x1e"
-			  "\xd1\x9e\x90\x13\x8a\x45\xd3\x79",
-		.assoc	= "\xab\x14\xf3\x0a\xfe\x0a\x85\xa5"
-			  "\xf2\xd5\xbc\x38\x89\x0e\x04\xfb"
-			  "\x84\x7d\x65\x34\x25\xd8\x47\xfa"
-			  "\xeb\x83\x31\xf1\x54\x54\x89\x0d"
-			  "\x9d",
-		.alen	= 33,
-		.ptext	= "\xba\xde\x82\x72\x42\xa9\x2f\x2c"
-			  "\x12\x0b\xe9\x5c\x87\xd7\x35\x7c"
-			  "\x4f\x2e\xe8\x55\x66\x80\x27\x00"
-			  "\x1b\x8f\x68\xe7\x0a\x6c\x71\xc3"
-			  "\x21\x78\x55\x9d\x9c\x65\x7b\xcd"
-			  "\x0a\x34\x97\xff\x47\x37\xb0\x2a"
-			  "\x80\x0d\x19\x98\x33\xa9\x7a\xe3"
-			  "\x2e\x4c\xc6\xf3\x8c\x88\x42\x01"
-			  "\xbd",
-		.plen	= 65,
-		.ctext	= "\x58\xfa\x3a\x3d\xd9\x88\x63\xe8"
-			  "\xc5\x78\x50\x8b\x4a\xc9\xdf\x7f"
-			  "\x4b\xfa\xc8\x2e\x67\x43\xf3\x63"
-			  "\x42\x8e\x99\x5a\x9c\x0b\x84\x77"
-			  "\xbc\x46\x76\x48\x82\xc7\x57\x96"
-			  "\xe1\x65\xd1\xed\x1d\xdd\x80\x24"
-			  "\xa6\x4d\xa9\xf1\x53\x8b\x5e\x0e"
-			  "\x26\xb9\xcc\x37\xe5\x43\xe1\x5a"
-			  "\x8a\xd6\x8c\x5a\xe4\x95\xd1\x8d"
-			  "\xf7\x33\x64\xc1\xd3\xf2\xfc\x35"
-			  "\x01",
-		.clen	= 81,
-	}, {
-		.key	= "\xc9\xa7\x10\xda\x86\x48\xd9\xb3"
-			  "\x32\x42\x15\x80\x85\xa1\x65\xfe",
-		.klen	= 16,
-		.iv	= "\xd8\x70\x9f\x42\xca\xe6\x83\x3a"
-			  "\x52\x79\x42\xa5\x84\x6a\x96\x7f",
-		.assoc	= "\xe8\x39\x2d\xaa\x0e\x85\x2d\xc1"
-			  "\x72\xaf\x6e\xc9\x82\x33\xc7\x01"
-			  "\xaf\x40\x70\xb8\x2a\x78\xc9\x14"
-			  "\xac\xb1\x10\xca\x2e\xb3\x28\xe4"
-			  "\xac\xfa\x58\x7f\xe5\x73\x09\x8c"
-			  "\x1d\x40\x87\x8c\xd9\x75\xc0\x55"
-			  "\xa2\xda\x07\xd1\xc2\xa9\xd1\xbb"
-			  "\x09\x4f\x77\x62\x88\x2d\xf2\x68"
-			  "\x54",
-		.alen	= 65,
-		.ptext	= "\xf7\x02\xbb\x11\x52\x24\xd8\x48"
-			  "\x93\xe6\x9b\xee\x81\xfc\xf7\x82"
-			  "\x79\xf0\xf3\xd9\x6c\x20\xa9\x1a"
-			  "\xdc\xbc\x47\xc0\xe4\xcb\x10\x99"
-			  "\x2f",
-		.plen	= 33,
-		.ctext	= "\x4c\xa9\xac\x71\xed\x10\xa6\x24"
-			  "\xb7\xa7\xdf\x8b\xf5\xc2\x41\xcb"
-			  "\x05\xc9\xd6\x97\xb6\x10\x7f\x17"
-			  "\xc2\xc0\x93\xcf\xe0\x94\xfd\x99"
-			  "\xf2\x62\x25\x28\x01\x23\x6f\x8b"
-			  "\x04\x52\xbc\xb0\x3e\x66\x52\x90"
-			  "\x9f",
-		.clen	= 49,
-	}, {
-		.key	= "\x06\xcc\x4a\x79\x96\xc3\x82\xcf"
-			  "\xb3\x1c\xc7\x12\x7f\xc5\x28\x04",
-		.klen	= 16,
-		.iv	= "\x15\x95\xd8\xe1\xda\x62\x2c\x56"
-			  "\xd3\x53\xf4\x36\x7e\x8e\x59\x85",
-		.assoc	= "\x24\x5e\x67\x49\x1e\x01\xd6\xdd"
-			  "\xf3\x89\x20\x5b\x7c\x57\x89\x07",
-		.alen	= 16,
-		.ptext	= "\x33\x27\xf5\xb1\x62\xa0\x80\x63"
-			  "\x14\xc0\x4d\x7f\x7b\x20\xba\x89",
-		.plen	= 16,
-		.ctext	= "\x6d\xed\x04\x7a\x2f\x0c\x30\xa5"
-			  "\x96\xe6\x97\xe4\x10\xeb\x40\x95"
-			  "\xc5\x9a\xdf\x31\xd5\xa5\xa6\xec"
-			  "\x05\xa8\x31\x50\x11\x19\x44",
-		.clen	= 31,
-	}, {
-		.key	= "\x42\xf0\x84\x19\xa6\x3f\x2b\xea"
-			  "\x34\xf6\x79\xa3\x79\xe9\xeb\x0a",
-		.klen	= 16,
-		.iv	= "\x51\xb9\x12\x80\xea\xde\xd5\x71"
-			  "\x54\x2d\xa6\xc8\x78\xb2\x1b\x8c",
-		.assoc	= "\x61\x83\xa0\xe8\x2e\x7d\x7f\xf8"
-			  "\x74\x63\xd2\xec\x76\x7c\x4c\x0d",
-		.alen	= 16,
-		.ptext	= "\x70\x4c\x2f\x50\x72\x1c\x29\x7f"
-			  "\x95\x9a\xff\x10\x75\x45\x7d\x8f",
-		.plen	= 16,
-		.ctext	= "\x30\x95\x7d\xea\xdc\x62\xc0\x88"
-			  "\xa1\xe3\x8d\x8c\xac\x04\x10\xa7"
-			  "\xfa\xfa\x07\xbd\xa0\xf0\x36\xeb"
-			  "\x21\x93\x2e\x31\x84\x83",
-		.clen	= 30,
-	}, {
-		.key	= "\x7f\x15\xbd\xb8\xb6\xba\xd3\x06"
-			  "\xb5\xd1\x2b\x35\x73\x0e\xad\x10",
-		.klen	= 16,
-		.iv	= "\x8e\xde\x4c\x20\xfa\x59\x7e\x8d"
-			  "\xd5\x07\x58\x59\x72\xd7\xde\x92",
-		.assoc	= "\x9d\xa7\xda\x88\x3e\xf8\x28\x14"
-			  "\xf5\x3e\x85\x7d\x70\xa0\x0f\x13",
-		.alen	= 16,
-		.ptext	= "\xac\x70\x69\xef\x82\x97\xd2\x9b"
-			  "\x15\x74\xb1\xa2\x6f\x69\x3f\x95",
-		.plen	= 16,
-		.ctext	= "\x93\xcd\xee\xd4\xcb\x9d\x8d\x16"
-			  "\x63\x0d\x43\xd5\x49\xca\xa8\x85"
-			  "\x49\xc0\xae\x13\xbc\x26\x1d\x4b",
-		.clen	= 24,
-	},
-};
-
-/*
- * AEGIS-256 test vectors - generated via reference implementation from
- * SUPERCOP (https://bench.cr.yp.to/supercop.html):
- *
- *   https://bench.cr.yp.to/supercop/supercop-20170228.tar.xz
- *   (see crypto_aead/aegis256/)
- */
-static const struct aead_testvec aegis256_tv_template[] = {
-	{
-		.key	= "\x0f\xc9\x8e\x67\x44\x9e\xaa\x86"
-			  "\x20\x36\x2c\x24\xfe\xc9\x30\x81"
-			  "\xca\xb0\x82\x21\x41\xa8\xe0\x06"
-			  "\x30\x0b\x37\xf6\xb6\x17\xe7\xb5",
-		.klen	= 32,
-		.iv	= "\x1e\x92\x1c\xcf\x88\x3d\x54\x0d"
-			  "\x40\x6d\x59\x48\xfc\x92\x61\x03"
-			  "\x95\x61\x05\x42\x82\x50\xc0\x0c"
-			  "\x60\x16\x6f\xec\x6d\x2f\xcf\x6b",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xd5\x65\x3a\xa9\x03\x51\xd7\xaa"
-			  "\xfa\x4b\xd8\xa2\x41\x9b\xc1\xb2",
-		.clen	= 16,
-	}, {
-		.key	= "\x4b\xed\xc8\x07\x54\x1a\x52\xa2"
-			  "\xa1\x10\xde\xb5\xf8\xed\xf3\x87"
-			  "\xf4\x72\x8e\xa5\x46\x48\x62\x20"
-			  "\xf1\x38\x16\xce\x90\x76\x87\x8c",
-		.klen	= 32,
-		.iv	= "\x5a\xb7\x56\x6e\x98\xb9\xfd\x29"
-			  "\xc1\x47\x0b\xda\xf6\xb6\x23\x09"
-			  "\xbf\x23\x11\xc6\x87\xf0\x42\x26"
-			  "\x22\x44\x4e\xc4\x47\x8e\x6e\x41",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x79",
-		.plen	= 1,
-		.ctext	= "\x84\xa2\x8f\xad\xdb\x8d\x2c\x16"
-			  "\x9e\x89\xd9\x06\xa6\xa8\x14\x29"
-			  "\x8b",
-		.clen	= 17,
-	}, {
-		.key	= "\x88\x12\x01\xa6\x64\x96\xfb\xbe"
-			  "\x22\xea\x90\x47\xf2\x11\xb5\x8e"
-			  "\x1f\x35\x9a\x29\x4b\xe8\xe4\x39"
-			  "\xb3\x66\xf5\xa6\x6a\xd5\x26\x62",
-		.klen	= 32,
-		.iv	= "\x97\xdb\x90\x0e\xa8\x35\xa5\x45"
-			  "\x42\x21\xbd\x6b\xf0\xda\xe6\x0f"
-			  "\xe9\xe5\x1d\x4a\x8c\x90\xc4\x40"
-			  "\xe3\x71\x2d\x9c\x21\xed\x0e\x18",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\xb5\x6e\xad\xdd\x30\x72\xfa\x53"
-			  "\x82\x8e\x16\xb4\xed\x6d\x47",
-		.plen	= 15,
-		.ctext	= "\x09\x94\x1f\xa6\x13\xc3\x74\x75"
-			  "\x17\xad\x8a\x0e\xd8\x66\x9a\x28"
-			  "\xd7\x30\x66\x09\x2a\xdc\xfa\x2a"
-			  "\x9f\x3b\xd7\xdd\x66\xd1\x2b",
-		.clen	= 31,
-	}, {
-		.key	= "\xc4\x37\x3b\x45\x74\x11\xa4\xda"
-			  "\xa2\xc5\x42\xd8\xec\x36\x78\x94"
-			  "\x49\xf7\xa5\xad\x50\x88\x66\x53"
-			  "\x74\x94\xd4\x7f\x44\x34\xc5\x39",
-		.klen	= 32,
-		.iv	= "\xd3\x00\xc9\xad\xb8\xb0\x4e\x61"
-			  "\xc3\xfb\x6f\xfd\xea\xff\xa9\x15"
-			  "\x14\xa8\x28\xce\x92\x30\x46\x59"
-			  "\xa4\x9f\x0b\x75\xfb\x4c\xad\xee",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\xf2\x92\xe6\x7d\x40\xee\xa3\x6f"
-			  "\x03\x68\xc8\x45\xe7\x91\x0a\x18",
-		.plen	= 16,
-		.ctext	= "\x8a\x46\xa2\x22\x8c\x03\xab\x6f"
-			  "\x54\x63\x4e\x7f\xc9\x8e\xfa\x70"
-			  "\x7b\xe5\x8d\x78\xbc\xe9\xb6\xa1"
-			  "\x29\x17\xc8\x3b\x52\xa4\x98\x72",
-		.clen	= 32,
-	}, {
-		.key	= "\x01\x5c\x75\xe5\x84\x8d\x4d\xf6"
-			  "\x23\x9f\xf4\x6a\xe6\x5a\x3b\x9a"
-			  "\x74\xb9\xb1\x32\x55\x28\xe8\x6d"
-			  "\x35\xc1\xb3\x57\x1f\x93\x64\x0f",
-		.klen	= 32,
-		.iv	= "\x10\x25\x03\x4c\xc8\x2c\xf7\x7d"
-			  "\x44\xd5\x21\x8e\xe4\x23\x6b\x1c"
-			  "\x3e\x6a\x34\x53\x97\xd0\xc8\x73"
-			  "\x66\xcd\xea\x4d\xd5\xab\x4c\xc5",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x2e\xb7\x20\x1c\x50\x6a\x4b\x8b"
-			  "\x84\x42\x7a\xd7\xe1\xb5\xcd\x1f"
-			  "\xd3",
-		.plen	= 17,
-		.ctext	= "\x71\x6b\x37\x0b\x02\x61\x28\x12"
-			  "\x83\xab\x66\x90\x84\xc7\xd1\xc5"
-			  "\xb2\x7a\xb4\x7b\xb4\xfe\x02\xb2"
-			  "\xc0\x00\x39\x13\xb5\x51\x68\x44"
-			  "\xad",
-		.clen	= 33,
-	}, {
-		.key	= "\x3d\x80\xae\x84\x94\x09\xf6\x12"
-			  "\xa4\x79\xa6\xfb\xe0\x7f\xfd\xa0"
-			  "\x9e\x7c\xbc\xb6\x5b\xc8\x6a\x86"
-			  "\xf7\xef\x91\x30\xf9\xf2\x04\xe6",
-		.klen	= 32,
-		.iv	= "\x4c\x49\x3d\xec\xd8\xa8\xa0\x98"
-			  "\xc5\xb0\xd3\x1f\xde\x48\x2e\x22"
-			  "\x69\x2c\x3f\xd7\x9c\x70\x4a\x8d"
-			  "\x27\xfa\xc9\x26\xaf\x0a\xeb\x9c",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x6b\xdc\x5a\xbb\x60\xe5\xf4\xa6"
-			  "\x05\x1d\x2c\x68\xdb\xda\x8f\x25"
-			  "\xfe\x8d\x45\x19\x1e\xc0\x0b\x99"
-			  "\x88\x11\x39\x12\x1c\x3a\xbb",
-		.plen	= 31,
-		.ctext	= "\xaf\xa4\x34\x0d\x59\xe6\x1c\x2f"
-			  "\x06\x3b\x52\x18\x49\x75\x1b\xf0"
-			  "\x53\x09\x72\x7b\x45\x79\xe0\xbe"
-			  "\x89\x85\x23\x15\xb8\x79\x07\x4c"
-			  "\x53\x7a\x15\x37\x0a\xee\xb7\xfb"
-			  "\xc4\x1f\x12\x27\xcf\x77\x90",
-		.clen	= 47,
-	}, {
-		.key	= "\x7a\xa5\xe8\x23\xa4\x84\x9e\x2d"
-			  "\x25\x53\x58\x8c\xda\xa3\xc0\xa6"
-			  "\xc8\x3e\xc8\x3a\x60\x68\xec\xa0"
-			  "\xb8\x1c\x70\x08\xd3\x51\xa3\xbd",
-		.klen	= 32,
-		.iv	= "\x89\x6e\x77\x8b\xe8\x23\x49\xb4"
-			  "\x45\x8a\x85\xb1\xd8\x6c\xf1\x28"
-			  "\x93\xef\x4b\x5b\xa1\x10\xcc\xa6"
-			  "\xe8\x28\xa8\xfe\x89\x69\x8b\x72",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\xa7\x00\x93\x5b\x70\x61\x9d\xc2"
-			  "\x86\xf7\xde\xfa\xd5\xfe\x52\x2b"
-			  "\x28\x50\x51\x9d\x24\x60\x8d\xb3"
-			  "\x49\x3e\x17\xea\xf6\x99\x5a\xdd",
-		.plen	= 32,
-		.ctext	= "\xe2\xc9\x0b\x33\x31\x02\xb3\xb4"
-			  "\x33\xfe\xeb\xa8\xb7\x9b\xb2\xd7"
-			  "\xeb\x0f\x05\x2b\xba\xb3\xca\xef"
-			  "\xf6\xd1\xb6\xc0\xb9\x9b\x85\xc5"
-			  "\xbf\x7a\x3e\xcc\x31\x76\x09\x80"
-			  "\x32\x5d\xbb\xe8\x38\x0e\x77\xd3",
-		.clen	= 48,
-	}, {
-		.key	= "\xb6\xca\x22\xc3\xb4\x00\x47\x49"
-			  "\xa6\x2d\x0a\x1e\xd4\xc7\x83\xad"
-			  "\xf3\x00\xd4\xbf\x65\x08\x6e\xb9"
-			  "\x7a\x4a\x4f\xe0\xad\xb0\x42\x93",
-		.klen	= 32,
-		.iv	= "\xc5\x93\xb0\x2a\xf8\x9f\xf1\xd0"
-			  "\xc6\x64\x37\x42\xd2\x90\xb3\x2e"
-			  "\xbd\xb1\x57\xe0\xa6\xb0\x4e\xc0"
-			  "\xaa\x55\x87\xd6\x63\xc8\x2a\x49",
-		.assoc	= "\xd5",
-		.alen	= 1,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x96\x43\x30\xca\x6c\x4f\xd7\x12"
-			  "\xba\xd9\xb3\x18\x86\xdf\xc3\x52",
-		.clen	= 16,
-	}, {
-		.key	= "\xf3\xee\x5c\x62\xc4\x7c\xf0\x65"
-			  "\x27\x08\xbd\xaf\xce\xec\x45\xb3"
-			  "\x1d\xc3\xdf\x43\x6a\xa8\xf0\xd3"
-			  "\x3b\x77\x2e\xb9\x87\x0f\xe1\x6a",
-		.klen	= 32,
-		.iv	= "\x02\xb8\xea\xca\x09\x1b\x9a\xec"
-			  "\x47\x3e\xe9\xd4\xcc\xb5\x76\x34"
-			  "\xe8\x73\x62\x64\xab\x50\xd0\xda"
-			  "\x6b\x83\x66\xaf\x3e\x27\xc9\x1f",
-		.assoc	= "\x11\x81\x78\x32\x4d\xb9\x44\x73"
-			  "\x68\x75\x16\xf8\xcb\x7e\xa7",
-		.alen	= 15,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x2f\xab\x45\xe2\xa7\x46\xc5\x83"
-			  "\x11\x9f\xb0\x74\xee\xc7\x03\xdd",
-		.clen	= 16,
-	}, {
-		.key	= "\x2f\x13\x95\x01\xd5\xf7\x99\x81"
-			  "\xa8\xe2\x6f\x41\xc8\x10\x08\xb9"
-			  "\x47\x85\xeb\xc7\x6f\x48\x72\xed"
-			  "\xfc\xa5\x0d\x91\x61\x6e\x81\x40",
-		.klen	= 32,
-		.iv	= "\x3f\xdc\x24\x69\x19\x96\x43\x08"
-			  "\xc8\x18\x9b\x65\xc6\xd9\x39\x3b"
-			  "\x12\x35\x6e\xe8\xb0\xf0\x52\xf3"
-			  "\x2d\xb0\x45\x87\x18\x86\x68\xf6",
-		.assoc	= "\x4e\xa5\xb2\xd1\x5d\x35\xed\x8f"
-			  "\xe8\x4f\xc8\x89\xc5\xa2\x69\xbc",
-		.alen	= 16,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x16\x44\x73\x33\x5d\xf2\xb9\x04"
-			  "\x6b\x79\x98\xef\xdb\xd5\xc5\xf1",
-		.clen	= 16,
-	}, {
-		.key	= "\x6c\x38\xcf\xa1\xe5\x73\x41\x9d"
-			  "\x29\xbc\x21\xd2\xc2\x35\xcb\xbf"
-			  "\x72\x47\xf6\x4b\x74\xe8\xf4\x06"
-			  "\xbe\xd3\xec\x6a\x3b\xcd\x20\x17",
-		.klen	= 32,
-		.iv	= "\x7b\x01\x5d\x08\x29\x12\xec\x24"
-			  "\x49\xf3\x4d\xf7\xc0\xfe\xfb\x41"
-			  "\x3c\xf8\x79\x6c\xb6\x90\xd4\x0d"
-			  "\xee\xde\x23\x60\xf2\xe5\x08\xcc",
-		.assoc	= "\x8a\xca\xec\x70\x6d\xb1\x96\xab"
-			  "\x69\x29\x7a\x1b\xbf\xc7\x2c\xc2"
-			  "\x07",
-		.alen	= 17,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xa4\x9b\xb8\x47\xc0\xed\x7a\x45"
-			  "\x98\x54\x8c\xed\x3d\x17\xf0\xdd",
-		.clen	= 16,
-	}, {
-		.key	= "\xa8\x5c\x09\x40\xf5\xef\xea\xb8"
-			  "\xaa\x96\xd3\x64\xbc\x59\x8d\xc6"
-			  "\x9c\x0a\x02\xd0\x79\x88\x76\x20"
-			  "\x7f\x00\xca\x42\x15\x2c\xbf\xed",
-		.klen	= 32,
-		.iv	= "\xb8\x26\x97\xa8\x39\x8e\x94\x3f"
-			  "\xca\xcd\xff\x88\xba\x22\xbe\x47"
-			  "\x67\xba\x85\xf1\xbb\x30\x56\x26"
-			  "\xaf\x0b\x02\x38\xcc\x44\xa7\xa3",
-		.assoc	= "\xc7\xef\x26\x10\x7d\x2c\x3f\xc6"
-			  "\xea\x03\x2c\xac\xb9\xeb\xef\xc9"
-			  "\x31\x6b\x08\x12\xfc\xd8\x37\x2d"
-			  "\xe0\x17\x3a\x2e\x83\x5c\x8f",
-		.alen	= 31,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x20\x24\xe2\x33\x5c\x60\xc9\xf0"
-			  "\xa4\x96\x2f\x0d\x53\xc2\xf8\xfc",
-		.clen	= 16,
-	}, {
-		.key	= "\xe5\x81\x42\xdf\x05\x6a\x93\xd4"
-			  "\x2b\x70\x85\xf5\xb6\x7d\x50\xcc"
-			  "\xc6\xcc\x0e\x54\x7f\x28\xf8\x3a"
-			  "\x40\x2e\xa9\x1a\xf0\x8b\x5e\xc4",
-		.klen	= 32,
-		.iv	= "\xf4\x4a\xd1\x47\x49\x09\x3d\x5b"
-			  "\x4b\xa7\xb1\x19\xb4\x46\x81\x4d"
-			  "\x91\x7c\x91\x75\xc0\xd0\xd8\x40"
-			  "\x71\x39\xe1\x10\xa6\xa3\x46\x7a",
-		.assoc	= "\x03\x14\x5f\xaf\x8d\xa8\xe7\xe2"
-			  "\x6b\xde\xde\x3e\xb3\x10\xb1\xcf"
-			  "\x5c\x2d\x14\x96\x01\x78\xb9\x47"
-			  "\xa1\x44\x19\x06\x5d\xbb\x2e\x2f",
-		.alen	= 32,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x6f\x4a\xb9\xe0\xff\x51\xa3\xf1"
-			  "\xd2\x64\x3e\x66\x6a\xb2\x03\xc0",
-		.clen	= 16,
-	}, {
-		.key	= "\x22\xa6\x7c\x7f\x15\xe6\x3c\xf0"
-			  "\xac\x4b\x37\x86\xb0\xa2\x13\xd2"
-			  "\xf1\x8e\x19\xd8\x84\xc8\x7a\x53"
-			  "\x02\x5b\x88\xf3\xca\xea\xfe\x9b",
-		.klen	= 32,
-		.iv	= "\x31\x6f\x0b\xe6\x59\x85\xe6\x77"
-			  "\xcc\x81\x63\xab\xae\x6b\x43\x54"
-			  "\xbb\x3f\x9c\xf9\xc5\x70\x5a\x5a"
-			  "\x32\x67\xc0\xe9\x80\x02\xe5\x50",
-		.assoc	= "\x40",
-		.alen	= 1,
-		.ptext	= "\x4f",
-		.plen	= 1,
-		.ctext	= "\x2c\xfb\xad\x7e\xbe\xa0\x9a\x5b"
-			  "\x7a\x3f\x81\xf7\xfc\x1b\x79\x83"
-			  "\xc7",
-		.clen	= 17,
-	}, {
-		.key	= "\x5e\xcb\xb6\x1e\x25\x62\xe4\x0c"
-			  "\x2d\x25\xe9\x18\xaa\xc6\xd5\xd8"
-			  "\x1b\x50\x25\x5d\x89\x68\xfc\x6d"
-			  "\xc3\x89\x67\xcb\xa4\x49\x9d\x71",
-		.klen	= 32,
-		.iv	= "\x6d\x94\x44\x86\x69\x00\x8f\x93"
-			  "\x4d\x5b\x15\x3c\xa8\x8f\x06\x5a"
-			  "\xe6\x01\xa8\x7e\xca\x10\xdc\x73"
-			  "\xf4\x94\x9f\xc1\x5a\x61\x85\x27",
-		.assoc	= "\x7c\x5d\xd3\xee\xad\x9f\x39\x1a"
-			  "\x6d\x92\x42\x61\xa7\x58\x37",
-		.alen	= 15,
-		.ptext	= "\x8b\x26\x61\x55\xf1\x3e\xe3\xa1"
-			  "\x8d\xc8\x6e\x85\xa5\x21\x67",
-		.plen	= 15,
-		.ctext	= "\x1f\x7f\xca\x3c\x2b\xe7\x27\xba"
-			  "\x7e\x98\x83\x02\x34\x23\xf7\x94"
-			  "\xde\x35\xe6\x1d\x14\x18\xe5\x38"
-			  "\x14\x80\x6a\xa7\x1b\xae\x1d",
-		.clen	= 31,
-	}, {
-		.key	= "\x9b\xef\xf0\xbd\x35\xdd\x8d\x28"
-			  "\xad\xff\x9b\xa9\xa4\xeb\x98\xdf"
-			  "\x46\x13\x31\xe1\x8e\x08\x7e\x87"
-			  "\x85\xb6\x46\xa3\x7e\xa8\x3c\x48",
-		.klen	= 32,
-		.iv	= "\xaa\xb8\x7e\x25\x79\x7c\x37\xaf"
-			  "\xce\x36\xc7\xce\xa2\xb4\xc9\x60"
-			  "\x10\xc3\xb3\x02\xcf\xb0\x5e\x8d"
-			  "\xb5\xc2\x7e\x9a\x35\xc0\x24\xfd",
-		.assoc	= "\xb9\x82\x0c\x8d\xbd\x1b\xe2\x36"
-			  "\xee\x6c\xf4\xf2\xa1\x7d\xf9\xe2",
-		.alen	= 16,
-		.ptext	= "\xc8\x4b\x9b\xf5\x01\xba\x8c\xbd"
-			  "\x0e\xa3\x21\x16\x9f\x46\x2a\x63",
-		.plen	= 16,
-		.ctext	= "\x05\x86\x9e\xd7\x2b\xa3\x97\x01"
-			  "\xbe\x28\x98\x10\x6f\xe9\x61\x32"
-			  "\x96\xbb\xb1\x2e\x8f\x0c\x44\xb9"
-			  "\x46\x2d\x55\xe3\x42\x67\xf2\xaf",
-		.clen	= 32,
-	}, {
-		.key	= "\xd7\x14\x29\x5d\x45\x59\x36\x44"
-			  "\x2e\xd9\x4d\x3b\x9e\x0f\x5b\xe5"
-			  "\x70\xd5\x3c\x65\x93\xa8\x00\xa0"
-			  "\x46\xe4\x25\x7c\x58\x08\xdb\x1e",
-		.klen	= 32,
-		.iv	= "\xe6\xdd\xb8\xc4\x89\xf8\xe0\xca"
-			  "\x4f\x10\x7a\x5f\x9c\xd8\x8b\x66"
-			  "\x3b\x86\xbf\x86\xd4\x50\xe0\xa7"
-			  "\x76\xef\x5c\x72\x0f\x1f\xc3\xd4",
-		.assoc	= "\xf5\xa6\x46\x2c\xce\x97\x8a\x51"
-			  "\x6f\x46\xa6\x83\x9b\xa1\xbc\xe8"
-			  "\x05",
-		.alen	= 17,
-		.ptext	= "\x05\x70\xd5\x94\x12\x36\x35\xd8"
-			  "\x8f\x7d\xd3\xa8\x99\x6a\xed\x69"
-			  "\xd0",
-		.plen	= 17,
-		.ctext	= "\x9c\xe0\x06\x7b\x86\xcf\x2e\xd8"
-			  "\x45\x65\x1b\x72\x9b\xaa\xa3\x1e"
-			  "\x87\x9d\x26\xdf\xff\x81\x11\xd2"
-			  "\x47\x41\xb9\x24\xc1\x8a\xa3\x8b"
-			  "\x55",
-		.clen	= 33,
-	}, {
-		.key	= "\x14\x39\x63\xfc\x56\xd5\xdf\x5f"
-			  "\xaf\xb3\xff\xcc\x98\x33\x1d\xeb"
-			  "\x9a\x97\x48\xe9\x98\x48\x82\xba"
-			  "\x07\x11\x04\x54\x32\x67\x7b\xf5",
-		.klen	= 32,
-		.iv	= "\x23\x02\xf1\x64\x9a\x73\x89\xe6"
-			  "\xd0\xea\x2c\xf1\x96\xfc\x4e\x6d"
-			  "\x65\x48\xcb\x0a\xda\xf0\x62\xc0"
-			  "\x38\x1d\x3b\x4a\xe9\x7e\x62\xaa",
-		.assoc	= "\x32\xcb\x80\xcc\xde\x12\x33\x6d"
-			  "\xf0\x20\x58\x15\x95\xc6\x7f\xee"
-			  "\x2f\xf9\x4e\x2c\x1b\x98\x43\xc7"
-			  "\x68\x28\x73\x40\x9f\x96\x4a",
-		.alen	= 31,
-		.ptext	= "\x41\x94\x0e\x33\x22\xb1\xdd\xf4"
-			  "\x10\x57\x85\x39\x93\x8f\xaf\x70"
-			  "\xfa\xa9\xd0\x4d\x5c\x40\x23\xcd"
-			  "\x98\x34\xab\x37\x56\xae\x32",
-		.plen	= 31,
-		.ctext	= "\xa0\xc8\xde\x83\x0d\xc3\x4e\xd5"
-			  "\x69\x7f\x7a\xdd\x8c\x46\xda\xba"
-			  "\x0a\x5c\x0e\x7f\xac\xee\x02\xd2"
-			  "\xe5\x4b\x0a\xba\xb8\xa4\x7b\x66"
-			  "\xde\xae\xdb\xc2\xc0\x0b\xf7\x2b"
-			  "\xdf\xb8\xea\xd8\xa9\x38\xed",
-		.clen	= 47,
-	}, {
-		.key	= "\x50\x5d\x9d\x9b\x66\x50\x88\x7b"
-			  "\x30\x8e\xb1\x5e\x92\x58\xe0\xf1"
-			  "\xc5\x5a\x53\x6e\x9d\xe8\x04\xd4"
-			  "\xc9\x3f\xe2\x2d\x0c\xc6\x1a\xcb",
-		.klen	= 32,
-		.iv	= "\x5f\x27\x2b\x03\xaa\xef\x32\x02"
-			  "\x50\xc4\xde\x82\x90\x21\x11\x73"
-			  "\x8f\x0a\xd6\x8f\xdf\x90\xe4\xda"
-			  "\xf9\x4a\x1a\x23\xc3\xdd\x02\x81",
-		.assoc	= "\x6e\xf0\xba\x6b\xee\x8e\xdc\x89"
-			  "\x71\xfb\x0a\xa6\x8f\xea\x41\xf4"
-			  "\x5a\xbb\x59\xb0\x20\x38\xc5\xe0"
-			  "\x29\x56\x52\x19\x79\xf5\xe9\x37",
-		.alen	= 32,
-		.ptext	= "\x7e\xb9\x48\xd3\x32\x2d\x86\x10"
-			  "\x91\x31\x37\xcb\x8d\xb3\x72\x76"
-			  "\x24\x6b\xdc\xd1\x61\xe0\xa5\xe7"
-			  "\x5a\x61\x8a\x0f\x30\x0d\xd1\xec",
-		.plen	= 32,
-		.ctext	= "\xd3\x68\x14\x70\x3c\x01\x43\x86"
-			  "\x02\xab\xbe\x75\xaa\xe7\xf5\x53"
-			  "\x5c\x05\xbd\x9b\x19\xbb\x2a\x61"
-			  "\x8f\x69\x05\x75\x8e\xca\x60\x0c"
-			  "\x5b\xa2\x48\x61\x32\x74\x11\x2b"
-			  "\xf6\xcf\x06\x78\x6f\x78\x1a\x4a",
-		.clen	= 48,
-	}, {
-		.key	= "\x8d\x82\xd6\x3b\x76\xcc\x30\x97"
-			  "\xb1\x68\x63\xef\x8c\x7c\xa3\xf7"
-			  "\xef\x1c\x5f\xf2\xa3\x88\x86\xed"
-			  "\x8a\x6d\xc1\x05\xe7\x25\xb9\xa2",
-		.klen	= 32,
-		.iv	= "\x9c\x4b\x65\xa2\xba\x6b\xdb\x1e"
-			  "\xd1\x9e\x90\x13\x8a\x45\xd3\x79"
-			  "\xba\xcd\xe2\x13\xe4\x30\x66\xf4"
-			  "\xba\x78\xf9\xfb\x9d\x3c\xa1\x58",
-		.assoc	= "\xab\x14\xf3\x0a\xfe\x0a\x85\xa5"
-			  "\xf2\xd5\xbc\x38\x89\x0e\x04\xfb"
-			  "\x84\x7d\x65\x34\x25\xd8\x47\xfa"
-			  "\xeb\x83\x31\xf1\x54\x54\x89\x0d"
-			  "\x9d",
-		.alen	= 33,
-		.ptext	= "\xba\xde\x82\x72\x42\xa9\x2f\x2c"
-			  "\x12\x0b\xe9\x5c\x87\xd7\x35\x7c"
-			  "\x4f\x2e\xe8\x55\x66\x80\x27\x00"
-			  "\x1b\x8f\x68\xe7\x0a\x6c\x71\xc3"
-			  "\x21\x78\x55\x9d\x9c\x65\x7b\xcd"
-			  "\x0a\x34\x97\xff\x47\x37\xb0\x2a"
-			  "\x80\x0d\x19\x98\x33\xa9\x7a\xe3"
-			  "\x2e\x4c\xc6\xf3\x8c\x88\x42\x01"
-			  "\xbd",
-		.plen	= 65,
-		.ctext	= "\x07\x0a\x35\xb0\x82\x03\x5a\xd2"
-			  "\x15\x3a\x6c\x72\x83\x9b\xb1\x75"
-			  "\xea\xf2\xfc\xff\xc6\xf1\x13\xa4"
-			  "\x1a\x93\x33\x79\x97\x82\x81\xc0"
-			  "\x96\xc2\x00\xab\x39\xae\xa1\x62"
-			  "\x53\xa3\x86\xc9\x07\x8c\xaf\x22"
-			  "\x47\x31\x29\xca\x4a\x95\xf5\xd5"
-			  "\x20\x63\x5a\x54\x80\x2c\x4a\x63"
-			  "\xfb\x18\x73\x31\x4f\x08\x21\x5d"
-			  "\x20\xe9\xc3\x7e\xea\x25\x77\x3a"
-			  "\x65",
-		.clen	= 81,
-	}, {
-		.key	= "\xc9\xa7\x10\xda\x86\x48\xd9\xb3"
-			  "\x32\x42\x15\x80\x85\xa1\x65\xfe"
-			  "\x19\xde\x6b\x76\xa8\x28\x08\x07"
-			  "\x4b\x9a\xa0\xdd\xc1\x84\x58\x79",
-		.klen	= 32,
-		.iv	= "\xd8\x70\x9f\x42\xca\xe6\x83\x3a"
-			  "\x52\x79\x42\xa5\x84\x6a\x96\x7f"
-			  "\xe4\x8f\xed\x97\xe9\xd0\xe8\x0d"
-			  "\x7c\xa6\xd8\xd4\x77\x9b\x40\x2e",
-		.assoc	= "\xe8\x39\x2d\xaa\x0e\x85\x2d\xc1"
-			  "\x72\xaf\x6e\xc9\x82\x33\xc7\x01"
-			  "\xaf\x40\x70\xb8\x2a\x78\xc9\x14"
-			  "\xac\xb1\x10\xca\x2e\xb3\x28\xe4"
-			  "\xac\xfa\x58\x7f\xe5\x73\x09\x8c"
-			  "\x1d\x40\x87\x8c\xd9\x75\xc0\x55"
-			  "\xa2\xda\x07\xd1\xc2\xa9\xd1\xbb"
-			  "\x09\x4f\x77\x62\x88\x2d\xf2\x68"
-			  "\x54",
-		.alen	= 65,
-		.ptext	= "\xf7\x02\xbb\x11\x52\x24\xd8\x48"
-			  "\x93\xe6\x9b\xee\x81\xfc\xf7\x82"
-			  "\x79\xf0\xf3\xd9\x6c\x20\xa9\x1a"
-			  "\xdc\xbc\x47\xc0\xe4\xcb\x10\x99"
-			  "\x2f",
-		.plen	= 33,
-		.ctext	= "\x33\xc1\xda\xfa\x15\x21\x07\x8e"
-			  "\x93\x68\xea\x64\x7b\x3d\x4b\x6b"
-			  "\x71\x5e\x5e\x6b\x92\xaa\x65\xc2"
-			  "\x7a\x2a\xc1\xa9\x0a\xa1\x24\x81"
-			  "\x26\x3a\x5a\x09\xe8\xce\x73\x72"
-			  "\xde\x7b\x58\x9e\x85\xb9\xa4\x28"
-			  "\xda",
-		.clen	= 49,
-	}, {
-		.key	= "\x06\xcc\x4a\x79\x96\xc3\x82\xcf"
-			  "\xb3\x1c\xc7\x12\x7f\xc5\x28\x04"
-			  "\x44\xa1\x76\xfb\xad\xc8\x8a\x21"
-			  "\x0d\xc8\x7f\xb6\x9b\xe3\xf8\x4f",
-		.klen	= 32,
-		.iv	= "\x15\x95\xd8\xe1\xda\x62\x2c\x56"
-			  "\xd3\x53\xf4\x36\x7e\x8e\x59\x85"
-			  "\x0e\x51\xf9\x1c\xee\x70\x6a\x27"
-			  "\x3d\xd3\xb7\xac\x51\xfa\xdf\x05",
-		.assoc	= "\x24\x5e\x67\x49\x1e\x01\xd6\xdd"
-			  "\xf3\x89\x20\x5b\x7c\x57\x89\x07",
-		.alen	= 16,
-		.ptext	= "\x33\x27\xf5\xb1\x62\xa0\x80\x63"
-			  "\x14\xc0\x4d\x7f\x7b\x20\xba\x89",
-		.plen	= 16,
-		.ctext	= "\x3e\xf8\x86\x3d\x39\xf8\x96\x02"
-			  "\x0f\xdf\xc9\x6e\x37\x1e\x57\x99"
-			  "\x07\x2a\x1a\xac\xd1\xda\xfd\x3b"
-			  "\xc7\xff\xbd\xbc\x85\x09\x0b",
-		.clen	= 31,
-	}, {
-		.key	= "\x42\xf0\x84\x19\xa6\x3f\x2b\xea"
-			  "\x34\xf6\x79\xa3\x79\xe9\xeb\x0a"
-			  "\x6e\x63\x82\x7f\xb2\x68\x0c\x3a"
-			  "\xce\xf5\x5e\x8e\x75\x42\x97\x26",
-		.klen	= 32,
-		.iv	= "\x51\xb9\x12\x80\xea\xde\xd5\x71"
-			  "\x54\x2d\xa6\xc8\x78\xb2\x1b\x8c"
-			  "\x39\x14\x05\xa0\xf3\x10\xec\x41"
-			  "\xff\x01\x95\x84\x2b\x59\x7f\xdb",
-		.assoc	= "\x61\x83\xa0\xe8\x2e\x7d\x7f\xf8"
-			  "\x74\x63\xd2\xec\x76\x7c\x4c\x0d",
-		.alen	= 16,
-		.ptext	= "\x70\x4c\x2f\x50\x72\x1c\x29\x7f"
-			  "\x95\x9a\xff\x10\x75\x45\x7d\x8f",
-		.plen	= 16,
-		.ctext	= "\x2f\xc4\xd8\x0d\xa6\x07\xef\x2e"
-			  "\x6c\xd9\x84\x63\x70\x97\x61\x37"
-			  "\x08\x2f\x16\x90\x9e\x62\x30\x0d"
-			  "\x62\xd5\xc8\xf0\x46\x1a",
-		.clen	= 30,
-	}, {
-		.key	= "\x7f\x15\xbd\xb8\xb6\xba\xd3\x06"
-			  "\xb5\xd1\x2b\x35\x73\x0e\xad\x10"
-			  "\x98\x25\x8d\x03\xb7\x08\x8e\x54"
-			  "\x90\x23\x3d\x67\x4f\xa1\x36\xfc",
-		.klen	= 32,
-		.iv	= "\x8e\xde\x4c\x20\xfa\x59\x7e\x8d"
-			  "\xd5\x07\x58\x59\x72\xd7\xde\x92"
-			  "\x63\xd6\x10\x24\xf8\xb0\x6e\x5a"
-			  "\xc0\x2e\x74\x5d\x06\xb8\x1e\xb2",
-		.assoc	= "\x9d\xa7\xda\x88\x3e\xf8\x28\x14"
-			  "\xf5\x3e\x85\x7d\x70\xa0\x0f\x13",
-		.alen	= 16,
-		.ptext	= "\xac\x70\x69\xef\x82\x97\xd2\x9b"
-			  "\x15\x74\xb1\xa2\x6f\x69\x3f\x95",
-		.plen	= 16,
-		.ctext	= "\xce\xf3\x17\x87\x49\xc2\x00\x46"
-			  "\xc6\x12\x5c\x8f\x81\x38\xaa\x55"
-			  "\xf8\x67\x75\xf1\x75\xe3\x2a\x24",
-		.clen	= 24,
-	},
-};
-
-/*
- * MORUS-640 test vectors - generated via reference implementation from
- * SUPERCOP (https://bench.cr.yp.to/supercop.html):
- *
- *   https://bench.cr.yp.to/supercop/supercop-20170228.tar.xz
- *   (see crypto_aead/morus640128v2/)
- */
-static const struct aead_testvec morus640_tv_template[] = {
-	{
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 16,
-		.iv	= "\x0f\xc9\x8e\x67\x44\x9e\xaa\x86"
-			  "\x20\x36\x2c\x24\xfe\xc9\x30\x81",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x89\x62\x7d\xf3\x07\x9d\x52\x05"
-			  "\x53\xc3\x04\x60\x93\xb4\x37\x9a",
-		.clen	= 16,
-	}, {
-		.key	= "\x3c\x24\x39\x9f\x10\x7b\xa8\x1b"
-			  "\x80\xda\xb2\x91\xf9\x24\xc2\x06",
-		.klen	= 16,
-		.iv	= "\x4b\xed\xc8\x07\x54\x1a\x52\xa2"
-			  "\xa1\x10\xde\xb5\xf8\xed\xf3\x87",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x69",
-		.plen	= 1,
-		.ctext	= "\xa8\x8d\xe4\x90\xb5\x50\x8f\x78"
-			  "\xb6\x10\x9a\x59\x5f\x61\x37\x70"
-			  "\x09",
-		.clen	= 17,
-	}, {
-		.key	= "\x79\x49\x73\x3e\x20\xf7\x51\x37"
-			  "\x01\xb4\x64\x22\xf3\x48\x85\x0c",
-		.klen	= 16,
-		.iv	= "\x88\x12\x01\xa6\x64\x96\xfb\xbe"
-			  "\x22\xea\x90\x47\xf2\x11\xb5\x8e",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\xa6\xa4\x1e\x76\xec\xd4\x50\xcc"
-			  "\x62\x58\xe9\x8f\xef\xa4\x17",
-		.plen	= 15,
-		.ctext	= "\x76\xdd\xb9\x05\x3d\xce\x61\x38"
-			  "\xf3\xef\xf7\xe5\xd7\xfd\x70\xa5"
-			  "\xcf\x9d\x64\xb8\x0a\x9f\xfd\x8b"
-			  "\xd4\x6e\xfe\xd9\xc8\x63\x4b",
-		.clen	= 31,
-	}, {
-		.key	= "\xb5\x6e\xad\xdd\x30\x72\xfa\x53"
-			  "\x82\x8e\x16\xb4\xed\x6d\x47\x12",
-		.klen	= 16,
-		.iv	= "\xc4\x37\x3b\x45\x74\x11\xa4\xda"
-			  "\xa2\xc5\x42\xd8\xec\x36\x78\x94",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\xe2\xc9\x58\x15\xfc\x4f\xf8\xe8"
-			  "\xe3\x32\x9b\x21\xe9\xc8\xd9\x97",
-		.plen	= 16,
-		.ctext	= "\xdc\x72\xe8\x14\xfb\x63\xad\x72"
-			  "\x1f\x57\x9a\x1f\x88\x81\xdb\xd6"
-			  "\xc1\x91\x9d\xb9\x25\xc4\x99\x4c"
-			  "\x97\xcd\x8a\x0c\x9d\x68\x00\x1c",
-		.clen	= 32,
-	}, {
-		.key	= "\xf2\x92\xe6\x7d\x40\xee\xa3\x6f"
-			  "\x03\x68\xc8\x45\xe7\x91\x0a\x18",
-		.klen	= 16,
-		.iv	= "\x01\x5c\x75\xe5\x84\x8d\x4d\xf6"
-			  "\x23\x9f\xf4\x6a\xe6\x5a\x3b\x9a",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x1f\xee\x92\xb4\x0c\xcb\xa1\x04"
-			  "\x64\x0c\x4d\xb2\xe3\xec\x9c\x9d"
-			  "\x09",
-		.plen	= 17,
-		.ctext	= "\x6b\x4f\x3b\x90\x9a\xa2\xb3\x82"
-			  "\x0a\xb8\x55\xee\xeb\x73\x4d\x7f"
-			  "\x54\x11\x3a\x8a\x31\xa3\xb5\xf2"
-			  "\xcd\x49\xdb\xf3\xee\x26\xbd\xa2"
-			  "\x0d",
-		.clen	= 33,
-	}, {
-		.key	= "\x2e\xb7\x20\x1c\x50\x6a\x4b\x8b"
-			  "\x84\x42\x7a\xd7\xe1\xb5\xcd\x1f",
-		.klen	= 16,
-		.iv	= "\x3d\x80\xae\x84\x94\x09\xf6\x12"
-			  "\xa4\x79\xa6\xfb\xe0\x7f\xfd\xa0",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x5c\x13\xcb\x54\x1c\x47\x4a\x1f"
-			  "\xe5\xe6\xff\x44\xdd\x11\x5f\xa3"
-			  "\x33\xdd\xc2\xf8\xdd\x18\x2b\x93"
-			  "\x57\x05\x01\x1c\x66\x22\xd3",
-		.plen	= 31,
-		.ctext	= "\x59\xd1\x0f\x6b\xee\x27\x84\x92"
-			  "\xb7\xa9\xb5\xdd\x02\xa4\x12\xa5"
-			  "\x50\x32\xb4\x9a\x2e\x35\x83\x55"
-			  "\x36\x12\x12\xed\xa3\x31\xc5\x30"
-			  "\xa7\xe2\x4a\x6d\x05\x59\x43\x91"
-			  "\x75\xfa\x6c\x17\xc6\x73\xca",
-		.clen	= 47,
-	}, {
-		.key	= "\x6b\xdc\x5a\xbb\x60\xe5\xf4\xa6"
-			  "\x05\x1d\x2c\x68\xdb\xda\x8f\x25",
-		.klen	= 16,
-		.iv	= "\x7a\xa5\xe8\x23\xa4\x84\x9e\x2d"
-			  "\x25\x53\x58\x8c\xda\xa3\xc0\xa6",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x98\x37\x05\xf3\x2c\xc2\xf3\x3b"
-			  "\x66\xc0\xb1\xd5\xd7\x35\x21\xaa"
-			  "\x5d\x9f\xce\x7c\xe2\xb8\xad\xad"
-			  "\x19\x33\xe0\xf4\x40\x81\x72\x28",
-		.plen	= 32,
-		.ctext	= "\xdb\x49\x68\x0f\x91\x5b\x21\xb1"
-			  "\xcf\x50\xb2\x4c\x32\xe1\xa6\x69"
-			  "\xc0\xfb\x44\x1f\xa0\x9a\xeb\x39"
-			  "\x1b\xde\x68\x38\xcc\x27\x52\xc5"
-			  "\xf6\x3e\x74\xea\x66\x5b\x5f\x0c"
-			  "\x65\x9e\x58\xe6\x52\xa2\xfe\x59",
-		.clen	= 48,
-	}, {
-		.key	= "\xa7\x00\x93\x5b\x70\x61\x9d\xc2"
-			  "\x86\xf7\xde\xfa\xd5\xfe\x52\x2b",
-		.klen	= 16,
-		.iv	= "\xb6\xca\x22\xc3\xb4\x00\x47\x49"
-			  "\xa6\x2d\x0a\x1e\xd4\xc7\x83\xad",
-		.assoc	= "\xc5",
-		.alen	= 1,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x56\xe7\x24\x52\xdd\x95\x60\x5b"
-			  "\x09\x48\x39\x69\x9c\xb3\x62\x46",
-		.clen	= 16,
-	}, {
-		.key	= "\xe4\x25\xcd\xfa\x80\xdd\x46\xde"
-			  "\x07\xd1\x90\x8b\xcf\x23\x15\x31",
-		.klen	= 16,
-		.iv	= "\xf3\xee\x5c\x62\xc4\x7c\xf0\x65"
-			  "\x27\x08\xbd\xaf\xce\xec\x45\xb3",
-		.assoc	= "\x02\xb8\xea\xca\x09\x1b\x9a\xec"
-			  "\x47\x3e\xe9\xd4\xcc\xb5\x76",
-		.alen	= 15,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xdd\xfa\x6c\x1f\x5d\x86\x87\x01"
-			  "\x13\xe5\x73\x46\x46\xf2\x5c\xe1",
-		.clen	= 16,
-	}, {
-		.key	= "\x20\x4a\x07\x99\x91\x58\xee\xfa"
-			  "\x88\xab\x42\x1c\xc9\x47\xd7\x38",
-		.klen	= 16,
-		.iv	= "\x2f\x13\x95\x01\xd5\xf7\x99\x81"
-			  "\xa8\xe2\x6f\x41\xc8\x10\x08\xb9",
-		.assoc	= "\x3f\xdc\x24\x69\x19\x96\x43\x08"
-			  "\xc8\x18\x9b\x65\xc6\xd9\x39\x3b",
-		.alen	= 16,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xa6\x1b\xb9\xd7\x5e\x3c\xcf\xac"
-			  "\xa9\x21\x45\x0b\x16\x52\xf7\xe1",
-		.clen	= 16,
-	}, {
-		.key	= "\x5d\x6f\x41\x39\xa1\xd4\x97\x16"
-			  "\x09\x85\xf4\xae\xc3\x6b\x9a\x3e",
-		.klen	= 16,
-		.iv	= "\x6c\x38\xcf\xa1\xe5\x73\x41\x9d"
-			  "\x29\xbc\x21\xd2\xc2\x35\xcb\xbf",
-		.assoc	= "\x7b\x01\x5d\x08\x29\x12\xec\x24"
-			  "\x49\xf3\x4d\xf7\xc0\xfe\xfb\x41"
-			  "\x3c",
-		.alen	= 17,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x15\xff\xde\x3b\x34\xfc\xf6\xf9"
-			  "\xbb\xa8\x62\xad\x0a\xf5\x48\x60",
-		.clen	= 16,
-	}, {
-		.key	= "\x99\x93\x7a\xd8\xb1\x50\x40\x31"
-			  "\x8a\x60\xa6\x3f\xbd\x90\x5d\x44",
-		.klen	= 16,
-		.iv	= "\xa8\x5c\x09\x40\xf5\xef\xea\xb8"
-			  "\xaa\x96\xd3\x64\xbc\x59\x8d\xc6",
-		.assoc	= "\xb8\x26\x97\xa8\x39\x8e\x94\x3f"
-			  "\xca\xcd\xff\x88\xba\x22\xbe\x47"
-			  "\x67\xba\x85\xf1\xbb\x30\x56\x26"
-			  "\xaf\x0b\x02\x38\xcc\x44\xa7",
-		.alen	= 31,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xd2\x9d\xf8\x3b\xd7\x84\xe9\x2d"
-			  "\x4b\xef\x75\x16\x0a\x99\xae\x6b",
-		.clen	= 16,
-	}, {
-		.key	= "\xd6\xb8\xb4\x77\xc1\xcb\xe9\x4d"
-			  "\x0a\x3a\x58\xd1\xb7\xb4\x1f\x4a",
-		.klen	= 16,
-		.iv	= "\xe5\x81\x42\xdf\x05\x6a\x93\xd4"
-			  "\x2b\x70\x85\xf5\xb6\x7d\x50\xcc",
-		.assoc	= "\xf4\x4a\xd1\x47\x49\x09\x3d\x5b"
-			  "\x4b\xa7\xb1\x19\xb4\x46\x81\x4d"
-			  "\x91\x7c\x91\x75\xc0\xd0\xd8\x40"
-			  "\x71\x39\xe1\x10\xa6\xa3\x46\x7a",
-		.alen	= 32,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xe4\x8d\xa7\xa7\x45\xc1\x31\x4f"
-			  "\xce\xfb\xaf\xd6\xc2\xe6\xee\xc0",
-		.clen	= 16,
-	}, {
-		.key	= "\x12\xdd\xee\x17\xd1\x47\x92\x69"
-			  "\x8b\x14\x0a\x62\xb1\xd9\xe2\x50",
-		.klen	= 16,
-		.iv	= "\x22\xa6\x7c\x7f\x15\xe6\x3c\xf0"
-			  "\xac\x4b\x37\x86\xb0\xa2\x13\xd2",
-		.assoc	= "\x31",
-		.alen	= 1,
-		.ptext	= "\x40",
-		.plen	= 1,
-		.ctext	= "\xe2\x67\x38\x4f\xb9\xad\x7d\x38"
-			  "\x01\xfe\x84\x14\x85\xf8\xd1\xe3"
-			  "\x22",
-		.clen	= 17,
-	}, {
-		.key	= "\x4f\x01\x27\xb6\xe1\xc3\x3a\x85"
-			  "\x0c\xee\xbc\xf4\xab\xfd\xa5\x57",
-		.klen	= 16,
-		.iv	= "\x5e\xcb\xb6\x1e\x25\x62\xe4\x0c"
-			  "\x2d\x25\xe9\x18\xaa\xc6\xd5\xd8",
-		.assoc	= "\x6d\x94\x44\x86\x69\x00\x8f\x93"
-			  "\x4d\x5b\x15\x3c\xa8\x8f\x06",
-		.alen	= 15,
-		.ptext	= "\x7c\x5d\xd3\xee\xad\x9f\x39\x1a"
-			  "\x6d\x92\x42\x61\xa7\x58\x37",
-		.plen	= 15,
-		.ctext	= "\x77\x32\x61\xeb\xb4\x33\x29\x92"
-			  "\x29\x95\xc5\x8e\x85\x76\xab\xfc"
-			  "\x07\x95\xa7\x44\x74\xf7\x22\xff"
-			  "\xd8\xd8\x36\x3d\x8a\x7f\x9e",
-		.clen	= 31,
-	}, {
-		.key	= "\x8b\x26\x61\x55\xf1\x3e\xe3\xa1"
-			  "\x8d\xc8\x6e\x85\xa5\x21\x67\x5d",
-		.klen	= 16,
-		.iv	= "\x9b\xef\xf0\xbd\x35\xdd\x8d\x28"
-			  "\xad\xff\x9b\xa9\xa4\xeb\x98\xdf",
-		.assoc	= "\xaa\xb8\x7e\x25\x79\x7c\x37\xaf"
-			  "\xce\x36\xc7\xce\xa2\xb4\xc9\x60",
-		.alen	= 16,
-		.ptext	= "\xb9\x82\x0c\x8d\xbd\x1b\xe2\x36"
-			  "\xee\x6c\xf4\xf2\xa1\x7d\xf9\xe2",
-		.plen	= 16,
-		.ctext	= "\xd8\xfd\x44\x45\xf6\x42\x12\x38"
-			  "\xf2\x0b\xea\x4f\x9e\x11\x61\x07"
-			  "\x48\x67\x98\x18\x9b\xd0\x0c\x59"
-			  "\x67\xa4\x11\xb3\x2b\xd6\xc1\x70",
-		.clen	= 32,
-	}, {
-		.key	= "\xc8\x4b\x9b\xf5\x01\xba\x8c\xbd"
-			  "\x0e\xa3\x21\x16\x9f\x46\x2a\x63",
-		.klen	= 16,
-		.iv	= "\xd7\x14\x29\x5d\x45\x59\x36\x44"
-			  "\x2e\xd9\x4d\x3b\x9e\x0f\x5b\xe5",
-		.assoc	= "\xe6\xdd\xb8\xc4\x89\xf8\xe0\xca"
-			  "\x4f\x10\x7a\x5f\x9c\xd8\x8b\x66"
-			  "\x3b",
-		.alen	= 17,
-		.ptext	= "\xf5\xa6\x46\x2c\xce\x97\x8a\x51"
-			  "\x6f\x46\xa6\x83\x9b\xa1\xbc\xe8"
-			  "\x05",
-		.plen	= 17,
-		.ctext	= "\xb1\xab\x53\x4e\xc7\x40\x16\xb6"
-			  "\x71\x3a\x00\x9f\x41\x88\xb0\xb2"
-			  "\x71\x83\x85\x5f\xc8\x79\x0a\x99"
-			  "\x99\xdc\x89\x1c\x88\xd2\x3e\xf9"
-			  "\x83",
-		.clen	= 33,
-	}, {
-		.key	= "\x05\x70\xd5\x94\x12\x36\x35\xd8"
-			  "\x8f\x7d\xd3\xa8\x99\x6a\xed\x69",
-		.klen	= 16,
-		.iv	= "\x14\x39\x63\xfc\x56\xd5\xdf\x5f"
-			  "\xaf\xb3\xff\xcc\x98\x33\x1d\xeb",
-		.assoc	= "\x23\x02\xf1\x64\x9a\x73\x89\xe6"
-			  "\xd0\xea\x2c\xf1\x96\xfc\x4e\x6d"
-			  "\x65\x48\xcb\x0a\xda\xf0\x62\xc0"
-			  "\x38\x1d\x3b\x4a\xe9\x7e\x62",
-		.alen	= 31,
-		.ptext	= "\x32\xcb\x80\xcc\xde\x12\x33\x6d"
-			  "\xf0\x20\x58\x15\x95\xc6\x7f\xee"
-			  "\x2f\xf9\x4e\x2c\x1b\x98\x43\xc7"
-			  "\x68\x28\x73\x40\x9f\x96\x4a",
-		.plen	= 31,
-		.ctext	= "\x29\xc4\xf0\x03\xc1\x86\xdf\x06"
-			  "\x5c\x7b\xef\x64\x87\x00\xd1\x37"
-			  "\xa7\x08\xbc\x7f\x8f\x41\x54\xd0"
-			  "\x3e\xf1\xc3\xa2\x96\x84\xdd\x2a"
-			  "\x2d\x21\x30\xf9\x02\xdb\x06\x0c"
-			  "\xf1\x5a\x66\x69\xe0\xca\x83",
-		.clen	= 47,
-	}, {
-		.key	= "\x41\x94\x0e\x33\x22\xb1\xdd\xf4"
-			  "\x10\x57\x85\x39\x93\x8f\xaf\x70",
-		.klen	= 16,
-		.iv	= "\x50\x5d\x9d\x9b\x66\x50\x88\x7b"
-			  "\x30\x8e\xb1\x5e\x92\x58\xe0\xf1",
-		.assoc	= "\x5f\x27\x2b\x03\xaa\xef\x32\x02"
-			  "\x50\xc4\xde\x82\x90\x21\x11\x73"
-			  "\x8f\x0a\xd6\x8f\xdf\x90\xe4\xda"
-			  "\xf9\x4a\x1a\x23\xc3\xdd\x02\x81",
-		.alen	= 32,
-		.ptext	= "\x6e\xf0\xba\x6b\xee\x8e\xdc\x89"
-			  "\x71\xfb\x0a\xa6\x8f\xea\x41\xf4"
-			  "\x5a\xbb\x59\xb0\x20\x38\xc5\xe0"
-			  "\x29\x56\x52\x19\x79\xf5\xe9\x37",
-		.plen	= 32,
-		.ctext	= "\xe2\x2e\x44\xdf\xd3\x60\x6d\xb2"
-			  "\x70\x57\x37\xc5\xc2\x4f\x8d\x14"
-			  "\xc6\xbf\x8b\xec\xf5\x62\x67\xf2"
-			  "\x2f\xa1\xe6\xd6\xa7\xb1\x8c\x54"
-			  "\xe5\x6b\x49\xf9\x6e\x90\xc3\xaa"
-			  "\x7a\x00\x2e\x4d\x7f\x31\x2e\x81",
-		.clen	= 48,
-	}, {
-		.key	= "\x7e\xb9\x48\xd3\x32\x2d\x86\x10"
-			  "\x91\x31\x37\xcb\x8d\xb3\x72\x76",
-		.klen	= 16,
-		.iv	= "\x8d\x82\xd6\x3b\x76\xcc\x30\x97"
-			  "\xb1\x68\x63\xef\x8c\x7c\xa3\xf7",
-		.assoc	= "\x9c\x4b\x65\xa2\xba\x6b\xdb\x1e"
-			  "\xd1\x9e\x90\x13\x8a\x45\xd3\x79"
-			  "\xba\xcd\xe2\x13\xe4\x30\x66\xf4"
-			  "\xba\x78\xf9\xfb\x9d\x3c\xa1\x58"
-			  "\x1a",
-		.alen	= 33,
-		.ptext	= "\xab\x14\xf3\x0a\xfe\x0a\x85\xa5"
-			  "\xf2\xd5\xbc\x38\x89\x0e\x04\xfb"
-			  "\x84\x7d\x65\x34\x25\xd8\x47\xfa"
-			  "\xeb\x83\x31\xf1\x54\x54\x89\x0d"
-			  "\x9d\x4d\x54\x51\x84\x61\xf6\x8e"
-			  "\x03\x31\xf2\x25\x16\xcc\xaa\xc6"
-			  "\x75\x73\x20\x30\x59\x54\xb2\xf0"
-			  "\x3a\x4b\xe0\x23\x8e\xa6\x08\x35"
-			  "\x8a",
-		.plen	= 65,
-		.ctext	= "\xc7\xca\x26\x61\x57\xee\xa2\xb9"
-			  "\xb1\x37\xde\x95\x06\x90\x11\x08"
-			  "\x4d\x30\x9f\x24\xc0\x56\xb7\xe1"
-			  "\x0b\x9f\xd2\x57\xe9\xd2\xb1\x76"
-			  "\x56\x9a\xb4\x58\xc5\x08\xfc\xb5"
-			  "\xf2\x31\x9b\xc9\xcd\xb3\x64\xdb"
-			  "\x6f\x50\xbf\xf4\x73\x9d\xfb\x6b"
-			  "\xef\x35\x25\x48\xed\xcf\x29\xa8"
-			  "\xac\xc3\xb9\xcb\x61\x8f\x73\x92"
-			  "\x2c\x7a\x6f\xda\xf9\x09\x6f\xe1"
-			  "\xc4",
-		.clen	= 81,
-	}, {
-		.key	= "\xba\xde\x82\x72\x42\xa9\x2f\x2c"
-			  "\x12\x0b\xe9\x5c\x87\xd7\x35\x7c",
-		.klen	= 16,
-		.iv	= "\xc9\xa7\x10\xda\x86\x48\xd9\xb3"
-			  "\x32\x42\x15\x80\x85\xa1\x65\xfe",
-		.assoc	= "\xd8\x70\x9f\x42\xca\xe6\x83\x3a"
-			  "\x52\x79\x42\xa5\x84\x6a\x96\x7f"
-			  "\xe4\x8f\xed\x97\xe9\xd0\xe8\x0d"
-			  "\x7c\xa6\xd8\xd4\x77\x9b\x40\x2e"
-			  "\x28\xce\x57\x34\xcd\x6e\x84\x4c"
-			  "\x17\x3c\xe1\xb2\xa8\x0b\xbb\xf1"
-			  "\x96\x41\x0d\x69\xe8\x54\x0a\xc8"
-			  "\x15\x4e\x91\x92\x89\x4b\xb7\x9b"
-			  "\x21",
-		.alen	= 65,
-		.ptext	= "\xe8\x39\x2d\xaa\x0e\x85\x2d\xc1"
-			  "\x72\xaf\x6e\xc9\x82\x33\xc7\x01"
-			  "\xaf\x40\x70\xb8\x2a\x78\xc9\x14"
-			  "\xac\xb1\x10\xca\x2e\xb3\x28\xe4"
-			  "\xac",
-		.plen	= 33,
-		.ctext	= "\x57\xcd\x3d\x46\xc5\xf9\x68\x3b"
-			  "\x2c\x0f\xb4\x7e\x7b\x64\x3e\x40"
-			  "\xf3\x78\x63\x34\x89\x79\x39\x6b"
-			  "\x61\x64\x4a\x9a\xfa\x70\xa4\xd3"
-			  "\x54\x0b\xea\x05\xa6\x95\x64\xed"
-			  "\x3d\x69\xa2\x0c\x27\x56\x2f\x34"
-			  "\x66",
-		.clen	= 49,
-	}, {
-		.key	= "\xf7\x02\xbb\x11\x52\x24\xd8\x48"
-			  "\x93\xe6\x9b\xee\x81\xfc\xf7\x82",
-		.klen	= 16,
-		.iv	= "\x06\xcc\x4a\x79\x96\xc3\x82\xcf"
-			  "\xb3\x1c\xc7\x12\x7f\xc5\x28\x04",
-		.assoc	= "\x15\x95\xd8\xe1\xda\x62\x2c\x56"
-			  "\xd3\x53\xf4\x36\x7e\x8e\x59\x85",
-		.alen	= 16,
-		.ptext	= "\x24\x5e\x67\x49\x1e\x01\xd6\xdd"
-			  "\xf3\x89\x20\x5b\x7c\x57\x89\x07",
-		.plen	= 16,
-		.ctext	= "\xfc\x85\x06\x28\x8f\xe8\x23\x1f"
-			  "\x33\x98\x87\xde\x08\xb6\xb6\xae"
-			  "\x3e\xa4\xf8\x19\xf1\x92\x60\x39"
-			  "\xb9\x6b\x3f\xdf\xc8\xcb\x30",
-		.clen	= 31,
-	}, {
-		.key	= "\x33\x27\xf5\xb1\x62\xa0\x80\x63"
-			  "\x14\xc0\x4d\x7f\x7b\x20\xba\x89",
-		.klen	= 16,
-		.iv	= "\x42\xf0\x84\x19\xa6\x3f\x2b\xea"
-			  "\x34\xf6\x79\xa3\x79\xe9\xeb\x0a",
-		.assoc	= "\x51\xb9\x12\x80\xea\xde\xd5\x71"
-			  "\x54\x2d\xa6\xc8\x78\xb2\x1b\x8c",
-		.alen	= 16,
-		.ptext	= "\x61\x83\xa0\xe8\x2e\x7d\x7f\xf8"
-			  "\x74\x63\xd2\xec\x76\x7c\x4c\x0d",
-		.plen	= 16,
-		.ctext	= "\x74\x7d\x70\x07\xe9\xba\x01\xee"
-			  "\x6c\xc6\x6f\x50\x25\x33\xbe\x50"
-			  "\x17\xb8\x17\x62\xed\x80\xa2\xf5"
-			  "\x03\xde\x85\x71\x5d\x34",
-		.clen	= 30,
-	}, {
-		.key	= "\x70\x4c\x2f\x50\x72\x1c\x29\x7f"
-			  "\x95\x9a\xff\x10\x75\x45\x7d\x8f",
-		.klen	= 16,
-		.iv	= "\x7f\x15\xbd\xb8\xb6\xba\xd3\x06"
-			  "\xb5\xd1\x2b\x35\x73\x0e\xad\x10",
-		.assoc	= "\x8e\xde\x4c\x20\xfa\x59\x7e\x8d"
-			  "\xd5\x07\x58\x59\x72\xd7\xde\x92",
-		.alen	= 16,
-		.ptext	= "\x9d\xa7\xda\x88\x3e\xf8\x28\x14"
-			  "\xf5\x3e\x85\x7d\x70\xa0\x0f\x13",
-		.plen	= 16,
-		.ctext	= "\xf4\xb3\x85\xf9\xac\xde\xb1\x38"
-			  "\x29\xfd\x6c\x7c\x49\xe5\x1d\xaf"
-			  "\xba\xea\xd4\xfa\x3f\x11\x33\x98",
-		.clen	= 24,
-	}, {
-		.key	= "\xac\x70\x69\xef\x82\x97\xd2\x9b"
-			  "\x15\x74\xb1\xa2\x6f\x69\x3f\x95",
-		.klen	= 16,
-		.iv	= "\xbb\x3a\xf7\x57\xc6\x36\x7c\x22"
-			  "\x36\xab\xde\xc6\x6d\x32\x70\x17",
-		.assoc	= "\xcb\x03\x85\xbf\x0a\xd5\x26\xa9"
-			  "\x56\xe1\x0a\xeb\x6c\xfb\xa1\x98",
-		.alen	= 16,
-		.ptext	= "\xda\xcc\x14\x27\x4e\x74\xd1\x30"
-			  "\x76\x18\x37\x0f\x6a\xc4\xd1\x1a",
-		.plen	= 16,
-		.ctext	= "\xe6\x5c\x49\x4f\x78\xf3\x62\x86"
-			  "\xe1\xb7\xa5\xc3\x32\x88\x3c\x8c"
-			  "\x6e",
-		.clen	= 17,
-	},
-};
-
-/*
- * MORUS-1280 test vectors - generated via reference implementation from
- * SUPERCOP (https://bench.cr.yp.to/supercop.html):
- *
- *   https://bench.cr.yp.to/supercop/supercop-20170228.tar.xz
- *   (see crypto_aead/morus1280128v2/ and crypto_aead/morus1280256v2/ )
- */
-static const struct aead_testvec morus1280_tv_template[] = {
-	{
-		.key	= "\x00\x00\x00\x00\x00\x00\x00\x00"
-			  "\x00\x00\x00\x00\x00\x00\x00\x00",
-		.klen	= 16,
-		.iv	= "\x0f\xc9\x8e\x67\x44\x9e\xaa\x86"
-			  "\x20\x36\x2c\x24\xfe\xc9\x30\x81",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x91\x85\x0f\xf5\x52\x9e\xce\xce"
-			  "\x65\x99\xc7\xbf\xd3\x76\xe8\x98",
-		.clen	= 16,
-	}, {
-		.key	= "\x3c\x24\x39\x9f\x10\x7b\xa8\x1b"
-			  "\x80\xda\xb2\x91\xf9\x24\xc2\x06",
-		.klen	= 16,
-		.iv	= "\x4b\xed\xc8\x07\x54\x1a\x52\xa2"
-			  "\xa1\x10\xde\xb5\xf8\xed\xf3\x87",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x69",
-		.plen	= 1,
-		.ctext	= "\x88\xc3\x4c\xf0\x2f\x43\x76\x13"
-			  "\x96\xda\x76\x34\x33\x4e\xd5\x39"
-			  "\x73",
-		.clen	= 17,
-	}, {
-		.key	= "\x79\x49\x73\x3e\x20\xf7\x51\x37"
-			  "\x01\xb4\x64\x22\xf3\x48\x85\x0c",
-		.klen	= 16,
-		.iv	= "\x88\x12\x01\xa6\x64\x96\xfb\xbe"
-			  "\x22\xea\x90\x47\xf2\x11\xb5\x8e",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\xa6\xa4\x1e\x76\xec\xd4\x50\xcc"
-			  "\x62\x58\xe9\x8f\xef\xa4\x17\x91"
-			  "\xb4\x96\x9f\x6b\xce\x38\xa5\x46"
-			  "\x13\x7d\x64\x93\xd7\x05\xf5",
-		.plen	= 31,
-		.ctext	= "\x3e\x5c\x3b\x58\x3b\x7d\x2a\x22"
-			  "\x75\x0b\x24\xa6\x0e\xc3\xde\x52"
-			  "\x97\x0b\x64\xd4\xce\x90\x52\xf7"
-			  "\xef\xdb\x6a\x38\xd2\xa8\xa1\x0d"
-			  "\xe0\x61\x33\x24\xc6\x4d\x51\xbc"
-			  "\xa4\x21\x74\xcf\x19\x16\x59",
-		.clen	= 47,
-	}, {
-		.key	= "\xb5\x6e\xad\xdd\x30\x72\xfa\x53"
-			  "\x82\x8e\x16\xb4\xed\x6d\x47\x12",
-		.klen	= 16,
-		.iv	= "\xc4\x37\x3b\x45\x74\x11\xa4\xda"
-			  "\xa2\xc5\x42\xd8\xec\x36\x78\x94",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\xe2\xc9\x58\x15\xfc\x4f\xf8\xe8"
-			  "\xe3\x32\x9b\x21\xe9\xc8\xd9\x97"
-			  "\xde\x58\xab\xf0\xd3\xd8\x27\x60"
-			  "\xd5\xaa\x43\x6b\xb1\x64\x95\xa4",
-		.plen	= 32,
-		.ctext	= "\x30\x82\x9c\x2b\x67\xcb\xf9\x1f"
-			  "\xde\x9f\x77\xb2\xda\x92\x61\x5c"
-			  "\x09\x0b\x2d\x9a\x26\xaa\x1c\x06"
-			  "\xab\x74\xb7\x2b\x95\x5f\x9f\xa1"
-			  "\x9a\xff\x50\xa0\xa2\xff\xc5\xad"
-			  "\x21\x8e\x84\x5c\x12\x61\xb2\xae",
-		.clen	= 48,
-	}, {
-		.key	= "\xf2\x92\xe6\x7d\x40\xee\xa3\x6f"
-			  "\x03\x68\xc8\x45\xe7\x91\x0a\x18",
-		.klen	= 16,
-		.iv	= "\x01\x5c\x75\xe5\x84\x8d\x4d\xf6"
-			  "\x23\x9f\xf4\x6a\xe6\x5a\x3b\x9a",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x1f\xee\x92\xb4\x0c\xcb\xa1\x04"
-			  "\x64\x0c\x4d\xb2\xe3\xec\x9c\x9d"
-			  "\x09\x1a\xb7\x74\xd8\x78\xa9\x79"
-			  "\x96\xd8\x22\x43\x8c\xc3\x34\x7b"
-			  "\xc4",
-		.plen	= 33,
-		.ctext	= "\x67\x5d\x8e\x45\xc8\x39\xf5\x17"
-			  "\xc1\x1d\x2a\xdd\x88\x67\xda\x1f"
-			  "\x6d\xe8\x37\x28\x5a\xc1\x5e\x9f"
-			  "\xa6\xec\xc6\x92\x05\x4b\xc0\xa3"
-			  "\x63\xef\x88\xa4\x9b\x0a\x5c\xed"
-			  "\x2b\x6a\xac\x63\x52\xaa\x10\x94"
-			  "\xd0",
-		.clen	= 49,
-	}, {
-		.key	= "\x2e\xb7\x20\x1c\x50\x6a\x4b\x8b"
-			  "\x84\x42\x7a\xd7\xe1\xb5\xcd\x1f",
-		.klen	= 16,
-		.iv	= "\x3d\x80\xae\x84\x94\x09\xf6\x12"
-			  "\xa4\x79\xa6\xfb\xe0\x7f\xfd\xa0",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x5c\x13\xcb\x54\x1c\x47\x4a\x1f"
-			  "\xe5\xe6\xff\x44\xdd\x11\x5f\xa3"
-			  "\x33\xdd\xc2\xf8\xdd\x18\x2b\x93"
-			  "\x57\x05\x01\x1c\x66\x22\xd3\x51"
-			  "\xd3\xdf\x18\xc9\x30\x66\xed\xb1"
-			  "\x96\x58\xd5\x8c\x64\x8c\x7c\xf5"
-			  "\x01\xd0\x74\x5f\x9b\xaa\xf6\xd1"
-			  "\xe6\x16\xa2\xac\xde\x47\x40",
-		.plen	= 63,
-		.ctext	= "\x7d\x61\x1a\x35\x20\xcc\x07\x88"
-			  "\x03\x98\x87\xcf\xc0\x6e\x4d\x19"
-			  "\xe3\xd4\x0b\xfb\x29\x8f\x49\x1a"
-			  "\x3a\x06\x77\xce\x71\x2c\xcd\xdd"
-			  "\xed\xf6\xc9\xbe\xa6\x3b\xb8\xfc"
-			  "\x6c\xbe\x77\xed\x74\x0e\x20\x85"
-			  "\xd0\x65\xde\x24\x6f\xe3\x25\xc5"
-			  "\xdf\x5b\x0f\xbd\x8a\x88\x78\xc9"
-			  "\xe5\x81\x37\xde\x84\x7a\xf6\x84"
-			  "\x99\x7a\x72\x9c\x54\x31\xa1",
-		.clen	= 79,
-	}, {
-		.key	= "\x6b\xdc\x5a\xbb\x60\xe5\xf4\xa6"
-			  "\x05\x1d\x2c\x68\xdb\xda\x8f\x25",
-		.klen	= 16,
-		.iv	= "\x7a\xa5\xe8\x23\xa4\x84\x9e\x2d"
-			  "\x25\x53\x58\x8c\xda\xa3\xc0\xa6",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x98\x37\x05\xf3\x2c\xc2\xf3\x3b"
-			  "\x66\xc0\xb1\xd5\xd7\x35\x21\xaa"
-			  "\x5d\x9f\xce\x7c\xe2\xb8\xad\xad"
-			  "\x19\x33\xe0\xf4\x40\x81\x72\x28"
-			  "\xe1\x8b\x1c\xf8\x91\x78\xff\xaf"
-			  "\xb0\x68\x69\xf2\x27\x35\x91\x84"
-			  "\x2e\x37\x5b\x00\x04\xff\x16\x9c"
-			  "\xb5\x19\x39\xeb\xd9\xcd\x29\x9a",
-		.plen	= 64,
-		.ctext	= "\x05\xc5\xb1\xf9\x1b\xb9\xab\x2c"
-			  "\xa5\x07\x12\xa7\x12\x39\x60\x66"
-			  "\x30\x81\x4a\x03\x78\x28\x45\x52"
-			  "\xd2\x2b\x24\xfd\x8b\xa5\xb7\x66"
-			  "\x6f\x45\xd7\x3b\x67\x6f\x51\xb9"
-			  "\xc0\x3d\x6c\xca\x1e\xae\xff\xb6"
-			  "\x79\xa9\xe4\x82\x5d\x4c\x2d\xdf"
-			  "\xeb\x71\x40\xc9\x2c\x40\x45\x6d"
-			  "\x73\x77\x01\xf3\x4f\xf3\x9d\x2a"
-			  "\x5d\x57\xa8\xa1\x18\xa2\xad\xcb",
-		.clen	= 80,
-	}, {
-		.key	= "\xa7\x00\x93\x5b\x70\x61\x9d\xc2"
-			  "\x86\xf7\xde\xfa\xd5\xfe\x52\x2b",
-		.klen	= 16,
-		.iv	= "\xb6\xca\x22\xc3\xb4\x00\x47\x49"
-			  "\xa6\x2d\x0a\x1e\xd4\xc7\x83\xad",
-		.assoc	= "\xc5",
-		.alen	= 1,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x4d\xbf\x11\xac\x7f\x97\x0b\x2e"
-			  "\x89\x3b\x9d\x0f\x83\x1c\x08\xc3",
-		.clen	= 16,
-	}, {
-		.key	= "\xe4\x25\xcd\xfa\x80\xdd\x46\xde"
-			  "\x07\xd1\x90\x8b\xcf\x23\x15\x31",
-		.klen	= 16,
-		.iv	= "\xf3\xee\x5c\x62\xc4\x7c\xf0\x65"
-			  "\x27\x08\xbd\xaf\xce\xec\x45\xb3",
-		.assoc	= "\x02\xb8\xea\xca\x09\x1b\x9a\xec"
-			  "\x47\x3e\xe9\xd4\xcc\xb5\x76\x34"
-			  "\xe8\x73\x62\x64\xab\x50\xd0\xda"
-			  "\x6b\x83\x66\xaf\x3e\x27\xc9",
-		.alen	= 31,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x5b\xc0\x8d\x54\xe4\xec\xbe\x38"
-			  "\x03\x12\xf9\xcc\x9e\x46\x42\x92",
-		.clen	= 16,
-	}, {
-		.key	= "\x20\x4a\x07\x99\x91\x58\xee\xfa"
-			  "\x88\xab\x42\x1c\xc9\x47\xd7\x38",
-		.klen	= 16,
-		.iv	= "\x2f\x13\x95\x01\xd5\xf7\x99\x81"
-			  "\xa8\xe2\x6f\x41\xc8\x10\x08\xb9",
-		.assoc	= "\x3f\xdc\x24\x69\x19\x96\x43\x08"
-			  "\xc8\x18\x9b\x65\xc6\xd9\x39\x3b"
-			  "\x12\x35\x6e\xe8\xb0\xf0\x52\xf3"
-			  "\x2d\xb0\x45\x87\x18\x86\x68\xf6",
-		.alen	= 32,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x48\xc5\xc3\x4c\x40\x2e\x2f\xc2"
-			  "\x6d\x65\xe0\x67\x9c\x1d\xa0\xf0",
-		.clen	= 16,
-	}, {
-		.key	= "\x5d\x6f\x41\x39\xa1\xd4\x97\x16"
-			  "\x09\x85\xf4\xae\xc3\x6b\x9a\x3e",
-		.klen	= 16,
-		.iv	= "\x6c\x38\xcf\xa1\xe5\x73\x41\x9d"
-			  "\x29\xbc\x21\xd2\xc2\x35\xcb\xbf",
-		.assoc	= "\x7b\x01\x5d\x08\x29\x12\xec\x24"
-			  "\x49\xf3\x4d\xf7\xc0\xfe\xfb\x41"
-			  "\x3c\xf8\x79\x6c\xb6\x90\xd4\x0d"
-			  "\xee\xde\x23\x60\xf2\xe5\x08\xcc"
-			  "\x97",
-		.alen	= 33,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x28\x64\x78\x51\x55\xd8\x56\x4a"
-			  "\x58\x3e\xf7\xbe\xee\x21\xfe\x94",
-		.clen	= 16,
-	}, {
-		.key	= "\x99\x93\x7a\xd8\xb1\x50\x40\x31"
-			  "\x8a\x60\xa6\x3f\xbd\x90\x5d\x44",
-		.klen	= 16,
-		.iv	= "\xa8\x5c\x09\x40\xf5\xef\xea\xb8"
-			  "\xaa\x96\xd3\x64\xbc\x59\x8d\xc6",
-		.assoc	= "\xb8\x26\x97\xa8\x39\x8e\x94\x3f"
-			  "\xca\xcd\xff\x88\xba\x22\xbe\x47"
-			  "\x67\xba\x85\xf1\xbb\x30\x56\x26"
-			  "\xaf\x0b\x02\x38\xcc\x44\xa7\xa3"
-			  "\xa6\xbf\x31\x93\x60\xcd\xda\x63"
-			  "\x2c\xb1\xaa\x19\xc8\x19\xf8\xeb"
-			  "\x03\xa1\xe8\xbe\x37\x54\xec\xa2"
-			  "\xcd\x2c\x45\x58\xbd\x8e\x80",
-		.alen	= 63,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xb3\xa6\x00\x4e\x09\x20\xac\x21"
-			  "\x77\x72\x69\x76\x2d\x36\xe5\xc8",
-		.clen	= 16,
-	}, {
-		.key	= "\xd6\xb8\xb4\x77\xc1\xcb\xe9\x4d"
-			  "\x0a\x3a\x58\xd1\xb7\xb4\x1f\x4a",
-		.klen	= 16,
-		.iv	= "\xe5\x81\x42\xdf\x05\x6a\x93\xd4"
-			  "\x2b\x70\x85\xf5\xb6\x7d\x50\xcc",
-		.assoc	= "\xf4\x4a\xd1\x47\x49\x09\x3d\x5b"
-			  "\x4b\xa7\xb1\x19\xb4\x46\x81\x4d"
-			  "\x91\x7c\x91\x75\xc0\xd0\xd8\x40"
-			  "\x71\x39\xe1\x10\xa6\xa3\x46\x7a"
-			  "\xb4\x6b\x35\xc2\xc1\xdf\xed\x60"
-			  "\x46\xc1\x3e\x7f\x8c\xc2\x0e\x7a"
-			  "\x30\x08\xd0\x5f\xa0\xaa\x0c\x6d"
-			  "\x9c\x2f\xdb\x97\xb8\x15\x69\x01",
-		.alen	= 64,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x65\x33\x7b\xa1\x63\xf4\x20\xdd"
-			  "\xe4\xb9\x4a\xaa\x9a\x21\xaa\x14",
-		.clen	= 16,
-	}, {
-		.key	= "\x12\xdd\xee\x17\xd1\x47\x92\x69"
-			  "\x8b\x14\x0a\x62\xb1\xd9\xe2\x50",
-		.klen	= 16,
-		.iv	= "\x22\xa6\x7c\x7f\x15\xe6\x3c\xf0"
-			  "\xac\x4b\x37\x86\xb0\xa2\x13\xd2",
-		.assoc	= "\x31",
-		.alen	= 1,
-		.ptext	= "\x40",
-		.plen	= 1,
-		.ctext	= "\x1d\x47\x17\x34\x86\xf5\x54\x1a"
-			  "\x6d\x28\xb8\x5d\x6c\xcf\xa0\xb9"
-			  "\xbf",
-		.clen	= 17,
-	}, {
-		.key	= "\x4f\x01\x27\xb6\xe1\xc3\x3a\x85"
-			  "\x0c\xee\xbc\xf4\xab\xfd\xa5\x57",
-		.klen	= 16,
-		.iv	= "\x5e\xcb\xb6\x1e\x25\x62\xe4\x0c"
-			  "\x2d\x25\xe9\x18\xaa\xc6\xd5\xd8",
-		.assoc	= "\x6d\x94\x44\x86\x69\x00\x8f\x93"
-			  "\x4d\x5b\x15\x3c\xa8\x8f\x06\x5a"
-			  "\xe6\x01\xa8\x7e\xca\x10\xdc\x73"
-			  "\xf4\x94\x9f\xc1\x5a\x61\x85",
-		.alen	= 31,
-		.ptext	= "\x7c\x5d\xd3\xee\xad\x9f\x39\x1a"
-			  "\x6d\x92\x42\x61\xa7\x58\x37\xdb"
-			  "\xb0\xb2\x2b\x9f\x0b\xb8\xbd\x7a"
-			  "\x24\xa0\xd6\xb7\x11\x79\x6c",
-		.plen	= 31,
-		.ctext	= "\x78\x90\x52\xae\x0f\xf7\x2e\xef"
-			  "\x63\x09\x08\x58\xb5\x56\xbd\x72"
-			  "\x6e\x42\xcf\x27\x04\x7c\xdb\x92"
-			  "\x18\xe9\xa4\x33\x90\xba\x62\xb5"
-			  "\x70\xd3\x88\x9b\x4f\x05\xa7\x51"
-			  "\x85\x87\x17\x09\x42\xed\x4e",
-		.clen	= 47,
-	}, {
-		.key	= "\x8b\x26\x61\x55\xf1\x3e\xe3\xa1"
-			  "\x8d\xc8\x6e\x85\xa5\x21\x67\x5d",
-		.klen	= 16,
-		.iv	= "\x9b\xef\xf0\xbd\x35\xdd\x8d\x28"
-			  "\xad\xff\x9b\xa9\xa4\xeb\x98\xdf",
-		.assoc	= "\xaa\xb8\x7e\x25\x79\x7c\x37\xaf"
-			  "\xce\x36\xc7\xce\xa2\xb4\xc9\x60"
-			  "\x10\xc3\xb3\x02\xcf\xb0\x5e\x8d"
-			  "\xb5\xc2\x7e\x9a\x35\xc0\x24\xfd",
-		.alen	= 32,
-		.ptext	= "\xb9\x82\x0c\x8d\xbd\x1b\xe2\x36"
-			  "\xee\x6c\xf4\xf2\xa1\x7d\xf9\xe2"
-			  "\xdb\x74\x36\x23\x11\x58\x3f\x93"
-			  "\xe5\xcd\xb5\x90\xeb\xd8\x0c\xb3",
-		.plen	= 32,
-		.ctext	= "\x1d\x2c\x57\xe0\x50\x38\x3d\x41"
-			  "\x2e\x71\xc8\x3b\x92\x43\x58\xaf"
-			  "\x5a\xfb\xad\x8f\xd9\xd5\x8a\x5e"
-			  "\xdb\xf3\xcd\x3a\x2b\xe1\x2c\x1a"
-			  "\xb0\xed\xe3\x0c\x6e\xf9\xf2\xd6"
-			  "\x90\xe6\xb1\x0e\xa5\x8a\xac\xb7",
-		.clen	= 48,
-	}, {
-		.key	= "\xc8\x4b\x9b\xf5\x01\xba\x8c\xbd"
-			  "\x0e\xa3\x21\x16\x9f\x46\x2a\x63",
-		.klen	= 16,
-		.iv	= "\xd7\x14\x29\x5d\x45\x59\x36\x44"
-			  "\x2e\xd9\x4d\x3b\x9e\x0f\x5b\xe5",
-		.assoc	= "\xe6\xdd\xb8\xc4\x89\xf8\xe0\xca"
-			  "\x4f\x10\x7a\x5f\x9c\xd8\x8b\x66"
-			  "\x3b\x86\xbf\x86\xd4\x50\xe0\xa7"
-			  "\x76\xef\x5c\x72\x0f\x1f\xc3\xd4"
-			  "\xee",
-		.alen	= 33,
-		.ptext	= "\xf5\xa6\x46\x2c\xce\x97\x8a\x51"
-			  "\x6f\x46\xa6\x83\x9b\xa1\xbc\xe8"
-			  "\x05\x36\x42\xa7\x16\xf8\xc1\xad"
-			  "\xa7\xfb\x94\x68\xc5\x37\xab\x8a"
-			  "\x72",
-		.plen	= 33,
-		.ctext	= "\x59\x10\x84\x1c\x83\x4c\x8b\xfc"
-			  "\xfd\x2e\x4b\x46\x84\xff\x78\x4e"
-			  "\x50\xda\x5c\xb9\x61\x1d\xf5\xb9"
-			  "\xfe\xbb\x7f\xae\x8c\xc1\x24\xbd"
-			  "\x8c\x6f\x1f\x9b\xce\xc6\xc1\x37"
-			  "\x08\x06\x5a\xe5\x96\x10\x95\xc2"
-			  "\x5e",
-		.clen	= 49,
-	}, {
-		.key	= "\x05\x70\xd5\x94\x12\x36\x35\xd8"
-			  "\x8f\x7d\xd3\xa8\x99\x6a\xed\x69",
-		.klen	= 16,
-		.iv	= "\x14\x39\x63\xfc\x56\xd5\xdf\x5f"
-			  "\xaf\xb3\xff\xcc\x98\x33\x1d\xeb",
-		.assoc	= "\x23\x02\xf1\x64\x9a\x73\x89\xe6"
-			  "\xd0\xea\x2c\xf1\x96\xfc\x4e\x6d"
-			  "\x65\x48\xcb\x0a\xda\xf0\x62\xc0"
-			  "\x38\x1d\x3b\x4a\xe9\x7e\x62\xaa"
-			  "\xfd\xc9\x4a\xa9\xa9\x39\x4b\x54"
-			  "\xc8\x0e\x24\x7f\x5e\x10\x7a\x45"
-			  "\x10\x0b\x56\x85\xad\x54\xaa\x66"
-			  "\xa8\x43\xcd\xd4\x9b\xb7\xfa",
-		.alen	= 63,
-		.ptext	= "\x32\xcb\x80\xcc\xde\x12\x33\x6d"
-			  "\xf0\x20\x58\x15\x95\xc6\x7f\xee"
-			  "\x2f\xf9\x4e\x2c\x1b\x98\x43\xc7"
-			  "\x68\x28\x73\x40\x9f\x96\x4a\x60"
-			  "\x80\xf4\x4b\xf4\xc1\x3d\xd0\x93"
-			  "\xcf\x12\xc9\x59\x8f\x7a\x7f\xa8"
-			  "\x1b\xa5\x50\xed\x87\xa9\x72\x59"
-			  "\x9c\x44\xb2\xa4\x99\x98\x34",
-		.plen	= 63,
-		.ctext	= "\x9a\x12\xbc\xdf\x72\xa8\x56\x22"
-			  "\x49\x2d\x07\x92\xfc\x3d\x6d\x5f"
-			  "\xef\x36\x19\xae\x91\xfa\xd6\x63"
-			  "\x46\xea\x8a\x39\x14\x21\xa6\x37"
-			  "\x18\xfc\x97\x3e\x16\xa5\x4d\x39"
-			  "\x45\x2e\x69\xcc\x9c\x5f\xdf\x6d"
-			  "\x5e\xa2\xbf\xac\x83\x32\x72\x52"
-			  "\x58\x58\x23\x40\xfd\xa5\xc2\xe6"
-			  "\xe9\x5a\x50\x98\x00\x58\xc9\x86"
-			  "\x4f\x20\x37\xdb\x7b\x22\xa3",
-		.clen	= 79,
-	}, {
-		.key	= "\x41\x94\x0e\x33\x22\xb1\xdd\xf4"
-			  "\x10\x57\x85\x39\x93\x8f\xaf\x70",
-		.klen	= 16,
-		.iv	= "\x50\x5d\x9d\x9b\x66\x50\x88\x7b"
-			  "\x30\x8e\xb1\x5e\x92\x58\xe0\xf1",
-		.assoc	= "\x5f\x27\x2b\x03\xaa\xef\x32\x02"
-			  "\x50\xc4\xde\x82\x90\x21\x11\x73"
-			  "\x8f\x0a\xd6\x8f\xdf\x90\xe4\xda"
-			  "\xf9\x4a\x1a\x23\xc3\xdd\x02\x81"
-			  "\x0b\x76\x4f\xd7\x0a\x4b\x5e\x51"
-			  "\xe3\x1d\xb9\xe5\x21\xb9\x8f\xd4"
-			  "\x3d\x72\x3e\x26\x16\xa9\xca\x32"
-			  "\x77\x47\x63\x14\x95\x3d\xe4\x34",
-		.alen	= 64,
-		.ptext	= "\x6e\xf0\xba\x6b\xee\x8e\xdc\x89"
-			  "\x71\xfb\x0a\xa6\x8f\xea\x41\xf4"
-			  "\x5a\xbb\x59\xb0\x20\x38\xc5\xe0"
-			  "\x29\x56\x52\x19\x79\xf5\xe9\x37"
-			  "\x8f\xa1\x50\x23\x22\x4f\xe3\x91"
-			  "\xe9\x21\x5e\xbf\x52\x23\x95\x37"
-			  "\x48\x0c\x38\x8f\xf0\xff\x92\x24"
-			  "\x6b\x47\x49\xe3\x94\x1f\x1e\x01",
-		.plen	= 64,
-		.ctext	= "\xe6\xeb\x92\x5a\x5b\xf0\x2d\xbb"
-			  "\x23\xec\x35\xe3\xae\xc9\xfb\x0b"
-			  "\x90\x14\x46\xeb\xa8\x8d\xb0\x9b"
-			  "\x39\xda\x8b\x48\xec\xb2\x00\x4e"
-			  "\x80\x6f\x46\x4f\x9b\x1e\xbb\x35"
-			  "\xea\x5a\xbc\xa2\x36\xa5\x89\x45"
-			  "\xc2\xd6\xd7\x15\x0b\xf6\x6c\x56"
-			  "\xec\x99\x7d\x61\xb3\x15\x93\xed"
-			  "\x83\x1e\xd9\x48\x84\x0b\x37\xfe"
-			  "\x95\x74\x44\xd5\x54\xa6\x27\x06",
-		.clen	= 80,
-	}, {
-		.key	= "\x7e\xb9\x48\xd3\x32\x2d\x86\x10"
-			  "\x91\x31\x37\xcb\x8d\xb3\x72\x76",
-		.klen	= 16,
-		.iv	= "\x8d\x82\xd6\x3b\x76\xcc\x30\x97"
-			  "\xb1\x68\x63\xef\x8c\x7c\xa3\xf7",
-		.assoc	= "\x9c\x4b\x65\xa2\xba\x6b\xdb\x1e"
-			  "\xd1\x9e\x90\x13\x8a\x45\xd3\x79"
-			  "\xba\xcd\xe2\x13\xe4\x30\x66\xf4"
-			  "\xba\x78\xf9\xfb\x9d\x3c\xa1\x58"
-			  "\x1a\x22\x53\x05\x6b\x5c\x71\x4f"
-			  "\xfd\x2d\x4d\x4c\xe5\x62\xa5\x63"
-			  "\x6a\xda\x26\xc8\x7f\xff\xea\xfd"
-			  "\x46\x4a\xfa\x53\x8f\xc4\xcd\x68"
-			  "\x58",
-		.alen	= 65,
-		.ptext	= "\xab\x14\xf3\x0a\xfe\x0a\x85\xa5"
-			  "\xf2\xd5\xbc\x38\x89\x0e\x04\xfb"
-			  "\x84\x7d\x65\x34\x25\xd8\x47\xfa"
-			  "\xeb\x83\x31\xf1\x54\x54\x89\x0d"
-			  "\x9d\x4d\x54\x51\x84\x61\xf6\x8e"
-			  "\x03\x31\xf2\x25\x16\xcc\xaa\xc6"
-			  "\x75\x73\x20\x30\x59\x54\xb2\xf0"
-			  "\x3a\x4b\xe0\x23\x8e\xa6\x08\x35"
-			  "\x8a\xdf\x27\xa0\xe4\x60\x99\xae"
-			  "\x8e\x43\xd9\x39\x7b\x10\x40\x67"
-			  "\x5c\x7e\xc9\x70\x63\x34\xca\x59"
-			  "\xfe\x86\xbc\xb7\x9c\x39\xf3\x6d"
-			  "\x6a\x41\x64\x6f\x16\x7f\x65\x7e"
-			  "\x89\x84\x68\xeb\xb0\x51\xbe\x55"
-			  "\x33\x16\x59\x6c\x3b\xef\x88\xad"
-			  "\x2f\xab\xbc\x25\x76\x87\x41\x2f"
-			  "\x36",
-		.plen	= 129,
-		.ctext	= "\x89\x24\x27\x86\xdc\xd7\x6b\xd9"
-			  "\xd1\xcd\xdc\x16\xdd\x2c\xc1\xfb"
-			  "\x52\xb5\xb3\xab\x50\x99\x3f\xa0"
-			  "\x38\xa4\x74\xa5\x04\x15\x63\x05"
-			  "\x8f\x54\x81\x06\x5a\x6b\xa4\x63"
-			  "\x6d\xa7\x21\xcb\xff\x42\x30\x8e"
-			  "\x3b\xd1\xca\x3f\x4b\x1a\xb8\xc3"
-			  "\x42\x01\xe6\xbc\x75\x15\x87\xee"
-			  "\xc9\x8e\x65\x01\xd9\xd8\xb5\x9f"
-			  "\x48\x86\xa6\x5f\x2c\xc7\xb5\xb0"
-			  "\xed\x5d\x14\x7c\x3f\x40\xb1\x0b"
-			  "\x72\xef\x94\x8d\x7a\x85\x56\xe5"
-			  "\x56\x08\x15\x56\xba\xaf\xbd\xf0"
-			  "\x20\xef\xa0\xf6\xa9\xad\xa2\xc9"
-			  "\x1c\x3b\x28\x51\x7e\x77\xb2\x18"
-			  "\x4f\x61\x64\x37\x22\x36\x6d\x78"
-			  "\xed\xed\x35\xe8\x83\xa5\xec\x25"
-			  "\x6b\xff\x5f\x1a\x09\x96\x3d\xdc"
-			  "\x20",
-		.clen	= 145,
-	}, {
-		.key	= "\xba\xde\x82\x72\x42\xa9\x2f\x2c"
-			  "\x12\x0b\xe9\x5c\x87\xd7\x35\x7c",
-		.klen	= 16,
-		.iv	= "\xc9\xa7\x10\xda\x86\x48\xd9\xb3"
-			  "\x32\x42\x15\x80\x85\xa1\x65\xfe",
-		.assoc	= "\xd8\x70\x9f\x42\xca\xe6\x83\x3a"
-			  "\x52\x79\x42\xa5\x84\x6a\x96\x7f"
-			  "\xe4\x8f\xed\x97\xe9\xd0\xe8\x0d"
-			  "\x7c\xa6\xd8\xd4\x77\x9b\x40\x2e"
-			  "\x28\xce\x57\x34\xcd\x6e\x84\x4c"
-			  "\x17\x3c\xe1\xb2\xa8\x0b\xbb\xf1"
-			  "\x96\x41\x0d\x69\xe8\x54\x0a\xc8"
-			  "\x15\x4e\x91\x92\x89\x4b\xb7\x9b"
-			  "\x21\xf7\x42\x89\xac\x12\x2a\x54"
-			  "\x69\xee\x18\xc7\x8d\xed\xe8\xfd"
-			  "\xbb\x04\x28\xe6\x8a\x3c\x98\xc1"
-			  "\x04\x2d\xa9\xa1\x24\x83\xff\xe9"
-			  "\x55\x7a\xf0\xd1\xf6\x63\x05\xe1"
-			  "\xd9\x1e\x75\x72\xc1\x9f\xae\x32"
-			  "\xe1\x6b\xcd\x9e\x61\x19\x23\x86"
-			  "\xd9\xd2\xaf\x8e\xd5\xd3\xa8\xa9"
-			  "\x51",
-		.alen	= 129,
-		.ptext	= "\xe8\x39\x2d\xaa\x0e\x85\x2d\xc1"
-			  "\x72\xaf\x6e\xc9\x82\x33\xc7\x01"
-			  "\xaf\x40\x70\xb8\x2a\x78\xc9\x14"
-			  "\xac\xb1\x10\xca\x2e\xb3\x28\xe4"
-			  "\xac\xfa\x58\x7f\xe5\x73\x09\x8c"
-			  "\x1d\x40\x87\x8c\xd9\x75\xc0\x55"
-			  "\xa2\xda\x07\xd1\xc2\xa9\xd1\xbb"
-			  "\x09\x4f\x77\x62\x88\x2d\xf2\x68"
-			  "\x54",
-		.plen	= 65,
-		.ctext	= "\x36\x78\xb9\x22\xde\x62\x35\x55"
-			  "\x1a\x7a\xf5\x45\xbc\xd7\x15\x82"
-			  "\x01\xe9\x5a\x07\xea\x46\xaf\x91"
-			  "\xcb\x73\xa5\xee\xe1\xb4\xbf\xc2"
-			  "\xdb\xd2\x9d\x59\xde\xfc\x83\x00"
-			  "\xf5\x46\xac\x97\xd5\x57\xa9\xb9"
-			  "\x1f\x8c\xe8\xca\x68\x8b\x91\x0c"
-			  "\x01\xbe\x0a\xaf\x7c\xf6\x67\xa4"
-			  "\xbf\xbc\x88\x3f\x5d\xd1\xf9\x19"
-			  "\x0f\x9d\xb2\xaf\xb9\x6e\x17\xdf"
-			  "\xa2",
-		.clen	= 81,
-	}, {
-		.key	= "\xf7\x02\xbb\x11\x52\x24\xd8\x48"
-			  "\x93\xe6\x9b\xee\x81\xfc\xf7\x82",
-		.klen	= 16,
-		.iv	= "\x06\xcc\x4a\x79\x96\xc3\x82\xcf"
-			  "\xb3\x1c\xc7\x12\x7f\xc5\x28\x04",
-		.assoc	= "\x15\x95\xd8\xe1\xda\x62\x2c\x56"
-			  "\xd3\x53\xf4\x36\x7e\x8e\x59\x85"
-			  "\x0e\x51\xf9\x1c\xee\x70\x6a\x27"
-			  "\x3d\xd3\xb7\xac\x51\xfa\xdf\x05",
-		.alen	= 32,
-		.ptext	= "\x24\x5e\x67\x49\x1e\x01\xd6\xdd"
-			  "\xf3\x89\x20\x5b\x7c\x57\x89\x07"
-			  "\xd9\x02\x7c\x3d\x2f\x18\x4b\x2d"
-			  "\x6e\xde\xee\xa2\x08\x12\xc7\xba",
-		.plen	= 32,
-		.ctext	= "\x08\x1b\x95\x0e\x41\x95\x02\x4b"
-			  "\x9c\xbb\xa8\xd0\x7c\xd3\x44\x6e"
-			  "\x89\x14\x33\x70\x0a\xbc\xea\x39"
-			  "\x88\xaa\x2b\xd5\x73\x11\x55\xf5"
-			  "\x33\x33\x9c\xd7\x42\x34\x49\x8e"
-			  "\x2f\x03\x30\x05\x47\xaf\x34",
-		.clen	= 47,
-	}, {
-		.key	= "\x33\x27\xf5\xb1\x62\xa0\x80\x63"
-			  "\x14\xc0\x4d\x7f\x7b\x20\xba\x89",
-		.klen	= 16,
-		.iv	= "\x42\xf0\x84\x19\xa6\x3f\x2b\xea"
-			  "\x34\xf6\x79\xa3\x79\xe9\xeb\x0a",
-		.assoc	= "\x51\xb9\x12\x80\xea\xde\xd5\x71"
-			  "\x54\x2d\xa6\xc8\x78\xb2\x1b\x8c"
-			  "\x39\x14\x05\xa0\xf3\x10\xec\x41"
-			  "\xff\x01\x95\x84\x2b\x59\x7f\xdb",
-		.alen	= 32,
-		.ptext	= "\x61\x83\xa0\xe8\x2e\x7d\x7f\xf8"
-			  "\x74\x63\xd2\xec\x76\x7c\x4c\x0d"
-			  "\x03\xc4\x88\xc1\x35\xb8\xcd\x47"
-			  "\x2f\x0c\xcd\x7a\xe2\x71\x66\x91",
-		.plen	= 32,
-		.ctext	= "\x97\xca\xf4\xe0\x8d\x89\xbf\x68"
-			  "\x0c\x60\xb9\x27\xdf\xaa\x41\xc6"
-			  "\x25\xd8\xf7\x1f\x10\x15\x48\x61"
-			  "\x4c\x95\x00\xdf\x51\x9b\x7f\xe6"
-			  "\x24\x40\x9e\xbe\x3b\xeb\x1b\x98"
-			  "\xb9\x9c\xe5\xef\xf2\x05",
-		.clen	= 46,
-	}, {
-		.key	= "\x70\x4c\x2f\x50\x72\x1c\x29\x7f"
-			  "\x95\x9a\xff\x10\x75\x45\x7d\x8f",
-		.klen	= 16,
-		.iv	= "\x7f\x15\xbd\xb8\xb6\xba\xd3\x06"
-			  "\xb5\xd1\x2b\x35\x73\x0e\xad\x10",
-		.assoc	= "\x8e\xde\x4c\x20\xfa\x59\x7e\x8d"
-			  "\xd5\x07\x58\x59\x72\xd7\xde\x92"
-			  "\x63\xd6\x10\x24\xf8\xb0\x6e\x5a"
-			  "\xc0\x2e\x74\x5d\x06\xb8\x1e\xb2",
-		.alen	= 32,
-		.ptext	= "\x9d\xa7\xda\x88\x3e\xf8\x28\x14"
-			  "\xf5\x3e\x85\x7d\x70\xa0\x0f\x13"
-			  "\x2e\x86\x93\x45\x3a\x58\x4f\x61"
-			  "\xf0\x3a\xac\x53\xbc\xd0\x06\x68",
-		.plen	= 32,
-		.ctext	= "\x63\x4c\x2a\x8e\xb4\x6b\x63\x0d"
-			  "\xb5\xec\x9b\x4e\x12\x23\xa3\xcf"
-			  "\x1a\x5a\x70\x15\x5a\x10\x40\x51"
-			  "\xca\x47\x4c\x9d\xc9\x97\xf4\x77"
-			  "\xdb\xc8\x10\x2d\xdc\x65\x20\x3f",
-		.clen	= 40,
-	}, {
-		.key	= "\xac\x70\x69\xef\x82\x97\xd2\x9b"
-			  "\x15\x74\xb1\xa2\x6f\x69\x3f\x95",
-		.klen	= 16,
-		.iv	= "\xbb\x3a\xf7\x57\xc6\x36\x7c\x22"
-			  "\x36\xab\xde\xc6\x6d\x32\x70\x17",
-		.assoc	= "\xcb\x03\x85\xbf\x0a\xd5\x26\xa9"
-			  "\x56\xe1\x0a\xeb\x6c\xfb\xa1\x98"
-			  "\x8d\x98\x1c\xa8\xfe\x50\xf0\x74"
-			  "\x81\x5c\x53\x35\xe0\x17\xbd\x88",
-		.alen	= 32,
-		.ptext	= "\xda\xcc\x14\x27\x4e\x74\xd1\x30"
-			  "\x76\x18\x37\x0f\x6a\xc4\xd1\x1a"
-			  "\x58\x49\x9f\xc9\x3f\xf8\xd1\x7a"
-			  "\xb2\x67\x8b\x2b\x96\x2f\xa5\x3e",
-		.plen	= 32,
-		.ctext	= "\xf1\x62\x44\xc7\x5f\x19\xca\x43"
-			  "\x47\x2c\xaf\x68\x82\xbd\x51\xef"
-			  "\x3d\x65\xd8\x45\x2d\x06\x07\x78"
-			  "\x08\x2e\xb3\x23\xcd\x81\x12\x55"
-			  "\x1a",
-		.clen	= 33,
-	}, {
-		.key	= "\xe9\x95\xa2\x8f\x93\x13\x7b\xb7"
-			  "\x96\x4e\x63\x33\x69\x8d\x02\x9b"
-			  "\x23\xf9\x22\xeb\x80\xa0\xb1\x81"
-			  "\xe2\x73\xc3\x21\x4d\x47\x8d\xf4",
-		.klen	= 32,
-		.iv	= "\xf8\x5e\x31\xf7\xd7\xb2\x25\x3e"
-			  "\xb7\x85\x90\x58\x67\x57\x33\x1d",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xdf\x2f\x83\xc0\x45\x4a\x2c\xcf"
-			  "\xb9\xd2\x41\xf6\x80\xa1\x52\x70",
-		.clen	= 16,
-	}, {
-		.key	= "\x25\xba\xdc\x2e\xa3\x8f\x24\xd3"
-			  "\x17\x29\x15\xc5\x63\xb2\xc5\xa1"
-			  "\x4d\xbc\x2d\x6f\x85\x40\x33\x9a"
-			  "\xa3\xa0\xa1\xfa\x27\xa6\x2c\xca",
-		.klen	= 32,
-		.iv	= "\x34\x83\x6a\x96\xe7\x2d\xce\x5a"
-			  "\x38\x5f\x42\xe9\x61\x7b\xf5\x23",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x53",
-		.plen	= 1,
-		.ctext	= "\x01\xd8\x55\x3c\xc0\x5a\x4b\xc7"
-			  "\x01\xf4\x08\xe3\x0d\xf7\xf0\x78"
-			  "\x53",
-		.clen	= 17,
-	}, {
-		.key	= "\x62\xdf\x16\xcd\xb3\x0a\xcc\xef"
-			  "\x98\x03\xc7\x56\x5d\xd6\x87\xa8"
-			  "\x77\x7e\x39\xf3\x8a\xe0\xb5\xb4"
-			  "\x65\xce\x80\xd2\x01\x05\xcb\xa1",
-		.klen	= 32,
-		.iv	= "\x71\xa8\xa4\x35\xf7\xa9\x76\x75"
-			  "\xb8\x39\xf4\x7a\x5b\x9f\xb8\x29",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x8f\x3a\xc1\x05\x7f\xe7\xcb\x83"
-			  "\xf9\xa6\x4d\xc3\x58\x31\x19\x2c"
-			  "\xd7\x90\xc2\x56\x4e\xd8\x57\xc7"
-			  "\xf6\xf0\x27\xb4\x25\x4c\x83",
-		.plen	= 31,
-		.ctext	= "\xc2\x4b\x41\x0f\x2d\xb9\x62\x07"
-			  "\xff\x8e\x74\xf8\xa1\xa6\xd5\x37"
-			  "\xa5\x64\x31\x5c\xca\x73\x9b\x43"
-			  "\xe6\x70\x63\x46\x95\xcb\xf7\xb5"
-			  "\x20\x8c\x75\x7a\x2a\x17\x2f\xa9"
-			  "\xb8\x4d\x11\x42\xd1\xf8\xf1",
-		.clen	= 47,
-	}, {
-		.key	= "\x9e\x03\x4f\x6d\xc3\x86\x75\x0a"
-			  "\x19\xdd\x79\xe8\x57\xfb\x4a\xae"
-			  "\xa2\x40\x45\x77\x90\x80\x37\xce"
-			  "\x26\xfb\x5f\xaa\xdb\x64\x6b\x77",
-		.klen	= 32,
-		.iv	= "\xae\xcc\xde\xd5\x07\x25\x1f\x91"
-			  "\x39\x14\xa6\x0c\x55\xc4\x7b\x30",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\xcc\x5f\xfb\xa4\x8f\x63\x74\x9f"
-			  "\x7a\x81\xff\x55\x52\x56\xdc\x33"
-			  "\x01\x52\xcd\xdb\x53\x78\xd9\xe1"
-			  "\xb7\x1d\x06\x8d\xff\xab\x22\x98",
-		.plen	= 32,
-		.ctext	= "\xbb\x01\x7c\xd1\x2c\x33\x7b\x37"
-			  "\x0a\xee\xc4\x30\x19\xd7\x3a\x6f"
-			  "\xf8\x2b\x67\xf5\x3b\x84\x87\x2a"
-			  "\xfb\x07\x7a\x82\xb5\xe4\x85\x26"
-			  "\x1e\xa8\xe5\x04\x54\xce\xe5\x5f"
-			  "\xb5\x3f\xc1\xd5\x7f\xbd\xd2\xa6",
-		.clen	= 48,
-	}, {
-		.key	= "\xdb\x28\x89\x0c\xd3\x01\x1e\x26"
-			  "\x9a\xb7\x2b\x79\x51\x1f\x0d\xb4"
-			  "\xcc\x03\x50\xfc\x95\x20\xb9\xe7"
-			  "\xe8\x29\x3e\x83\xb5\xc3\x0a\x4e",
-		.klen	= 32,
-		.iv	= "\xea\xf1\x18\x74\x17\xa0\xc8\xad"
-			  "\xba\xee\x58\x9d\x4f\xe8\x3d\x36",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x08\x84\x34\x44\x9f\xde\x1c\xbb"
-			  "\xfb\x5b\xb1\xe6\x4c\x7a\x9f\x39"
-			  "\x2c\x14\xd9\x5f\x59\x18\x5b\xfb"
-			  "\x79\x4b\xe5\x65\xd9\x0a\xc1\x6f"
-			  "\x2e",
-		.plen	= 33,
-		.ctext	= "\xc2\xf4\x40\x55\xf9\x59\xff\x73"
-			  "\x08\xf5\x98\x92\x0c\x7b\x35\x9a"
-			  "\xa8\xf4\x42\x7e\x6f\x93\xca\x22"
-			  "\x23\x06\x1e\xf8\x89\x22\xf4\x46"
-			  "\x7c\x7c\x67\x75\xab\xe5\x75\xaa"
-			  "\x15\xd7\x83\x19\xfd\x31\x59\x5b"
-			  "\x32",
-		.clen	= 49,
-	}, {
-		.key	= "\x17\x4d\xc3\xab\xe3\x7d\xc7\x42"
-			  "\x1b\x91\xdd\x0a\x4b\x43\xcf\xba"
-			  "\xf6\xc5\x5c\x80\x9a\xc0\x3b\x01"
-			  "\xa9\x56\x1d\x5b\x8f\x22\xa9\x25",
-		.klen	= 32,
-		.iv	= "\x27\x16\x51\x13\x27\x1c\x71\xc9"
-			  "\x3b\xc8\x0a\x2f\x49\x0c\x00\x3c",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x45\xa8\x6e\xe3\xaf\x5a\xc5\xd7"
-			  "\x7c\x35\x63\x77\x46\x9f\x61\x3f"
-			  "\x56\xd7\xe4\xe3\x5e\xb8\xdc\x14"
-			  "\x3a\x79\xc4\x3e\xb3\x69\x61\x46"
-			  "\x3c\xb6\x83\x4e\xb4\x26\xc7\x73"
-			  "\x22\xda\x52\x8b\x7d\x11\x98\xea"
-			  "\x62\xe1\x14\x1e\xdc\xfe\x0f\xad"
-			  "\x20\x76\x5a\xdc\x4e\x71\x13",
-		.plen	= 63,
-		.ctext	= "\xc9\x82\x3b\x4b\x87\x84\xa5\xdb"
-			  "\xa0\x8c\xd3\x3e\x7f\x8d\xe8\x28"
-			  "\x2a\xdc\xfa\x01\x84\x87\x9a\x70"
-			  "\x81\x75\x37\x0a\xd2\x75\xa9\xb6"
-			  "\x21\x72\xee\x7e\x65\x95\xe5\xcc"
-			  "\x01\xb7\x39\xa6\x51\x15\xca\xff"
-			  "\x61\xdc\x97\x38\xcc\xf4\xca\xc7"
-			  "\x83\x9b\x05\x11\x72\x60\xf0\xb4"
-			  "\x7e\x06\xab\x0a\xc0\xbb\x59\x23"
-			  "\xaa\x2d\xfc\x4e\x35\x05\x59",
-		.clen	= 79,
-	}, {
-		.key	= "\x54\x71\xfd\x4b\xf3\xf9\x6f\x5e"
-			  "\x9c\x6c\x8f\x9c\x45\x68\x92\xc1"
-			  "\x21\x87\x67\x04\x9f\x60\xbd\x1b"
-			  "\x6a\x84\xfc\x34\x6a\x81\x48\xfb",
-		.klen	= 32,
-		.iv	= "\x63\x3b\x8b\xb3\x37\x98\x1a\xe5"
-			  "\xbc\xa2\xbc\xc0\x43\x31\xc2\x42",
-		.assoc	= "",
-		.alen	= 0,
-		.ptext	= "\x81\xcd\xa8\x82\xbf\xd6\x6e\xf3"
-			  "\xfd\x0f\x15\x09\x40\xc3\x24\x45"
-			  "\x81\x99\xf0\x67\x63\x58\x5e\x2e"
-			  "\xfb\xa6\xa3\x16\x8d\xc8\x00\x1c"
-			  "\x4b\x62\x87\x7c\x15\x38\xda\x70"
-			  "\x3d\xea\xe7\xf2\x40\xba\xae\x79"
-			  "\x8f\x48\xfc\xbf\x45\x53\x2e\x78"
-			  "\xef\x79\xf0\x1b\x49\xf7\xfd\x9c",
-		.plen	= 64,
-		.ctext	= "\x11\x7c\x7d\xef\xce\x29\x95\xec"
-			  "\x7e\x9f\x42\xa6\x26\x07\xa1\x75"
-			  "\x2f\x4e\x09\x9a\xf6\x6b\xc2\xfa"
-			  "\x0d\xd0\x17\xdc\x25\x1e\x9b\xdc"
-			  "\x5f\x8c\x1c\x60\x15\x4f\x9b\x20"
-			  "\x7b\xff\xcd\x82\x60\x84\xf4\xa5"
-			  "\x20\x9a\x05\x19\x5b\x02\x0a\x72"
-			  "\x43\x11\x26\x58\xcf\xc5\x41\xcf"
-			  "\x13\xcc\xde\x32\x92\xfa\x86\xf2"
-			  "\xaf\x16\xe8\x8f\xca\xb6\xfd\x54",
-		.clen	= 80,
-	}, {
-		.key	= "\x90\x96\x36\xea\x03\x74\x18\x7a"
-			  "\x1d\x46\x42\x2d\x3f\x8c\x54\xc7"
-			  "\x4b\x4a\x73\x89\xa4\x00\x3f\x34"
-			  "\x2c\xb1\xdb\x0c\x44\xe0\xe8\xd2",
-		.klen	= 32,
-		.iv	= "\xa0\x5f\xc5\x52\x47\x13\xc2\x01"
-			  "\x3d\x7c\x6e\x52\x3d\x55\x85\x48",
-		.assoc	= "\xaf",
-		.alen	= 1,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x9b\xc5\x3b\x20\x0a\x88\x56\xbe"
-			  "\x69\xdf\xc4\xc4\x02\x46\x3a\xf0",
-		.clen	= 16,
-	}, {
-		.key	= "\xcd\xbb\x70\x89\x13\xf0\xc1\x95"
-			  "\x9e\x20\xf4\xbf\x39\xb1\x17\xcd"
-			  "\x76\x0c\x7f\x0d\xa9\xa0\xc1\x4e"
-			  "\xed\xdf\xb9\xe4\x1e\x3f\x87\xa8",
-		.klen	= 32,
-		.iv	= "\xdc\x84\xfe\xf1\x58\x8f\x6b\x1c"
-			  "\xbe\x57\x20\xe3\x37\x7a\x48\x4f",
-		.assoc	= "\xeb\x4d\x8d\x59\x9c\x2e\x15\xa3"
-			  "\xde\x8d\x4d\x07\x36\x43\x78\xd0"
-			  "\x0b\x6d\x84\x4f\x2c\xf0\x82\x5b"
-			  "\x4e\xf6\x29\xd1\x8b\x6f\x56",
-		.alen	= 31,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xe0\x6d\xa1\x07\x98\x2f\x40\x2d"
-			  "\x2e\x9a\xd6\x61\x43\xc0\x74\x69",
-		.clen	= 16,
-	}, {
-		.key	= "\x0a\xe0\xaa\x29\x24\x6c\x6a\xb1"
-			  "\x1f\xfa\xa6\x50\x33\xd5\xda\xd3"
-			  "\xa0\xce\x8a\x91\xae\x40\x43\x68"
-			  "\xae\x0d\x98\xbd\xf8\x9e\x26\x7f",
-		.klen	= 32,
-		.iv	= "\x19\xa9\x38\x91\x68\x0b\x14\x38"
-			  "\x3f\x31\xd2\x74\x31\x9e\x0a\x55",
-		.assoc	= "\x28\x72\xc7\xf8\xac\xaa\xbe\xbf"
-			  "\x5f\x67\xff\x99\x30\x67\x3b\xd6"
-			  "\x35\x2f\x90\xd3\x31\x90\x04\x74"
-			  "\x0f\x23\x08\xa9\x65\xce\xf6\xea",
-		.alen	= 32,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\xb9\x57\x13\x3e\x82\x31\x61\x65"
-			  "\x0d\x7f\x6c\x96\x93\x5c\x50\xe2",
-		.clen	= 16,
-	}, {
-		.key	= "\x46\x04\xe3\xc8\x34\xe7\x12\xcd"
-			  "\xa0\xd4\x58\xe2\x2d\xf9\x9c\xda"
-			  "\xca\x91\x96\x15\xb4\xe0\xc5\x81"
-			  "\x70\x3a\x77\x95\xd2\xfd\xc5\x55",
-		.klen	= 32,
-		.iv	= "\x55\xcd\x72\x30\x78\x86\xbd\x54"
-			  "\xc0\x0b\x84\x06\x2b\xc2\xcd\x5b",
-		.assoc	= "\x64\x97\x00\x98\xbc\x25\x67\xdb"
-			  "\xe0\x41\xb1\x2a\x2a\x8c\xfe\xdd"
-			  "\x5f\xf2\x9c\x58\x36\x30\x86\x8e"
-			  "\xd1\x51\xe6\x81\x3f\x2d\x95\xc1"
-			  "\x01",
-		.alen	= 33,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x81\x96\x34\xde\xbb\x36\xdd\x3e"
-			  "\x4e\x5e\xcb\x44\x21\xb8\x3f\xf1",
-		.clen	= 16,
-	}, {
-		.key	= "\x83\x29\x1d\x67\x44\x63\xbb\xe9"
-			  "\x20\xaf\x0a\x73\x27\x1e\x5f\xe0"
-			  "\xf5\x53\xa1\x9a\xb9\x80\x47\x9b"
-			  "\x31\x68\x56\x6e\xac\x5c\x65\x2c",
-		.klen	= 32,
-		.iv	= "\x92\xf2\xac\xcf\x88\x02\x65\x70"
-			  "\x41\xe5\x36\x97\x25\xe7\x90\x61",
-		.assoc	= "\xa1\xbb\x3a\x37\xcc\xa1\x10\xf7"
-			  "\x61\x1c\x63\xbc\x24\xb0\xc0\xe3"
-			  "\x8a\xb4\xa7\xdc\x3b\xd0\x08\xa8"
-			  "\x92\x7f\xc5\x5a\x19\x8c\x34\x97"
-			  "\x0f\x95\x9b\x18\xe4\x8d\xb4\x24"
-			  "\xb9\x33\x28\x18\xe1\x9d\x14\xe0"
-			  "\x64\xb2\x89\x7d\x78\xa8\x05\x7e"
-			  "\x07\x8c\xfc\x88\x2d\xb8\x53",
-		.alen	= 63,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x2e\x99\xb6\x79\x57\x56\x80\x36"
-			  "\x8e\xc4\x1c\x12\x7d\x71\x36\x0c",
-		.clen	= 16,
-	}, {
-		.key	= "\xbf\x4e\x57\x07\x54\xdf\x64\x05"
-			  "\xa1\x89\xbc\x04\x21\x42\x22\xe6"
-			  "\x1f\x15\xad\x1e\xbe\x20\xc9\xb4"
-			  "\xf3\x95\x35\x46\x86\xbb\x04\x03",
-		.klen	= 32,
-		.iv	= "\xce\x17\xe5\x6f\x98\x7e\x0e\x8c"
-			  "\xc2\xbf\xe8\x29\x1f\x0b\x52\x68",
-		.assoc	= "\xdd\xe0\x74\xd6\xdc\x1d\xb8\x13"
-			  "\xe2\xf6\x15\x4d\x1e\xd4\x83\xe9"
-			  "\xb4\x76\xb3\x60\x40\x70\x8a\xc1"
-			  "\x53\xac\xa4\x32\xf3\xeb\xd3\x6e"
-			  "\x1e\x42\xa0\x46\x45\x9f\xc7\x22"
-			  "\xd3\x43\xbc\x7e\xa5\x47\x2a\x6f"
-			  "\x91\x19\x70\x1e\xe1\xfe\x25\x49"
-			  "\xd6\x8f\x93\xc7\x28\x3f\x3d\x03",
-		.alen	= 64,
-		.ptext	= "",
-		.plen	= 0,
-		.ctext	= "\x7b\x25\x3d\x47\xd4\xa7\x08\xce"
-			  "\x3b\x89\x40\x36\xba\x6d\x0e\xa2",
-		.clen	= 16,
-	}, {
-		.key	= "\xfc\x72\x90\xa6\x64\x5a\x0d\x21"
-			  "\x22\x63\x6e\x96\x1b\x67\xe4\xec"
-			  "\x49\xd7\xb9\xa2\xc3\xc0\x4b\xce"
-			  "\xb4\xc3\x14\x1e\x61\x1a\xa3\xd9",
-		.klen	= 32,
-		.iv	= "\x0b\x3c\x1f\x0e\xa8\xf9\xb7\xa7"
-			  "\x42\x9a\x9a\xba\x19\x30\x15\x6e",
-		.assoc	= "\x1a",
-		.alen	= 1,
-		.ptext	= "\x29",
-		.plen	= 1,
-		.ctext	= "\xe6\x09\x6f\x95\x9a\x18\xc8\xf6"
-			  "\x17\x75\x81\x16\xdf\x26\xff\x67"
-			  "\x92",
-		.clen	= 17,
-	}, {
-		.key	= "\x38\x97\xca\x45\x74\xd6\xb6\x3c"
-			  "\xa3\x3d\x20\x27\x15\x8b\xa7\xf2"
-			  "\x74\x9a\xc4\x27\xc8\x60\xcd\xe8"
-			  "\x75\xf0\xf2\xf7\x3b\x79\x42\xb0",
-		.klen	= 32,
-		.iv	= "\x47\x60\x59\xad\xb8\x75\x60\xc3"
-			  "\xc3\x74\x4c\x4c\x13\x54\xd8\x74",
-		.assoc	= "\x56\x29\xe7\x15\xfc\x14\x0a\x4a"
-			  "\xe4\xaa\x79\x70\x12\x1d\x08\xf6"
-			  "\x09\xfb\xca\x69\x4b\xb0\x8e\xf5"
-			  "\xd6\x07\x62\xe3\xa8\xa9\x12",
-		.alen	= 31,
-		.ptext	= "\x66\xf3\x75\x7d\x40\xb3\xb4\xd1"
-			  "\x04\xe1\xa6\x94\x10\xe6\x39\x77"
-			  "\xd3\xac\x4d\x8a\x8c\x58\x6e\xfb"
-			  "\x06\x13\x9a\xd9\x5e\xc0\xfa",
-		.plen	= 31,
-		.ctext	= "\x82\xc0\x56\xf0\xd7\xc4\xc9\xfd"
-			  "\x3c\xd1\x2a\xd4\x15\x86\x9d\xda"
-			  "\xea\x6c\x6f\xa1\x33\xb0\x7a\x01"
-			  "\x57\xe7\xf3\x7b\x73\xe7\x54\x10"
-			  "\xc6\x91\xe2\xc6\xa0\x69\xe7\xe6"
-			  "\x76\xc3\xf5\x3a\x76\xfd\x4a",
-		.clen	= 47,
-	}, {
-		.key	= "\x75\xbc\x04\xe5\x84\x52\x5e\x58"
-			  "\x24\x17\xd2\xb9\x0e\xaf\x6a\xf9"
-			  "\x9e\x5c\xd0\xab\xcd\x00\x4f\x01"
-			  "\x37\x1e\xd1\xcf\x15\xd8\xe2\x86",
-		.klen	= 32,
-		.iv	= "\x84\x85\x92\x4d\xc8\xf1\x08\xdf"
-			  "\x44\x4e\xff\xdd\x0d\x78\x9a\x7a",
-		.assoc	= "\x93\x4e\x21\xb4\x0c\x90\xb3\x66"
-			  "\x65\x84\x2b\x01\x0b\x42\xcb\xfc"
-			  "\x33\xbd\xd6\xed\x50\x50\x10\x0e"
-			  "\x97\x35\x41\xbb\x82\x08\xb1\xf2",
-		.alen	= 32,
-		.ptext	= "\xa2\x17\xaf\x1c\x50\x2e\x5d\xed"
-			  "\x85\xbb\x58\x26\x0a\x0b\xfc\x7d"
-			  "\xfe\x6e\x59\x0e\x91\xf8\xf0\x15"
-			  "\xc8\x40\x78\xb1\x38\x1f\x99\xa7",
-		.plen	= 32,
-		.ctext	= "\x01\x47\x8e\x6c\xf6\x64\x89\x3a"
-			  "\x71\xce\xe4\xaa\x45\x70\xe6\x84"
-			  "\x62\x48\x08\x64\x86\x6a\xdf\xec"
-			  "\xb4\xa0\xfb\x34\x03\x0c\x19\xf4"
-			  "\x2b\x7b\x36\x73\xec\x54\xa9\x1e"
-			  "\x30\x85\xdb\xe4\xac\xe9\x2c\xca",
-		.clen	= 48,
-	}, {
-		.key	= "\xb1\xe1\x3e\x84\x94\xcd\x07\x74"
-			  "\xa5\xf2\x84\x4a\x08\xd4\x2c\xff"
-			  "\xc8\x1e\xdb\x2f\xd2\xa0\xd1\x1b"
-			  "\xf8\x4c\xb0\xa8\xef\x37\x81\x5d",
-		.klen	= 32,
-		.iv	= "\xc0\xaa\xcc\xec\xd8\x6c\xb1\xfb"
-			  "\xc5\x28\xb1\x6e\x07\x9d\x5d\x81",
-		.assoc	= "\xd0\x73\x5a\x54\x1d\x0b\x5b\x82"
-			  "\xe5\x5f\xdd\x93\x05\x66\x8e\x02"
-			  "\x5e\x80\xe1\x71\x55\xf0\x92\x28"
-			  "\x59\x62\x20\x94\x5c\x67\x50\xc8"
-			  "\x58",
-		.alen	= 33,
-		.ptext	= "\xdf\x3c\xe9\xbc\x61\xaa\x06\x09"
-			  "\x06\x95\x0a\xb7\x04\x2f\xbe\x84"
-			  "\x28\x30\x64\x92\x96\x98\x72\x2e"
-			  "\x89\x6e\x57\x8a\x13\x7e\x38\x7e"
-			  "\xdb",
-		.plen	= 33,
-		.ctext	= "\x85\xe0\xf8\x0f\x8e\x49\xe3\x60"
-			  "\xcb\x4a\x54\x94\xcf\xf5\x7e\x34"
-			  "\xe9\xf8\x80\x65\x53\xd0\x72\x70"
-			  "\x4f\x7d\x9d\xd1\x15\x6f\xb9\x2c"
-			  "\xfa\xe8\xdd\xac\x2e\xe1\x3f\x67"
-			  "\x63\x0f\x1a\x59\xb7\x89\xdb\xf4"
-			  "\xc3",
-		.clen	= 49,
-	}, {
-		.key	= "\xee\x05\x77\x23\xa5\x49\xb0\x90"
-			  "\x26\xcc\x36\xdc\x02\xf8\xef\x05"
-			  "\xf3\xe1\xe7\xb3\xd8\x40\x53\x35"
-			  "\xb9\x79\x8f\x80\xc9\x96\x20\x33",
-		.klen	= 32,
-		.iv	= "\xfd\xce\x06\x8b\xe9\xe8\x5a\x17"
-			  "\x46\x02\x63\x00\x01\xc1\x20\x87",
-		.assoc	= "\x0c\x98\x94\xf3\x2d\x87\x04\x9e"
-			  "\x66\x39\x8f\x24\xff\x8a\x50\x08"
-			  "\x88\x42\xed\xf6\x5a\x90\x14\x42"
-			  "\x1a\x90\xfe\x6c\x36\xc6\xf0\x9f"
-			  "\x66\xa0\xb5\x2d\x2c\xf8\x25\x15"
-			  "\x55\x90\xa2\x7e\x77\x94\x96\x3a"
-			  "\x71\x1c\xf7\x44\xee\xa8\xc3\x42"
-			  "\xe2\xa3\x84\x04\x0b\xe1\xce",
-		.alen	= 63,
-		.ptext	= "\x1b\x61\x23\x5b\x71\x26\xae\x25"
-			  "\x87\x6f\xbc\x49\xfe\x53\x81\x8a"
-			  "\x53\xf2\x70\x17\x9b\x38\xf4\x48"
-			  "\x4b\x9b\x36\x62\xed\xdd\xd8\x54"
-			  "\xea\xcb\xb6\x79\x45\xfc\xaa\x54"
-			  "\x5c\x94\x47\x58\xa7\xff\x9c\x9e"
-			  "\x7c\xb6\xf1\xac\xc8\xfd\x8b\x35"
-			  "\xd5\xa4\x6a\xd4\x09\xc2\x08",
-		.plen	= 63,
-		.ctext	= "\x00\xe5\x5b\x87\x5c\x20\x22\x8a"
-			  "\xda\x1f\xd3\xff\xbb\xb2\xb0\xf8"
-			  "\xef\xe9\xeb\x9e\x7c\x80\xf4\x2b"
-			  "\x59\xc0\x79\xbc\x17\xa0\x15\x01"
-			  "\xf5\x72\xfb\x5a\xe7\xaf\x07\xe3"
-			  "\x1b\x49\x21\x34\x23\x63\x55\x5e"
-			  "\xee\x4f\x34\x17\xfa\xfe\xa5\x0c"
-			  "\xed\x0b\x23\xea\x9b\xda\x57\x2f"
-			  "\xf6\xa9\xae\x0d\x4e\x40\x96\x45"
-			  "\x7f\xfa\xf0\xbf\xc4\x98\x78",
-		.clen	= 79,
-	}, {
-		.key	= "\x2a\x2a\xb1\xc3\xb5\xc5\x59\xac"
-			  "\xa7\xa6\xe8\x6d\xfc\x1d\xb2\x0b"
-			  "\x1d\xa3\xf3\x38\xdd\xe0\xd5\x4e"
-			  "\x7b\xa7\x6e\x58\xa3\xf5\xbf\x0a",
-		.klen	= 32,
-		.iv	= "\x39\xf3\x3f\x2b\xf9\x64\x03\x33"
-			  "\xc7\xdd\x15\x91\xfb\xe6\xe2\x8d",
-		.assoc	= "\x49\xbc\xce\x92\x3d\x02\xad\xba"
-			  "\xe7\x13\x41\xb6\xf9\xaf\x13\x0f"
-			  "\xb2\x04\xf8\x7a\x5f\x30\x96\x5b"
-			  "\xdc\xbd\xdd\x44\x10\x25\x8f\x75"
-			  "\x75\x4d\xb9\x5b\x8e\x0a\x38\x13"
-			  "\x6f\x9f\x36\xe4\x3a\x3e\xac\xc9"
-			  "\x9d\x83\xde\xe5\x57\xfd\xe3\x0e"
-			  "\xb1\xa7\x1b\x44\x05\x67\xb7\x37",
-		.alen	= 64,
-		.ptext	= "\x58\x85\x5c\xfa\x81\xa1\x57\x40"
-			  "\x08\x4a\x6e\xda\xf8\x78\x44\x90"
-			  "\x7d\xb5\x7b\x9b\xa1\xd8\x76\x62"
-			  "\x0c\xc9\x15\x3b\xc7\x3c\x77\x2b"
-			  "\xf8\x78\xba\xa7\xa6\x0e\xbd\x52"
-			  "\x76\xa3\xdc\xbe\x6b\xa8\xb1\x2d"
-			  "\xa9\x1d\xd8\x4e\x31\x53\xab\x00"
-			  "\xa5\xa7\x01\x13\x04\x49\xf2\x04",
-		.plen	= 64,
-		.ctext	= "\x28\xdd\xb9\x4a\x12\xc7\x0a\xe1"
-			  "\x58\x06\x1a\x9b\x8c\x67\xdf\xeb"
-			  "\x35\x35\x60\x9d\x06\x40\x65\xc1"
-			  "\x93\xe8\xb3\x82\x50\x29\xdd\xb5"
-			  "\x2b\xcb\xde\x18\x78\x6b\x42\xbe"
-			  "\x6d\x24\xd0\xb2\x7d\xd7\x08\x8f"
-			  "\x4a\x18\x98\xad\x8c\xf2\x97\xb4"
-			  "\xf4\x77\xe4\xbf\x41\x3b\xc4\x06"
-			  "\xce\x9e\x34\x81\xf0\x89\x11\x13"
-			  "\x02\x65\xa1\x7c\xdf\x07\x33\x06",
-		.clen	= 80,
-	}, {
-		.key	= "\x67\x4f\xeb\x62\xc5\x40\x01\xc7"
-			  "\x28\x80\x9a\xfe\xf6\x41\x74\x12"
-			  "\x48\x65\xfe\xbc\xe2\x80\x57\x68"
-			  "\x3c\xd4\x4d\x31\x7d\x54\x5f\xe1",
-		.klen	= 32,
-		.iv	= "\x76\x18\x79\xca\x09\xdf\xac\x4e"
-			  "\x48\xb7\xc7\x23\xf5\x0a\xa5\x93",
-		.assoc	= "\x85\xe1\x08\x32\x4d\x7e\x56\xd5"
-			  "\x68\xed\xf3\x47\xf3\xd3\xd6\x15"
-			  "\xdd\xc7\x04\xfe\x64\xd0\x18\x75"
-			  "\x9d\xeb\xbc\x1d\xea\x84\x2e\x4c"
-			  "\x83\xf9\xbe\x8a\xef\x1c\x4b\x10"
-			  "\x89\xaf\xcb\x4b\xfe\xe7\xc1\x58"
-			  "\xca\xea\xc6\x87\xc0\x53\x03\xd9"
-			  "\x80\xaa\xb2\x83\xff\xee\xa1\x6a"
-			  "\x04",
-		.alen	= 65,
-		.ptext	= "\x94\xaa\x96\x9a\x91\x1d\x00\x5c"
-			  "\x88\x24\x20\x6b\xf2\x9c\x06\x96"
-			  "\xa7\x77\x87\x1f\xa6\x78\xf8\x7b"
-			  "\xcd\xf6\xf4\x13\xa1\x9b\x16\x02"
-			  "\x07\x24\xbf\xd5\x08\x20\xd0\x4f"
-			  "\x90\xb3\x70\x24\x2f\x51\xc7\xbb"
-			  "\xd6\x84\xc0\xef\x9a\xa8\xca\xcc"
-			  "\x74\xab\x97\x53\xfe\xd0\xdb\x37"
-			  "\x37\x6a\x0e\x9f\x3f\xa3\x2a\xe3"
-			  "\x1b\x34\x6d\x51\x72\x2b\x17\xe7"
-			  "\x4d\xaa\x2c\x18\xda\xa3\x33\x89"
-			  "\x2a\x9f\xf4\xd2\xed\x76\x3d\x3f"
-			  "\x3c\x15\x9d\x8e\x4f\x3c\x27\xb0"
-			  "\x42\x3f\x2f\x8a\xd4\xc2\x10\xb2"
-			  "\x27\x7f\xe3\x34\x80\x02\x49\x4b"
-			  "\x07\x68\x22\x2a\x88\x25\x53\xb2"
-			  "\x2f",
-		.plen	= 129,
-		.ctext	= "\x85\x39\x69\x35\xfb\xf9\xb0\xa6"
-			  "\x85\x43\x88\xd0\xd7\x78\x60\x19"
-			  "\x3e\x1f\xb1\xa4\xd6\xc5\x96\xec"
-			  "\xf7\x84\x85\xc7\x27\x0f\x74\x57"
-			  "\x28\x9e\xdd\x90\x3c\x43\x12\xc5"
-			  "\x51\x3d\x39\x8f\xa5\xf4\xe0\x0b"
-			  "\x57\x04\xf1\x6d\xfe\x9b\x84\x27"
-			  "\xe8\xeb\x4d\xda\x02\x0a\xc5\x49"
-			  "\x1a\x55\x5e\x50\x56\x4d\x94\xda"
-			  "\x20\xf8\x12\x54\x50\xb3\x11\xda"
-			  "\xed\x44\x27\x67\xd5\xd1\x8b\x4b"
-			  "\x38\x67\x56\x65\x59\xda\xe6\x97"
-			  "\x81\xae\x2f\x92\x3b\xae\x22\x1c"
-			  "\x91\x59\x38\x18\x00\xe8\xba\x92"
-			  "\x04\x19\x56\xdf\xb0\x82\xeb\x6f"
-			  "\x2e\xdb\x54\x3c\x4b\xbb\x60\x90"
-			  "\x4c\x50\x10\x62\xba\x7a\xb1\x68"
-			  "\x37\xd7\x87\x4e\xe4\x66\x09\x1f"
-			  "\xa5",
-		.clen	= 145,
-	}, {
-		.key	= "\xa3\x73\x24\x01\xd5\xbc\xaa\xe3"
-			  "\xa9\x5a\x4c\x90\xf0\x65\x37\x18"
-			  "\x72\x28\x0a\x40\xe7\x20\xd9\x82"
-			  "\xfe\x02\x2b\x09\x57\xb3\xfe\xb7",
-		.klen	= 32,
-		.iv	= "\xb3\x3d\xb3\x69\x19\x5b\x54\x6a"
-			  "\xc9\x91\x79\xb4\xef\x2e\x68\x99",
-		.assoc	= "\xc2\x06\x41\xd1\x5d\xfa\xff\xf1"
-			  "\xe9\xc7\xa5\xd9\xed\xf8\x98\x1b"
-			  "\x07\x89\x10\x82\x6a\x70\x9a\x8f"
-			  "\x5e\x19\x9b\xf5\xc5\xe3\xcd\x22"
-			  "\x92\xa5\xc2\xb8\x51\x2e\x5e\x0e"
-			  "\xa4\xbe\x5f\xb1\xc1\x90\xd7\xe7"
-			  "\xf7\x52\xae\x28\x29\xa8\x22\xa4"
-			  "\x4f\xae\x48\xc2\xfa\x75\x8b\x9e"
-			  "\xce\x83\x2a\x88\x07\x55\xbb\x89"
-			  "\xf6\xdf\xac\xdf\x83\x08\xbf\x7d"
-			  "\xac\x30\x8b\x8e\x02\xac\x00\xf1"
-			  "\x30\x46\xe1\xbc\x75\xbf\x49\xbb"
-			  "\x26\x4e\x29\xf0\x2f\x21\xc6\x13"
-			  "\x92\xd9\x3d\x11\xe4\x10\x00\x8e"
-			  "\xd4\xd4\x58\x65\xa6\x2b\xe3\x25"
-			  "\xb1\x8f\x15\x93\xe7\x71\xb9\x2c"
-			  "\x4b",
-		.alen	= 129,
-		.ptext	= "\xd1\xcf\xd0\x39\xa1\x99\xa9\x78"
-			  "\x09\xfe\xd2\xfd\xec\xc1\xc9\x9d"
-			  "\xd2\x39\x93\xa3\xab\x18\x7a\x95"
-			  "\x8f\x24\xd3\xeb\x7b\xfa\xb5\xd8"
-			  "\x15\xd1\xc3\x04\x69\x32\xe3\x4d"
-			  "\xaa\xc2\x04\x8b\xf2\xfa\xdc\x4a"
-			  "\x02\xeb\xa8\x90\x03\xfd\xea\x97"
-			  "\x43\xaf\x2e\x92\xf8\x57\xc5\x6a"
-			  "\x00",
-		.plen	= 65,
-		.ctext	= "\x7d\xde\x53\x22\xe4\x23\x3b\x30"
-			  "\x78\xde\x35\x90\x7a\xd9\x0b\x93"
-			  "\xf6\x0e\x0b\xed\x40\xee\x10\x9c"
-			  "\x96\x3a\xd3\x34\xb2\xd0\x67\xcf"
-			  "\x63\x7f\x2d\x0c\xcf\x96\xec\x64"
-			  "\x1a\x87\xcc\x7d\x2c\x5e\x81\x4b"
-			  "\xd2\x8f\x4c\x7c\x00\xb1\xb4\xe0"
-			  "\x87\x4d\xb1\xbc\xd8\x78\x2c\x17"
-			  "\xf2\x3b\xd8\x28\x40\xe2\x76\xf6"
-			  "\x20\x13\x83\x46\xaf\xff\xe3\x0f"
-			  "\x72",
-		.clen	= 81,
-	}, {
-		.key	= "\xe0\x98\x5e\xa1\xe5\x38\x53\xff"
-			  "\x2a\x35\xfe\x21\xea\x8a\xfa\x1e"
-			  "\x9c\xea\x15\xc5\xec\xc0\x5b\x9b"
-			  "\xbf\x2f\x0a\xe1\x32\x12\x9d\x8e",
-		.klen	= 32,
-		.iv	= "\xef\x61\xed\x08\x29\xd7\xfd\x86"
-			  "\x4a\x6b\x2b\x46\xe9\x53\x2a\xa0",
-		.assoc	= "\xfe\x2a\x7b\x70\x6d\x75\xa7\x0d"
-			  "\x6a\xa2\x57\x6a\xe7\x1c\x5b\x21"
-			  "\x31\x4b\x1b\x07\x6f\x10\x1c\xa8"
-			  "\x20\x46\x7a\xce\x9f\x42\x6d\xf9",
-		.alen	= 32,
-		.ptext	= "\x0d\xf4\x09\xd8\xb1\x14\x51\x94"
-			  "\x8a\xd8\x84\x8e\xe6\xe5\x8c\xa3"
-			  "\xfc\xfc\x9e\x28\xb0\xb8\xfc\xaf"
-			  "\x50\x52\xb1\xc4\x55\x59\x55\xaf",
-		.plen	= 32,
-		.ctext	= "\x5a\xcd\x8c\x57\xf2\x6a\xb6\xbe"
-			  "\x53\xc7\xaa\x9a\x60\x74\x9c\xc4"
-			  "\xa2\xc2\xd0\x6d\xe1\x03\x63\xdc"
-			  "\xbb\x51\x7e\x9c\x89\x73\xde\x4e"
-			  "\x24\xf8\x52\x7c\x15\x41\x0e\xba"
-			  "\x69\x0e\x36\x5f\x2f\x22\x8c",
-		.clen	= 47,
-	}, {
-		.key	= "\x1c\xbd\x98\x40\xf5\xb3\xfc\x1b"
-			  "\xaa\x0f\xb0\xb3\xe4\xae\xbc\x24"
-			  "\xc7\xac\x21\x49\xf1\x60\xdd\xb5"
-			  "\x80\x5d\xe9\xba\x0c\x71\x3c\x64",
-		.klen	= 32,
-		.iv	= "\x2c\x86\x26\xa8\x39\x52\xa6\xa2"
-			  "\xcb\x45\xdd\xd7\xe3\x77\xed\xa6",
-		.assoc	= "\x3b\x4f\xb5\x10\x7d\xf1\x50\x29"
-			  "\xeb\x7c\x0a\xfb\xe1\x40\x1e\x27"
-			  "\x5c\x0d\x27\x8b\x74\xb0\x9e\xc2"
-			  "\xe1\x74\x59\xa6\x79\xa1\x0c\xd0",
-		.alen	= 32,
-		.ptext	= "\x4a\x18\x43\x77\xc1\x90\xfa\xb0"
-			  "\x0b\xb2\x36\x20\xe0\x09\x4e\xa9"
-			  "\x26\xbe\xaa\xac\xb5\x58\x7e\xc8"
-			  "\x11\x7f\x90\x9c\x2f\xb8\xf4\x85",
-		.plen	= 32,
-		.ctext	= "\x47\xd6\xce\x78\xd6\xbf\x4a\x51"
-			  "\xb8\xda\x92\x3c\xfd\xda\xac\x8e"
-			  "\x8d\x88\xd7\x4d\x90\xe5\xeb\xa1"
-			  "\xab\xd6\x7c\x76\xad\xea\x7d\x76"
-			  "\x53\xee\xb0\xcd\xd0\x02\xbb\x70"
-			  "\x5b\x6f\x7b\xe2\x8c\xe8",
-		.clen	= 46,
-	}, {
-		.key	= "\x59\xe1\xd2\xdf\x05\x2f\xa4\x37"
-			  "\x2b\xe9\x63\x44\xde\xd3\x7f\x2b"
-			  "\xf1\x6f\x2d\xcd\xf6\x00\x5f\xcf"
-			  "\x42\x8a\xc8\x92\xe6\xd0\xdc\x3b",
-		.klen	= 32,
-		.iv	= "\x68\xab\x60\x47\x49\xce\x4f\xbe"
-			  "\x4c\x20\x8f\x68\xdd\x9c\xb0\xac",
-		.assoc	= "\x77\x74\xee\xaf\x8d\x6d\xf9\x45"
-			  "\x6c\x56\xbc\x8d\xdb\x65\xe0\x2e"
-			  "\x86\xd0\x32\x0f\x79\x50\x20\xdb"
-			  "\xa2\xa1\x37\x7e\x53\x00\xab\xa6",
-		.alen	= 32,
-		.ptext	= "\x86\x3d\x7d\x17\xd1\x0c\xa3\xcc"
-			  "\x8c\x8d\xe8\xb1\xda\x2e\x11\xaf"
-			  "\x51\x80\xb5\x30\xba\xf8\x00\xe2"
-			  "\xd3\xad\x6f\x75\x09\x18\x93\x5c",
-		.plen	= 32,
-		.ctext	= "\x9f\xa9\x2b\xa4\x8f\x00\x05\x2b"
-			  "\xe7\x68\x81\x51\xbb\xfb\xdf\x60"
-			  "\xbb\xac\xe8\xc1\xdc\x68\xae\x68"
-			  "\x3a\xcd\x7a\x06\x49\xfe\x80\x11"
-			  "\xe6\x61\x99\xe2\xdd\xbe\x2c\xbf",
-		.clen	= 40,
-	}, {
-		.key	= "\x96\x06\x0b\x7f\x15\xab\x4d\x53"
-			  "\xac\xc3\x15\xd6\xd8\xf7\x42\x31"
-			  "\x1b\x31\x38\x51\xfc\xa0\xe1\xe8"
-			  "\x03\xb8\xa7\x6b\xc0\x2f\x7b\x11",
-		.klen	= 32,
-		.iv	= "\xa5\xcf\x9a\xe6\x59\x4a\xf7\xd9"
-			  "\xcd\xfa\x41\xfa\xd7\xc0\x72\xb2",
-		.assoc	= "\xb4\x99\x28\x4e\x9d\xe8\xa2\x60"
-			  "\xed\x30\x6e\x1e\xd5\x89\xa3\x34"
-			  "\xb1\x92\x3e\x93\x7e\xf0\xa2\xf5"
-			  "\x64\xcf\x16\x57\x2d\x5f\x4a\x7d",
-		.alen	= 32,
-		.ptext	= "\xc3\x62\xb7\xb6\xe2\x87\x4c\xe7"
-			  "\x0d\x67\x9a\x43\xd4\x52\xd4\xb5"
-			  "\x7b\x43\xc1\xb5\xbf\x98\x82\xfc"
-			  "\x94\xda\x4e\x4d\xe4\x77\x32\x32",
-		.plen	= 32,
-		.ctext	= "\xe2\x34\xfa\x25\xfd\xfb\x89\x5e"
-			  "\x5b\x4e\x0b\x15\x6e\x39\xfb\x0c"
-			  "\x73\xc7\xd9\x6b\xbe\xce\x9b\x70"
-			  "\xc7\x4f\x96\x16\x03\xfc\xea\xfb"
-			  "\x56",
-		.clen	= 33,
-	},
-};
-
-/*
  * All key wrapping test vectors taken from
  * http://csrc.nist.gov/groups/STM/cavp/documents/mac/kwtestvectors.zip
  *
@@ -32454,6 +29763,86 @@
 	},
 };
 
+static const struct comp_testvec lzorle_comp_tv_template[] = {
+	{
+		.inlen	= 70,
+		.outlen	= 59,
+		.input	= "Join us now and share the software "
+			"Join us now and share the software ",
+		.output	= "\x11\x01\x00\x0d\x4a\x6f\x69\x6e"
+			  "\x20\x75\x73\x20\x6e\x6f\x77\x20"
+			  "\x61\x6e\x64\x20\x73\x68\x61\x72"
+			  "\x65\x20\x74\x68\x65\x20\x73\x6f"
+			  "\x66\x74\x77\x70\x01\x32\x88\x00"
+			  "\x0c\x65\x20\x74\x68\x65\x20\x73"
+			  "\x6f\x66\x74\x77\x61\x72\x65\x20"
+			  "\x11\x00\x00",
+	}, {
+		.inlen	= 159,
+		.outlen	= 133,
+		.input	= "This document describes a compression method based on the LZO "
+			"compression algorithm.  This document defines the application of "
+			"the LZO algorithm used in UBIFS.",
+		.output	= "\x11\x01\x00\x2c\x54\x68\x69\x73"
+			  "\x20\x64\x6f\x63\x75\x6d\x65\x6e"
+			  "\x74\x20\x64\x65\x73\x63\x72\x69"
+			  "\x62\x65\x73\x20\x61\x20\x63\x6f"
+			  "\x6d\x70\x72\x65\x73\x73\x69\x6f"
+			  "\x6e\x20\x6d\x65\x74\x68\x6f\x64"
+			  "\x20\x62\x61\x73\x65\x64\x20\x6f"
+			  "\x6e\x20\x74\x68\x65\x20\x4c\x5a"
+			  "\x4f\x20\x2a\x8c\x00\x09\x61\x6c"
+			  "\x67\x6f\x72\x69\x74\x68\x6d\x2e"
+			  "\x20\x20\x2e\x54\x01\x03\x66\x69"
+			  "\x6e\x65\x73\x20\x74\x06\x05\x61"
+			  "\x70\x70\x6c\x69\x63\x61\x74\x76"
+			  "\x0a\x6f\x66\x88\x02\x60\x09\x27"
+			  "\xf0\x00\x0c\x20\x75\x73\x65\x64"
+			  "\x20\x69\x6e\x20\x55\x42\x49\x46"
+			  "\x53\x2e\x11\x00\x00",
+	},
+};
+
+static const struct comp_testvec lzorle_decomp_tv_template[] = {
+	{
+		.inlen	= 133,
+		.outlen	= 159,
+		.input	= "\x00\x2b\x54\x68\x69\x73\x20\x64"
+			  "\x6f\x63\x75\x6d\x65\x6e\x74\x20"
+			  "\x64\x65\x73\x63\x72\x69\x62\x65"
+			  "\x73\x20\x61\x20\x63\x6f\x6d\x70"
+			  "\x72\x65\x73\x73\x69\x6f\x6e\x20"
+			  "\x6d\x65\x74\x68\x6f\x64\x20\x62"
+			  "\x61\x73\x65\x64\x20\x6f\x6e\x20"
+			  "\x74\x68\x65\x20\x4c\x5a\x4f\x2b"
+			  "\x8c\x00\x0d\x61\x6c\x67\x6f\x72"
+			  "\x69\x74\x68\x6d\x2e\x20\x20\x54"
+			  "\x68\x69\x73\x2a\x54\x01\x02\x66"
+			  "\x69\x6e\x65\x73\x94\x06\x05\x61"
+			  "\x70\x70\x6c\x69\x63\x61\x74\x76"
+			  "\x0a\x6f\x66\x88\x02\x60\x09\x27"
+			  "\xf0\x00\x0c\x20\x75\x73\x65\x64"
+			  "\x20\x69\x6e\x20\x55\x42\x49\x46"
+			  "\x53\x2e\x11\x00\x00",
+		.output	= "This document describes a compression method based on the LZO "
+			"compression algorithm.  This document defines the application of "
+			"the LZO algorithm used in UBIFS.",
+	}, {
+		.inlen	= 59,
+		.outlen	= 70,
+		.input	= "\x11\x01\x00\x0d\x4a\x6f\x69\x6e"
+			  "\x20\x75\x73\x20\x6e\x6f\x77\x20"
+			  "\x61\x6e\x64\x20\x73\x68\x61\x72"
+			  "\x65\x20\x74\x68\x65\x20\x73\x6f"
+			  "\x66\x74\x77\x70\x01\x32\x88\x00"
+			  "\x0c\x65\x20\x74\x68\x65\x20\x73"
+			  "\x6f\x66\x74\x77\x61\x72\x65\x20"
+			  "\x11\x00\x00",
+		.output	= "Join us now and share the software "
+			"Join us now and share the software ",
+	},
+};
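
The two tables above pair each plaintext with its expected lzo-rle output, so testmgr can check the algorithm in both directions against fixed data. As a rough editorial sketch (not part of this patch) of how one compression entry maps onto the legacy crypto_comp API — the 512-byte scratch buffer is an assumed size, and testmgr's own test_comp()/test_acomp() paths remain the authoritative users of these vectors:

```c
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/string.h>

/* Editorial sketch: verify a single lzo-rle compression vector. */
static int check_lzorle_comp_vector(const struct comp_testvec *tv)
{
	struct crypto_comp *tfm;
	u8 buf[512];			/* assumed scratch size for the sketch */
	unsigned int dlen = sizeof(buf);
	int ret;

	tfm = crypto_alloc_comp("lzo-rle", 0, 0);
	if (IS_ERR(tfm))
		return PTR_ERR(tfm);

	/* Compress .input and compare against the expected .output bytes. */
	ret = crypto_comp_compress(tfm, tv->input, tv->inlen, buf, &dlen);
	if (!ret && (dlen != tv->outlen || memcmp(buf, tv->output, dlen)))
		ret = -EINVAL;

	crypto_free_comp(tfm);
	return ret;
}
```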
+
 /*
  * Michael MIC test vectors from IEEE 802.11i
  */
@@ -33681,4 +31070,501 @@
 			  "functions.",
 	},
 };
+
+/* based on aes_cbc_tv_template */
+static const struct cipher_testvec essiv_aes_cbc_tv_template[] = {
+	{
+		.key    = "\x06\xa9\x21\x40\x36\xb8\xa1\x5b"
+			  "\x51\x2e\x03\xd5\x34\x12\x00\x06",
+		.klen   = 16,
+		.iv	= "\x3d\xaf\xba\x42\x9d\x9e\xb4\x30"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "Single block msg",
+		.ctext	= "\xfa\x59\xe7\x5f\x41\x56\x65\xc3"
+			  "\x36\xca\x6b\x72\x10\x9f\x8c\xd4",
+		.len	= 16,
+	}, {
+		.key    = "\xc2\x86\x69\x6d\x88\x7c\x9a\xa0"
+			  "\x61\x1b\xbb\x3e\x20\x25\xa4\x5a",
+		.klen   = 16,
+		.iv     = "\x56\x2e\x17\x99\x6d\x09\x3d\x28"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+		.ctext	= "\xc8\x59\x9a\xfe\x79\xe6\x7b\x20"
+			  "\x06\x7d\x55\x0a\x5e\xc7\xb5\xa7"
+			  "\x0b\x9c\x80\xd2\x15\xa1\xb8\x6d"
+			  "\xc6\xab\x7b\x65\xd9\xfd\x88\xeb",
+		.len	= 32,
+	}, {
+		.key	= "\x8e\x73\xb0\xf7\xda\x0e\x64\x52"
+			  "\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+			  "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+		.klen	= 24,
+		.iv	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.ctext	= "\x96\x6d\xa9\x7a\x42\xe6\x01\xc7"
+			  "\x17\xfc\xa7\x41\xd3\x38\x0b\xe5"
+			  "\x51\x48\xf7\x7e\x5e\x26\xa9\xfe"
+			  "\x45\x72\x1c\xd9\xde\xab\xf3\x4d"
+			  "\x39\x47\xc5\x4f\x97\x3a\x55\x63"
+			  "\x80\x29\x64\x4c\x33\xe8\x21\x8a"
+			  "\x6a\xef\x6b\x6a\x8f\x43\xc0\xcb"
+			  "\xf0\xf3\x6e\x74\x54\x44\x92\x44",
+		.len	= 64,
+	}, {
+		.key	= "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
+			  "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+			  "\x1f\x35\x2c\x07\x3b\x61\x08\xd7"
+			  "\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+		.klen	= 32,
+		.iv	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.ctext	= "\x24\x52\xf1\x48\x74\xd0\xa7\x93"
+			  "\x75\x9b\x63\x46\xc0\x1c\x1e\x17"
+			  "\x4d\xdc\x5b\x3a\x27\x93\x2a\x63"
+			  "\xf7\xf1\xc7\xb3\x54\x56\x5b\x50"
+			  "\xa3\x31\xa5\x8b\xd6\xfd\xb6\x3c"
+			  "\x8b\xf6\xf2\x45\x05\x0c\xc8\xbb"
+			  "\x32\x0b\x26\x1c\xe9\x8b\x02\xc0"
+			  "\xb2\x6f\x37\xa7\x5b\xa8\xa9\x42",
+		.len	= 64,
+	}, {
+		.key	= "\xC9\x83\xA6\xC9\xEC\x0F\x32\x55"
+			  "\x0F\x32\x55\x78\x9B\xBE\x78\x9B"
+			  "\xBE\xE1\x04\x27\xE1\x04\x27\x4A"
+			  "\x6D\x90\x4A\x6D\x90\xB3\xD6\xF9",
+		.klen	= 32,
+		.iv	= "\xE7\x82\x1D\xB8\x53\x11\xAC\x47"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00",
+		.ptext	= "\x50\xB9\x22\xAE\x17\x80\x0C\x75"
+			  "\xDE\x47\xD3\x3C\xA5\x0E\x9A\x03"
+			  "\x6C\xF8\x61\xCA\x33\xBF\x28\x91"
+			  "\x1D\x86\xEF\x58\xE4\x4D\xB6\x1F"
+			  "\xAB\x14\x7D\x09\x72\xDB\x44\xD0"
+			  "\x39\xA2\x0B\x97\x00\x69\xF5\x5E"
+			  "\xC7\x30\xBC\x25\x8E\x1A\x83\xEC"
+			  "\x55\xE1\x4A\xB3\x1C\xA8\x11\x7A"
+			  "\x06\x6F\xD8\x41\xCD\x36\x9F\x08"
+			  "\x94\xFD\x66\xF2\x5B\xC4\x2D\xB9"
+			  "\x22\x8B\x17\x80\xE9\x52\xDE\x47"
+			  "\xB0\x19\xA5\x0E\x77\x03\x6C\xD5"
+			  "\x3E\xCA\x33\x9C\x05\x91\xFA\x63"
+			  "\xEF\x58\xC1\x2A\xB6\x1F\x88\x14"
+			  "\x7D\xE6\x4F\xDB\x44\xAD\x16\xA2"
+			  "\x0B\x74\x00\x69\xD2\x3B\xC7\x30"
+			  "\x99\x02\x8E\xF7\x60\xEC\x55\xBE"
+			  "\x27\xB3\x1C\x85\x11\x7A\xE3\x4C"
+			  "\xD8\x41\xAA\x13\x9F\x08\x71\xFD"
+			  "\x66\xCF\x38\xC4\x2D\x96\x22\x8B"
+			  "\xF4\x5D\xE9\x52\xBB\x24\xB0\x19"
+			  "\x82\x0E\x77\xE0\x49\xD5\x3E\xA7"
+			  "\x10\x9C\x05\x6E\xFA\x63\xCC\x35"
+			  "\xC1\x2A\x93\x1F\x88\xF1\x5A\xE6"
+			  "\x4F\xB8\x21\xAD\x16\x7F\x0B\x74"
+			  "\xDD\x46\xD2\x3B\xA4\x0D\x99\x02"
+			  "\x6B\xF7\x60\xC9\x32\xBE\x27\x90"
+			  "\x1C\x85\xEE\x57\xE3\x4C\xB5\x1E"
+			  "\xAA\x13\x7C\x08\x71\xDA\x43\xCF"
+			  "\x38\xA1\x0A\x96\xFF\x68\xF4\x5D"
+			  "\xC6\x2F\xBB\x24\x8D\x19\x82\xEB"
+			  "\x54\xE0\x49\xB2\x1B\xA7\x10\x79"
+			  "\x05\x6E\xD7\x40\xCC\x35\x9E\x07"
+			  "\x93\xFC\x65\xF1\x5A\xC3\x2C\xB8"
+			  "\x21\x8A\x16\x7F\xE8\x51\xDD\x46"
+			  "\xAF\x18\xA4\x0D\x76\x02\x6B\xD4"
+			  "\x3D\xC9\x32\x9B\x04\x90\xF9\x62"
+			  "\xEE\x57\xC0\x29\xB5\x1E\x87\x13"
+			  "\x7C\xE5\x4E\xDA\x43\xAC\x15\xA1"
+			  "\x0A\x73\xFF\x68\xD1\x3A\xC6\x2F"
+			  "\x98\x01\x8D\xF6\x5F\xEB\x54\xBD"
+			  "\x26\xB2\x1B\x84\x10\x79\xE2\x4B"
+			  "\xD7\x40\xA9\x12\x9E\x07\x70\xFC"
+			  "\x65\xCE\x37\xC3\x2C\x95\x21\x8A"
+			  "\xF3\x5C\xE8\x51\xBA\x23\xAF\x18"
+			  "\x81\x0D\x76\xDF\x48\xD4\x3D\xA6"
+			  "\x0F\x9B\x04\x6D\xF9\x62\xCB\x34"
+			  "\xC0\x29\x92\x1E\x87\xF0\x59\xE5"
+			  "\x4E\xB7\x20\xAC\x15\x7E\x0A\x73"
+			  "\xDC\x45\xD1\x3A\xA3\x0C\x98\x01"
+			  "\x6A\xF6\x5F\xC8\x31\xBD\x26\x8F"
+			  "\x1B\x84\xED\x56\xE2\x4B\xB4\x1D"
+			  "\xA9\x12\x7B\x07\x70\xD9\x42\xCE"
+			  "\x37\xA0\x09\x95\xFE\x67\xF3\x5C"
+			  "\xC5\x2E\xBA\x23\x8C\x18\x81\xEA"
+			  "\x53\xDF\x48\xB1\x1A\xA6\x0F\x78"
+			  "\x04\x6D\xD6\x3F\xCB\x34\x9D\x06"
+			  "\x92\xFB\x64\xF0\x59\xC2\x2B\xB7"
+			  "\x20\x89\x15\x7E\xE7\x50\xDC\x45"
+			  "\xAE\x17\xA3\x0C\x75\x01\x6A\xD3"
+			  "\x3C\xC8\x31\x9A\x03\x8F\xF8\x61"
+			  "\xED\x56\xBF\x28\xB4\x1D\x86\x12",
+		.ctext	= "\x97\x7f\x69\x0f\x0f\x34\xa6\x33"
+			  "\x66\x49\x7e\xd0\x4d\x1b\xc9\x64"
+			  "\xf9\x61\x95\x98\x11\x00\x88\xf8"
+			  "\x2e\x88\x01\x0f\x2b\xe1\xae\x3e"
+			  "\xfe\xd6\x47\x30\x11\x68\x7d\x99"
+			  "\xad\x69\x6a\xe8\x41\x5f\x1e\x16"
+			  "\x00\x3a\x47\xdf\x8e\x7d\x23\x1c"
+			  "\x19\x5b\x32\x76\x60\x03\x05\xc1"
+			  "\xa0\xff\xcf\xcc\x74\x39\x46\x63"
+			  "\xfe\x5f\xa6\x35\xa7\xb4\xc1\xf9"
+			  "\x4b\x5e\x38\xcc\x8c\xc1\xa2\xcf"
+			  "\x9a\xc3\xae\x55\x42\x46\x93\xd9"
+			  "\xbd\x22\xd3\x8a\x19\x96\xc3\xb3"
+			  "\x7d\x03\x18\xf9\x45\x09\x9c\xc8"
+			  "\x90\xf3\x22\xb3\x25\x83\x9a\x75"
+			  "\xbb\x04\x48\x97\x3a\x63\x08\x04"
+			  "\xa0\x69\xf6\x52\xd4\x89\x93\x69"
+			  "\xb4\x33\xa2\x16\x58\xec\x4b\x26"
+			  "\x76\x54\x10\x0b\x6e\x53\x1e\xbc"
+			  "\x16\x18\x42\xb1\xb1\xd3\x4b\xda"
+			  "\x06\x9f\x8b\x77\xf7\xab\xd6\xed"
+			  "\xa3\x1d\x90\xda\x49\x38\x20\xb8"
+			  "\x6c\xee\xae\x3e\xae\x6c\x03\xb8"
+			  "\x0b\xed\xc8\xaa\x0e\xc5\x1f\x90"
+			  "\x60\xe2\xec\x1b\x76\xd0\xcf\xda"
+			  "\x29\x1b\xb8\x5a\xbc\xf4\xba\x13"
+			  "\x91\xa6\xcb\x83\x3f\xeb\xe9\x7b"
+			  "\x03\xba\x40\x9e\xe6\x7a\xb2\x4a"
+			  "\x73\x49\xfc\xed\xfb\x55\xa4\x24"
+			  "\xc7\xa4\xd7\x4b\xf5\xf7\x16\x62"
+			  "\x80\xd3\x19\x31\x52\x25\xa8\x69"
+			  "\xda\x9a\x87\xf5\xf2\xee\x5d\x61"
+			  "\xc1\x12\x72\x3e\x52\x26\x45\x3a"
+			  "\xd8\x9d\x57\xfa\x14\xe2\x9b\x2f"
+			  "\xd4\xaa\x5e\x31\xf4\x84\x89\xa4"
+			  "\xe3\x0e\xb0\x58\x41\x75\x6a\xcb"
+			  "\x30\x01\x98\x90\x15\x80\xf5\x27"
+			  "\x92\x13\x81\xf0\x1c\x1e\xfc\xb1"
+			  "\x33\xf7\x63\xb0\x67\xec\x2e\x5c"
+			  "\x85\xe3\x5b\xd0\x43\x8a\xb8\x5f"
+			  "\x44\x9f\xec\x19\xc9\x8f\xde\xdf"
+			  "\x79\xef\xf8\xee\x14\x87\xb3\x34"
+			  "\x76\x00\x3a\x9b\xc7\xed\xb1\x3d"
+			  "\xef\x07\xb0\xe4\xfd\x68\x9e\xeb"
+			  "\xc2\xb4\x1a\x85\x9a\x7d\x11\x88"
+			  "\xf8\xab\x43\x55\x2b\x8a\x4f\x60"
+			  "\x85\x9a\xf4\xba\xae\x48\x81\xeb"
+			  "\x93\x07\x97\x9e\xde\x2a\xfc\x4e"
+			  "\x31\xde\xaa\x44\xf7\x2a\xc3\xee"
+			  "\x60\xa2\x98\x2c\x0a\x88\x50\xc5"
+			  "\x6d\x89\xd3\xe4\xb6\xa7\xf4\xb0"
+			  "\xcf\x0e\x89\xe3\x5e\x8f\x82\xf4"
+			  "\x9d\xd1\xa9\x51\x50\x8a\xd2\x18"
+			  "\x07\xb2\xaa\x3b\x7f\x58\x9b\xf4"
+			  "\xb7\x24\x39\xd3\x66\x2f\x1e\xc0"
+			  "\x11\xa3\x56\x56\x2a\x10\x73\xbc"
+			  "\xe1\x23\xbf\xa9\x37\x07\x9c\xc3"
+			  "\xb2\xc9\xa8\x1c\x5b\x5c\x58\xa4"
+			  "\x77\x02\x26\xad\xc3\x40\x11\x53"
+			  "\x93\x68\x72\xde\x05\x8b\x10\xbc"
+			  "\xa6\xd4\x1b\xd9\x27\xd8\x16\x12"
+			  "\x61\x2b\x31\x2a\x44\x87\x96\x58",
+		.len	= 496,
+	},
+};
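
In the vectors above, .iv carries the raw 64-bit IV value zero-padded to a full AES block; the essiv template is expected to derive the actual CBC IV from it before invoking cbc(aes). A minimal sketch of that derivation, assuming the essiv(cbc(aes),sha256) construction (hash the bulk key with SHA-256, key a bare AES cipher with the digest, encrypt the supplied IV) — an editorial illustration only, not code from this patch:

```c
#include <crypto/hash.h>
#include <crypto/sha.h>
#include <linux/crypto.h>
#include <linux/err.h>

/*
 * Editorial sketch of ESSIV IV derivation: iv_out = AES_{SHA256(key)}(iv_in).
 * The essiv template performs the equivalent step internally; shown here only
 * to explain why the .iv fields above are zero-padded values.
 */
static int essiv_derive_iv(const u8 *key, unsigned int keylen,
			   const u8 *iv_in, u8 *iv_out)
{
	u8 salt[SHA256_DIGEST_SIZE];
	struct crypto_shash *hash;
	struct crypto_cipher *essiv_cipher;
	int err;

	hash = crypto_alloc_shash("sha256", 0, 0);
	if (IS_ERR(hash))
		return PTR_ERR(hash);

	{
		SHASH_DESC_ON_STACK(desc, hash);

		desc->tfm = hash;
		/* salt = SHA-256(bulk cipher key) */
		err = crypto_shash_digest(desc, key, keylen, salt);
	}
	crypto_free_shash(hash);
	if (err)
		return err;

	essiv_cipher = crypto_alloc_cipher("aes", 0, 0);
	if (IS_ERR(essiv_cipher))
		return PTR_ERR(essiv_cipher);

	/* Encrypt the zero-padded IV block under the derived salt key. */
	err = crypto_cipher_setkey(essiv_cipher, salt, sizeof(salt));
	if (!err)
		crypto_cipher_encrypt_one(essiv_cipher, iv_out, iv_in);

	crypto_free_cipher(essiv_cipher);
	return err;
}
```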
+
+/* based on hmac_sha256_aes_cbc_tv_temp */
+static const struct aead_testvec essiv_hmac_sha256_aes_cbc_tv_temp[] = {
+	{
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x00\x00\x00\x00\x00\x00\x00\x00"
+			  "\x06\xa9\x21\x40\x36\xb8\xa1\x5b"
+			  "\x51\x2e\x03\xd5\x34\x12\x00\x06",
+		.klen   = 8 + 32 + 16,
+		.iv     = "\xb3\x0c\x5a\x11\x41\xad\xc1\x04"
+			  "\xbc\x1e\x7e\x35\xb0\x5d\x78\x29",
+		.assoc	= "\x3d\xaf\xba\x42\x9d\x9e\xb4\x30"
+			  "\xb4\x22\xda\x80\x2c\x9f\xac\x41",
+		.alen	= 16,
+		.ptext	= "Single block msg",
+		.plen	= 16,
+		.ctext	= "\xe3\x53\x77\x9c\x10\x79\xae\xb8"
+			  "\x27\x08\x94\x2d\xbe\x77\x18\x1a"
+			  "\xcc\xde\x2d\x6a\xae\xf1\x0b\xcc"
+			  "\x38\x06\x38\x51\xb4\xb8\xf3\x5b"
+			  "\x5c\x34\xa6\xa3\x6e\x0b\x05\xe5"
+			  "\x6a\x6d\x44\xaa\x26\xa8\x44\xa5",
+		.clen	= 16 + 32,
+	}, {
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f"
+			  "\xc2\x86\x69\x6d\x88\x7c\x9a\xa0"
+			  "\x61\x1b\xbb\x3e\x20\x25\xa4\x5a",
+		.klen   = 8 + 32 + 16,
+		.iv     = "\x56\xe8\x14\xa5\x74\x18\x75\x13"
+			  "\x2f\x79\xe7\xc8\x65\xe3\x48\x45",
+		.assoc	= "\x56\x2e\x17\x99\x6d\x09\x3d\x28"
+			  "\xdd\xb3\xba\x69\x5a\x2e\x6f\x58",
+		.alen	= 16,
+		.ptext	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f",
+		.plen	= 32,
+		.ctext	= "\xd2\x96\xcd\x94\xc2\xcc\xcf\x8a"
+			  "\x3a\x86\x30\x28\xb5\xe1\xdc\x0a"
+			  "\x75\x86\x60\x2d\x25\x3c\xff\xf9"
+			  "\x1b\x82\x66\xbe\xa6\xd6\x1a\xb1"
+			  "\xf5\x33\x53\xf3\x68\x85\x2a\x99"
+			  "\x0e\x06\x58\x8f\xba\xf6\x06\xda"
+			  "\x49\x69\x0d\x5b\xd4\x36\x06\x62"
+			  "\x35\x5e\x54\x58\x53\x4d\xdf\xbf",
+		.clen	= 32 + 32,
+	}, {
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"            /* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"		/* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55\x66\x77\x88\x99"
+			  "\xaa\xbb\xcc\xdd\xee\xff\x11\x22"
+			  "\x6c\x3e\xa0\x47\x76\x30\xce\x21"
+			  "\xa2\xce\x33\x4a\xa7\x46\xc2\xcd",
+		.klen   = 8 + 32 + 16,
+		.iv     = "\x1f\x6b\xfb\xd6\x6b\x72\x2f\xc9"
+			  "\xb6\x9f\x8c\x10\xa8\x96\x15\x64",
+		.assoc	= "\xc7\x82\xdc\x4c\x09\x8c\x66\xcb"
+			  "\xd9\xcd\x27\xd8\x25\x68\x2c\x81",
+		.alen	= 16,
+		.ptext	= "This is a 48-byte message (exactly 3 AES blocks)",
+		.plen	= 48,
+		.ctext	= "\xd0\xa0\x2b\x38\x36\x45\x17\x53"
+			  "\xd4\x93\x66\x5d\x33\xf0\xe8\x86"
+			  "\x2d\xea\x54\xcd\xb2\x93\xab\xc7"
+			  "\x50\x69\x39\x27\x67\x72\xf8\xd5"
+			  "\x02\x1c\x19\x21\x6b\xad\x52\x5c"
+			  "\x85\x79\x69\x5d\x83\xba\x26\x84"
+			  "\x68\xb9\x3e\x90\x38\xa0\x88\x01"
+			  "\xe7\xc6\xce\x10\x31\x2f\x9b\x1d"
+			  "\x24\x78\xfb\xbe\x02\xe0\x4f\x40"
+			  "\x10\xbd\xaa\xc6\xa7\x79\xe0\x1a",
+		.clen	= 48 + 32,
+	}, {
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"            /* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55\x66\x77\x88\x99"
+			  "\xaa\xbb\xcc\xdd\xee\xff\x11\x22"
+			  "\x56\xe4\x7a\x38\xc5\x59\x89\x74"
+			  "\xbc\x46\x90\x3d\xba\x29\x03\x49",
+		.klen   = 8 + 32 + 16,
+		.iv     = "\x13\xe5\xf2\xef\x61\x97\x59\x35"
+			  "\x9b\x36\x84\x46\x4e\x63\xd1\x41",
+		.assoc	= "\x8c\xe8\x2e\xef\xbe\xa0\xda\x3c"
+			  "\x44\x69\x9e\xd7\xdb\x51\xb7\xd9",
+		.alen	= 16,
+		.ptext	= "\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7"
+			  "\xa8\xa9\xaa\xab\xac\xad\xae\xaf"
+			  "\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7"
+			  "\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf"
+			  "\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7"
+			  "\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf"
+			  "\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7"
+			  "\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf",
+		.plen	= 64,
+		.ctext	= "\xc3\x0e\x32\xff\xed\xc0\x77\x4e"
+			  "\x6a\xff\x6a\xf0\x86\x9f\x71\xaa"
+			  "\x0f\x3a\xf0\x7a\x9a\x31\xa9\xc6"
+			  "\x84\xdb\x20\x7e\xb0\xef\x8e\x4e"
+			  "\x35\x90\x7a\xa6\x32\xc3\xff\xdf"
+			  "\x86\x8b\xb7\xb2\x9d\x3d\x46\xad"
+			  "\x83\xce\x9f\x9a\x10\x2e\xe9\x9d"
+			  "\x49\xa5\x3e\x87\xf4\xc3\xda\x55"
+			  "\x7a\x1b\xd4\x3c\xdb\x17\x95\xe2"
+			  "\xe0\x93\xec\xc9\x9f\xf7\xce\xd8"
+			  "\x3f\x54\xe2\x49\x39\xe3\x71\x25"
+			  "\x2b\x6c\xe9\x5d\xec\xec\x2b\x64",
+		.clen	= 64 + 32,
+	}, {
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"            /* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"            /* rta type */
+#endif
+			  "\x00\x00\x00\x10"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55\x66\x77\x88\x99"
+			  "\xaa\xbb\xcc\xdd\xee\xff\x11\x22"
+			  "\x90\xd3\x82\xb4\x10\xee\xba\x7a"
+			  "\xd9\x38\xc4\x6c\xec\x1a\x82\xbf",
+		.klen   = 8 + 32 + 16,
+		.iv     = "\xe4\x13\xa1\x15\xe9\x6b\xb8\x23"
+			  "\x81\x7a\x94\x29\xab\xfd\xd2\x2c",
+		.assoc  = "\x00\x00\x43\x21\x00\x00\x00\x01"
+			  "\xe9\x6e\x8c\x08\xab\x46\x57\x63"
+			  "\xfd\x09\x8d\x45\xdd\x3f\xf8\x93",
+		.alen   = 24,
+		.ptext	= "\x08\x00\x0e\xbd\xa7\x0a\x00\x00"
+			  "\x8e\x9c\x08\x3d\xb9\x5b\x07\x00"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f"
+			  "\x10\x11\x12\x13\x14\x15\x16\x17"
+			  "\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
+			  "\x20\x21\x22\x23\x24\x25\x26\x27"
+			  "\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f"
+			  "\x30\x31\x32\x33\x34\x35\x36\x37"
+			  "\x01\x02\x03\x04\x05\x06\x07\x08"
+			  "\x09\x0a\x0b\x0c\x0d\x0e\x0e\x01",
+		.plen	= 80,
+		.ctext	= "\xf6\x63\xc2\x5d\x32\x5c\x18\xc6"
+			  "\xa9\x45\x3e\x19\x4e\x12\x08\x49"
+			  "\xa4\x87\x0b\x66\xcc\x6b\x99\x65"
+			  "\x33\x00\x13\xb4\x89\x8d\xc8\x56"
+			  "\xa4\x69\x9e\x52\x3a\x55\xdb\x08"
+			  "\x0b\x59\xec\x3a\x8e\x4b\x7e\x52"
+			  "\x77\x5b\x07\xd1\xdb\x34\xed\x9c"
+			  "\x53\x8a\xb5\x0c\x55\x1b\x87\x4a"
+			  "\xa2\x69\xad\xd0\x47\xad\x2d\x59"
+			  "\x13\xac\x19\xb7\xcf\xba\xd4\xa6"
+			  "\xbb\xd4\x0f\xbe\xa3\x3b\x4c\xb8"
+			  "\x3a\xd2\xe1\x03\x86\xa5\x59\xb7"
+			  "\x73\xc3\x46\x20\x2c\xb1\xef\x68"
+			  "\xbb\x8a\x32\x7e\x12\x8c\x69\xcf",
+		.clen	= 80 + 32,
+	}, {
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"            /* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"            /* rta type */
+#endif
+			  "\x00\x00\x00\x18"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55\x66\x77\x88\x99"
+			  "\xaa\xbb\xcc\xdd\xee\xff\x11\x22"
+			  "\x8e\x73\xb0\xf7\xda\x0e\x64\x52"
+			  "\xc8\x10\xf3\x2b\x80\x90\x79\xe5"
+			  "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b",
+		.klen   = 8 + 32 + 24,
+		.iv     = "\x49\xca\x41\xc9\x6b\xbf\x6c\x98"
+			  "\x38\x2f\xa7\x3d\x4d\x80\x49\xb0",
+		.assoc	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.alen   = 16,
+		.ptext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.plen	= 64,
+		.ctext	= "\x4f\x02\x1d\xb2\x43\xbc\x63\x3d"
+			  "\x71\x78\x18\x3a\x9f\xa0\x71\xe8"
+			  "\xb4\xd9\xad\xa9\xad\x7d\xed\xf4"
+			  "\xe5\xe7\x38\x76\x3f\x69\x14\x5a"
+			  "\x57\x1b\x24\x20\x12\xfb\x7a\xe0"
+			  "\x7f\xa9\xba\xac\x3d\xf1\x02\xe0"
+			  "\x08\xb0\xe2\x79\x88\x59\x88\x81"
+			  "\xd9\x20\xa9\xe6\x4f\x56\x15\xcd"
+			  "\x2f\xee\x5f\xdb\x66\xfe\x79\x09"
+			  "\x61\x81\x31\xea\x5b\x3d\x8e\xfb"
+			  "\xca\x71\x85\x93\xf7\x85\x55\x8b"
+			  "\x7a\xe4\x94\xca\x8b\xba\x19\x33",
+		.clen	= 64 + 32,
+	}, {
+#ifdef __LITTLE_ENDIAN
+		.key    = "\x08\x00"		/* rta length */
+			  "\x01\x00"		/* rta type */
+#else
+		.key    = "\x00\x08"		/* rta length */
+			  "\x00\x01"            /* rta type */
+#endif
+			  "\x00\x00\x00\x20"	/* enc key length */
+			  "\x11\x22\x33\x44\x55\x66\x77\x88"
+			  "\x99\xaa\xbb\xcc\xdd\xee\xff\x11"
+			  "\x22\x33\x44\x55\x66\x77\x88\x99"
+			  "\xaa\xbb\xcc\xdd\xee\xff\x11\x22"
+			  "\x60\x3d\xeb\x10\x15\xca\x71\xbe"
+			  "\x2b\x73\xae\xf0\x85\x7d\x77\x81"
+			  "\x1f\x35\x2c\x07\x3b\x61\x08\xd7"
+			  "\x2d\x98\x10\xa3\x09\x14\xdf\xf4",
+		.klen   = 8 + 32 + 32,
+		.iv     = "\xdf\xab\xf2\x7c\xdc\xe0\x33\x4c"
+			  "\xf9\x75\xaf\xf9\x2f\x60\x3a\x9b",
+		.assoc	= "\x00\x01\x02\x03\x04\x05\x06\x07"
+			  "\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f",
+		.alen   = 16,
+		.ptext	= "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96"
+			  "\xe9\x3d\x7e\x11\x73\x93\x17\x2a"
+			  "\xae\x2d\x8a\x57\x1e\x03\xac\x9c"
+			  "\x9e\xb7\x6f\xac\x45\xaf\x8e\x51"
+			  "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11"
+			  "\xe5\xfb\xc1\x19\x1a\x0a\x52\xef"
+			  "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17"
+			  "\xad\x2b\x41\x7b\xe6\x6c\x37\x10",
+		.plen	= 64,
+		.ctext	= "\xf5\x8c\x4c\x04\xd6\xe5\xf1\xba"
+			  "\x77\x9e\xab\xfb\x5f\x7b\xfb\xd6"
+			  "\x9c\xfc\x4e\x96\x7e\xdb\x80\x8d"
+			  "\x67\x9f\x77\x7b\xc6\x70\x2c\x7d"
+			  "\x39\xf2\x33\x69\xa9\xd9\xba\xcf"
+			  "\xa5\x30\xe2\x63\x04\x23\x14\x61"
+			  "\xb2\xeb\x05\xe2\xc3\x9b\xe9\xfc"
+			  "\xda\x6c\x19\x07\x8c\x6a\x9d\x1b"
+			  "\x24\x29\xed\xc2\x31\x49\xdb\xb1"
+			  "\x8f\x74\xbd\x17\x92\x03\xbe\x8f"
+			  "\xf3\x61\xde\x1c\xe9\xdb\xcd\xd0"
+			  "\xcc\xce\xe9\x85\x57\xcf\x6f\x5f",
+		.clen	= 64 + 32,
+	},
+};
+
 #endif	/* _CRYPTO_TESTMGR_H */
diff --git a/crypto/xts.c b/crypto/xts.c
index 1121100..ab11763 100644
--- a/crypto/xts.c
+++ b/crypto/xts.c
@@ -1,8 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /* XTS: as defined in IEEE1619/D16
  *	http://grouper.ieee.org/groups/1619/email/pdf00086.pdf
- *	(sector sizes which are not a multiple of 16 bytes are,
- *	however currently unsupported)
  *
  * Copyright (c) 2007 Rik Snel <rsnel@cube.dyndns.org>
  *
@@ -34,6 +32,8 @@
 
 struct rctx {
 	le128 t;
+	struct scatterlist *tail;
+	struct scatterlist sg[2];
 	struct skcipher_request subreq;
 };
 
@@ -84,10 +84,11 @@
  * multiple calls to the 'ecb(..)' instance, which usually would be slower than
  * just doing the gf128mul_x_ble() calls again.
  */
-static int xor_tweak(struct skcipher_request *req, bool second_pass)
+static int xor_tweak(struct skcipher_request *req, bool second_pass, bool enc)
 {
 	struct rctx *rctx = skcipher_request_ctx(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	const bool cts = (req->cryptlen % XTS_BLOCK_SIZE);
 	const int bs = XTS_BLOCK_SIZE;
 	struct skcipher_walk w;
 	le128 t = rctx->t;
@@ -109,6 +110,20 @@
 		wdst = w.dst.virt.addr;
 
 		do {
+			if (unlikely(cts) &&
+			    w.total - w.nbytes + avail < 2 * XTS_BLOCK_SIZE) {
+				if (!enc) {
+					if (second_pass)
+						rctx->t = t;
+					gf128mul_x_ble(&t, &t);
+				}
+				le128_xor(wdst, &t, wsrc);
+				if (enc && second_pass)
+					gf128mul_x_ble(&rctx->t, &t);
+				skcipher_walk_done(&w, avail - bs);
+				return 0;
+			}
+
 			le128_xor(wdst++, &t, wsrc++);
 			gf128mul_x_ble(&t, &t);
 		} while ((avail -= bs) >= bs);
@@ -119,17 +134,71 @@
 	return err;
 }
 
-static int xor_tweak_pre(struct skcipher_request *req)
+static int xor_tweak_pre(struct skcipher_request *req, bool enc)
 {
-	return xor_tweak(req, false);
+	return xor_tweak(req, false, enc);
 }
 
-static int xor_tweak_post(struct skcipher_request *req)
+static int xor_tweak_post(struct skcipher_request *req, bool enc)
 {
-	return xor_tweak(req, true);
+	return xor_tweak(req, true, enc);
 }
 
-static void crypt_done(struct crypto_async_request *areq, int err)
+static void cts_done(struct crypto_async_request *areq, int err)
+{
+	struct skcipher_request *req = areq->data;
+	le128 b;
+
+	if (!err) {
+		struct rctx *rctx = skcipher_request_ctx(req);
+
+		scatterwalk_map_and_copy(&b, rctx->tail, 0, XTS_BLOCK_SIZE, 0);
+		le128_xor(&b, &rctx->t, &b);
+		scatterwalk_map_and_copy(&b, rctx->tail, 0, XTS_BLOCK_SIZE, 1);
+	}
+
+	skcipher_request_complete(req, err);
+}
+
+static int cts_final(struct skcipher_request *req,
+		     int (*crypt)(struct skcipher_request *req))
+{
+	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
+	int offset = req->cryptlen & ~(XTS_BLOCK_SIZE - 1);
+	struct rctx *rctx = skcipher_request_ctx(req);
+	struct skcipher_request *subreq = &rctx->subreq;
+	int tail = req->cryptlen % XTS_BLOCK_SIZE;
+	le128 b[2];
+	int err;
+
+	rctx->tail = scatterwalk_ffwd(rctx->sg, req->dst,
+				      offset - XTS_BLOCK_SIZE);
+
+	scatterwalk_map_and_copy(b, rctx->tail, 0, XTS_BLOCK_SIZE, 0);
+	memcpy(b + 1, b, tail);
+	scatterwalk_map_and_copy(b, req->src, offset, tail, 0);
+
+	le128_xor(b, &rctx->t, b);
+
+	scatterwalk_map_and_copy(b, rctx->tail, 0, XTS_BLOCK_SIZE + tail, 1);
+
+	skcipher_request_set_tfm(subreq, ctx->child);
+	skcipher_request_set_callback(subreq, req->base.flags, cts_done, req);
+	skcipher_request_set_crypt(subreq, rctx->tail, rctx->tail,
+				   XTS_BLOCK_SIZE, NULL);
+
+	err = crypt(subreq);
+	if (err)
+		return err;
+
+	scatterwalk_map_and_copy(b, rctx->tail, 0, XTS_BLOCK_SIZE, 0);
+	le128_xor(b, &rctx->t, b);
+	scatterwalk_map_and_copy(b, rctx->tail, 0, XTS_BLOCK_SIZE, 1);
+
+	return 0;
+}
+
+static void encrypt_done(struct crypto_async_request *areq, int err)
 {
 	struct skcipher_request *req = areq->data;
 
@@ -137,47 +206,90 @@
 		struct rctx *rctx = skcipher_request_ctx(req);
 
 		rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
-		err = xor_tweak_post(req);
+		err = xor_tweak_post(req, true);
+
+		if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) {
+			err = cts_final(req, crypto_skcipher_encrypt);
+			if (err == -EINPROGRESS)
+				return;
+		}
 	}
 
 	skcipher_request_complete(req, err);
 }
 
-static void init_crypt(struct skcipher_request *req)
+static void decrypt_done(struct crypto_async_request *areq, int err)
+{
+	struct skcipher_request *req = areq->data;
+
+	if (!err) {
+		struct rctx *rctx = skcipher_request_ctx(req);
+
+		rctx->subreq.base.flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
+		err = xor_tweak_post(req, false);
+
+		if (!err && unlikely(req->cryptlen % XTS_BLOCK_SIZE)) {
+			err = cts_final(req, crypto_skcipher_decrypt);
+			if (err == -EINPROGRESS)
+				return;
+		}
+	}
+
+	skcipher_request_complete(req, err);
+}
+
+static int init_crypt(struct skcipher_request *req, crypto_completion_t compl)
 {
 	struct priv *ctx = crypto_skcipher_ctx(crypto_skcipher_reqtfm(req));
 	struct rctx *rctx = skcipher_request_ctx(req);
 	struct skcipher_request *subreq = &rctx->subreq;
 
+	if (req->cryptlen < XTS_BLOCK_SIZE)
+		return -EINVAL;
+
 	skcipher_request_set_tfm(subreq, ctx->child);
-	skcipher_request_set_callback(subreq, req->base.flags, crypt_done, req);
+	skcipher_request_set_callback(subreq, req->base.flags, compl, req);
 	skcipher_request_set_crypt(subreq, req->dst, req->dst,
-				   req->cryptlen, NULL);
+				   req->cryptlen & ~(XTS_BLOCK_SIZE - 1), NULL);
 
 	/* calculate first value of T */
 	crypto_cipher_encrypt_one(ctx->tweak, (u8 *)&rctx->t, req->iv);
+
+	return 0;
 }
 
 static int encrypt(struct skcipher_request *req)
 {
 	struct rctx *rctx = skcipher_request_ctx(req);
 	struct skcipher_request *subreq = &rctx->subreq;
+	int err;
 
-	init_crypt(req);
-	return xor_tweak_pre(req) ?:
-		crypto_skcipher_encrypt(subreq) ?:
-		xor_tweak_post(req);
+	err = init_crypt(req, encrypt_done) ?:
+	      xor_tweak_pre(req, true) ?:
+	      crypto_skcipher_encrypt(subreq) ?:
+	      xor_tweak_post(req, true);
+
+	if (err || likely((req->cryptlen % XTS_BLOCK_SIZE) == 0))
+		return err;
+
+	return cts_final(req, crypto_skcipher_encrypt);
 }
 
 static int decrypt(struct skcipher_request *req)
 {
 	struct rctx *rctx = skcipher_request_ctx(req);
 	struct skcipher_request *subreq = &rctx->subreq;
+	int err;
 
-	init_crypt(req);
-	return xor_tweak_pre(req) ?:
-		crypto_skcipher_decrypt(subreq) ?:
-		xor_tweak_post(req);
+	err = init_crypt(req, decrypt_done) ?:
+	      xor_tweak_pre(req, false) ?:
+	      crypto_skcipher_decrypt(subreq) ?:
+	      xor_tweak_post(req, false);
+
+	if (err || likely((req->cryptlen % XTS_BLOCK_SIZE) == 0))
+		return err;
+
+	return cts_final(req, crypto_skcipher_decrypt);
 }
 
 static int init_tfm(struct crypto_skcipher *tfm)
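[Editor's note on the ciphertext stealing added above: when req->cryptlen is not a multiple of XTS_BLOCK_SIZE, all full blocks are processed first and cts_final() then "steals" the tail of the last full ciphertext block to pad the final partial block. The following is a minimal, self-contained sketch of that final encryption step; ecb_encrypt_block() is a hypothetical one-block helper and the whole function is an illustration of the scheme, not the scatterlist-based kernel code above.

#include <stdint.h>
#include <string.h>

#define BS 16

/* Hypothetical single-block ECB helper standing in for the child cipher. */
void ecb_encrypt_block(const void *key, const uint8_t in[BS], uint8_t out[BS]);

static void xor_block(uint8_t *d, const uint8_t *a, const uint8_t *b)
{
	int i;

	for (i = 0; i < BS; i++)
		d[i] = a[i] ^ b[i];
}

/*
 * c_prev holds the last full ciphertext block already produced (C_{m-1});
 * p_tail is the remaining plaintext (tail < BS bytes); tweak is the next
 * tweak value T_m. On return, c_prev is the new last full block and c_tail
 * receives the stolen bytes, i.e. the final short ciphertext block C_m.
 */
static void xts_cts_encrypt_final(const void *key, const uint8_t tweak[BS],
				  uint8_t c_prev[BS], const uint8_t *p_tail,
				  size_t tail, uint8_t *c_tail)
{
	uint8_t b[BS];

	memcpy(c_tail, c_prev, tail);	/* steal: head of C_{m-1} becomes C_m */
	memcpy(b, c_prev, BS);		/* pad P_m with the tail of C_{m-1} */
	memcpy(b, p_tail, tail);
	xor_block(b, b, tweak);		/* pre-whiten with the tweak */
	ecb_encrypt_block(key, b, b);
	xor_block(c_prev, b, tweak);	/* post-whiten; replaces C_{m-1} */
}

— end of note.]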
diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c
index 4334262..e557057 100644
--- a/drivers/char/hw_random/atmel-rng.c
+++ b/drivers/char/hw_random/atmel-rng.c
@@ -86,7 +86,7 @@
 	trng->rng.name = pdev->name;
 	trng->rng.read = atmel_trng_read;
 
-	ret = hwrng_register(&trng->rng);
+	ret = devm_hwrng_register(&pdev->dev, &trng->rng);
 	if (ret)
 		goto err_register;
 
@@ -103,7 +103,6 @@
 {
 	struct atmel_trng *trng = platform_get_drvdata(pdev);
 
-	hwrng_unregister(&trng->rng);
 
 	atmel_trng_disable(trng);
 	clk_disable_unprepare(trng->clk);
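[Editor's note: this and the following hw_random drivers are converted from hwrng_register()/hwrng_unregister() pairs to devm_hwrng_register(), so unregistration is tied to device teardown and the explicit calls in the remove paths can be dropped. A sketch of the resulting probe shape, with illustrative "foo" names and body:

#include <linux/err.h>
#include <linux/hw_random.h>
#include <linux/platform_device.h>

struct foo_rng {
	struct hwrng rng;
	void __iomem *base;
};

static int foo_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
{
	/* read up to max bytes from the hardware FIFO ... */
	return 0;
}

static int foo_rng_probe(struct platform_device *pdev)
{
	struct foo_rng *priv;

	priv = devm_kzalloc(&pdev->dev, sizeof(*priv), GFP_KERNEL);
	if (!priv)
		return -ENOMEM;

	priv->base = devm_platform_ioremap_resource(pdev, 0);
	if (IS_ERR(priv->base))
		return PTR_ERR(priv->base);

	priv->rng.name = pdev->name;
	priv->rng.read = foo_rng_read;

	/* Unregistered automatically when the device is unbound. */
	return devm_hwrng_register(&pdev->dev, &priv->rng);
}

— end of note.]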
diff --git a/drivers/char/hw_random/cavium-rng-vf.c b/drivers/char/hw_random/cavium-rng-vf.c
index 2d1352b..3de4a6a 100644
--- a/drivers/char/hw_random/cavium-rng-vf.c
+++ b/drivers/char/hw_random/cavium-rng-vf.c
@@ -67,7 +67,7 @@
 
 	pci_set_drvdata(pdev, rng);
 
-	ret = hwrng_register(&rng->ops);
+	ret = devm_hwrng_register(&pdev->dev, &rng->ops);
 	if (ret) {
 		dev_err(&pdev->dev, "Error registering device as HWRNG.\n");
 		return ret;
@@ -76,14 +76,6 @@
 	return 0;
 }
 
-/* Remove the VF */
-static void  cavium_rng_remove_vf(struct pci_dev *pdev)
-{
-	struct cavium_rng *rng;
-
-	rng = pci_get_drvdata(pdev);
-	hwrng_unregister(&rng->ops);
-}
 
 static const struct pci_device_id cavium_rng_vf_id_table[] = {
 	{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, 0xa033), 0, 0, 0},
@@ -95,7 +87,6 @@
 	.name		= "cavium_rng_vf",
 	.id_table	= cavium_rng_vf_id_table,
 	.probe		= cavium_rng_probe_vf,
-	.remove		= cavium_rng_remove_vf,
 };
 module_pci_driver(cavium_rng_vf_driver);
 
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index 9044d31..bdab5d9 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -13,6 +13,7 @@
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/err.h>
+#include <linux/freezer.h>
 #include <linux/fs.h>
 #include <linux/hw_random.h>
 #include <linux/kernel.h>
@@ -421,7 +422,9 @@
 {
 	long rc;
 
-	while (!kthread_should_stop()) {
+	set_freezable();
+
+	while (!kthread_freezable_should_stop(NULL)) {
 		struct hwrng *rng;
 
 		rng = get_current_rng();
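[Editor's note: the hwrng fill thread (and, further down, add_hwgenerator_randomness() in drivers/char/random.c) is made freezer-aware so it parks cleanly across suspend/hibernate instead of blocking the freezer. The pattern, shown as an illustrative loop rather than the actual hwrng_fillfn():

#include <linux/freezer.h>
#include <linux/jiffies.h>
#include <linux/kthread.h>
#include <linux/sched.h>

static int example_fill_thread(void *data)
{
	set_freezable();

	while (!kthread_freezable_should_stop(NULL)) {
		/*
		 * Do one unit of work; kthread_freezable_should_stop()
		 * freezes the thread here when the system suspends.
		 */
		schedule_timeout_interruptible(HZ);
	}

	return 0;
}

— end of note.]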
diff --git a/drivers/char/hw_random/exynos-trng.c b/drivers/char/hw_random/exynos-trng.c
index 9423576..b4b52ab 100644
--- a/drivers/char/hw_random/exynos-trng.c
+++ b/drivers/char/hw_random/exynos-trng.c
@@ -153,7 +153,7 @@
 		goto err_clock;
 	}
 
-	ret = hwrng_register(&trng->rng);
+	ret = devm_hwrng_register(&pdev->dev, &trng->rng);
 	if (ret) {
 		dev_err(&pdev->dev, "Could not register hwrng device.\n");
 		goto err_register;
@@ -179,7 +179,6 @@
 {
 	struct exynos_trng_dev *trng =  platform_get_drvdata(pdev);
 
-	hwrng_unregister(&trng->rng);
 	clk_disable_unprepare(trng->clk);
 
 	pm_runtime_put_sync(&pdev->dev);
diff --git a/drivers/char/hw_random/imx-rngc.c b/drivers/char/hw_random/imx-rngc.c
index 69f5379..30cf00f 100644
--- a/drivers/char/hw_random/imx-rngc.c
+++ b/drivers/char/hw_random/imx-rngc.c
@@ -196,7 +196,6 @@
 static int imx_rngc_probe(struct platform_device *pdev)
 {
 	struct imx_rngc *rngc;
-	struct resource *res;
 	int ret;
 	int irq;
 
@@ -204,8 +203,7 @@
 	if (!rngc)
 		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	rngc->base = devm_ioremap_resource(&pdev->dev, res);
+	rngc->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(rngc->base))
 		return PTR_ERR(rngc->base);
 
diff --git a/drivers/char/hw_random/mxc-rnga.c b/drivers/char/hw_random/mxc-rnga.c
index ea2bf18..025083c 100644
--- a/drivers/char/hw_random/mxc-rnga.c
+++ b/drivers/char/hw_random/mxc-rnga.c
@@ -134,7 +134,6 @@
 static int __init mxc_rnga_probe(struct platform_device *pdev)
 {
 	int err;
-	struct resource *res;
 	struct mxc_rng *mxc_rng;
 
 	mxc_rng = devm_kzalloc(&pdev->dev, sizeof(*mxc_rng), GFP_KERNEL);
@@ -158,8 +157,7 @@
 	if (err)
 		return err;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	mxc_rng->mem = devm_ioremap_resource(&pdev->dev, res);
+	mxc_rng->mem = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(mxc_rng->mem)) {
 		err = PTR_ERR(mxc_rng->mem);
 		goto err_ioremap;
diff --git a/drivers/char/hw_random/n2-drv.c b/drivers/char/hw_random/n2-drv.c
index d4cab10..73e4081 100644
--- a/drivers/char/hw_random/n2-drv.c
+++ b/drivers/char/hw_random/n2-drv.c
@@ -768,7 +768,7 @@
 	np->hwrng.data_read = n2rng_data_read;
 	np->hwrng.priv = (unsigned long) np;
 
-	err = hwrng_register(&np->hwrng);
+	err = devm_hwrng_register(&op->dev, &np->hwrng);
 	if (err)
 		goto out_hvapi_unregister;
 
@@ -793,8 +793,6 @@
 
 	cancel_delayed_work_sync(&np->work);
 
-	hwrng_unregister(&np->hwrng);
-
 	sun4v_hvapi_unregister(HV_GRP_RNG);
 
 	return 0;
diff --git a/drivers/char/hw_random/nomadik-rng.c b/drivers/char/hw_random/nomadik-rng.c
index fc0f6b0..74ed29f 100644
--- a/drivers/char/hw_random/nomadik-rng.c
+++ b/drivers/char/hw_random/nomadik-rng.c
@@ -57,7 +57,7 @@
 	if (!base)
 		goto out_release;
 	nmk_rng.priv = (unsigned long)base;
-	ret = hwrng_register(&nmk_rng);
+	ret = devm_hwrng_register(&dev->dev, &nmk_rng);
 	if (ret)
 		goto out_release;
 	return 0;
@@ -71,7 +71,6 @@
 
 static int nmk_rng_remove(struct amba_device *dev)
 {
-	hwrng_unregister(&nmk_rng);
 	amba_release_regions(dev);
 	clk_disable(rng_clk);
 	return 0;
diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index e9b6ac6..b27f396 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -500,7 +500,7 @@
 	if (ret)
 		goto err_register;
 
-	ret = hwrng_register(&priv->rng);
+	ret = devm_hwrng_register(&pdev->dev, &priv->rng);
 	if (ret)
 		goto err_register;
 
@@ -525,7 +525,6 @@
 {
 	struct omap_rng_dev *priv = platform_get_drvdata(pdev);
 
-	hwrng_unregister(&priv->rng);
 
 	priv->pdata->cleanup(priv);
 
diff --git a/drivers/char/hw_random/powernv-rng.c b/drivers/char/hw_random/powernv-rng.c
index f2e8272..8da1d79 100644
--- a/drivers/char/hw_random/powernv-rng.c
+++ b/drivers/char/hw_random/powernv-rng.c
@@ -33,18 +33,11 @@
 	.read = powernv_rng_read,
 };
 
-static int powernv_rng_remove(struct platform_device *pdev)
-{
-	hwrng_unregister(&powernv_hwrng);
-
-	return 0;
-}
-
 static int powernv_rng_probe(struct platform_device *pdev)
 {
 	int rc;
 
-	rc = hwrng_register(&powernv_hwrng);
+	rc = devm_hwrng_register(&pdev->dev, &powernv_hwrng);
 	if (rc) {
 		/* We only register one device, ignore any others */
 		if (rc == -EEXIST)
@@ -70,7 +63,6 @@
 		.of_match_table = powernv_rng_match,
 	},
 	.probe	= powernv_rng_probe,
-	.remove = powernv_rng_remove,
 };
 module_platform_driver(powernv_rng_driver);
 
diff --git a/drivers/char/hw_random/st-rng.c b/drivers/char/hw_random/st-rng.c
index bd6a98b..8634483 100644
--- a/drivers/char/hw_random/st-rng.c
+++ b/drivers/char/hw_random/st-rng.c
@@ -102,7 +102,7 @@
 
 	dev_set_drvdata(&pdev->dev, ddata);
 
-	ret = hwrng_register(&ddata->ops);
+	ret = devm_hwrng_register(&pdev->dev, &ddata->ops);
 	if (ret) {
 		dev_err(&pdev->dev, "Failed to register HW RNG\n");
 		clk_disable_unprepare(clk);
@@ -118,8 +118,6 @@
 {
 	struct st_rng_data *ddata = dev_get_drvdata(&pdev->dev);
 
-	hwrng_unregister(&ddata->ops);
-
 	clk_disable_unprepare(ddata->clk);
 
 	return 0;
diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c
index ccd1f6e..e262445 100644
--- a/drivers/char/hw_random/timeriomem-rng.c
+++ b/drivers/char/hw_random/timeriomem-rng.c
@@ -117,9 +117,9 @@
 	if (!res)
 		return -ENXIO;
 
-	if (res->start % 4 != 0 || resource_size(res) != 4) {
+	if (res->start % 4 != 0 || resource_size(res) < 4) {
 		dev_err(&pdev->dev,
-			"address must be four bytes wide and aligned\n");
+			"address must be at least four bytes wide and 32-bit aligned\n");
 		return -EINVAL;
 	}
 
diff --git a/drivers/char/hw_random/xgene-rng.c b/drivers/char/hw_random/xgene-rng.c
index 8c6f9f6..7e568db 100644
--- a/drivers/char/hw_random/xgene-rng.c
+++ b/drivers/char/hw_random/xgene-rng.c
@@ -361,7 +361,7 @@
 
 	xgene_rng_func.priv = (unsigned long) ctx;
 
-	rc = hwrng_register(&xgene_rng_func);
+	rc = devm_hwrng_register(&pdev->dev, &xgene_rng_func);
 	if (rc) {
 		dev_err(&pdev->dev, "RNG registering failed error %d\n", rc);
 		if (!IS_ERR(ctx->clk))
@@ -375,7 +375,6 @@
 			rc);
 		if (!IS_ERR(ctx->clk))
 			clk_disable_unprepare(ctx->clk);
-		hwrng_unregister(&xgene_rng_func);
 		return rc;
 	}
 
@@ -392,7 +391,6 @@
 		dev_err(&pdev->dev, "RNG init wakeup failed error %d\n", rc);
 	if (!IS_ERR(ctx->clk))
 		clk_disable_unprepare(ctx->clk);
-	hwrng_unregister(&xgene_rng_func);
 
 	return rc;
 }
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 566922d..d3beed0 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -327,6 +327,7 @@
 #include <linux/percpu.h>
 #include <linux/cryptohash.h>
 #include <linux/fips.h>
+#include <linux/freezer.h>
 #include <linux/ptrace.h>
 #include <linux/workqueue.h>
 #include <linux/irq.h>
@@ -2439,7 +2440,8 @@
 	 * We'll be woken up again once below random_write_wakeup_thresh,
 	 * or when the calling thread is about to terminate.
 	 */
-	wait_event_interruptible(random_write_wait, kthread_should_stop() ||
+	wait_event_freezable(random_write_wait,
+			kthread_should_stop() ||
 			ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits);
 	mix_pool_bytes(poolp, buffer, count);
 	credit_entropy_bits(poolp, entropy);
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index d7c85c7..1fb622f 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -27,7 +27,7 @@
 	tristate "PadLock driver for AES algorithm"
 	depends on CRYPTO_DEV_PADLOCK
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_AES
+	select CRYPTO_LIB_AES
 	help
 	  Use VIA PadLock for AES algorithm.
 
@@ -170,7 +170,7 @@
 	depends on S390
 	select CRYPTO_ALGAPI
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	help
 	  This is the s390 hardware accelerated implementation of the
 	  DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
@@ -209,12 +209,12 @@
 	  It is available as of z9.
 
 config CRYPTO_GHASH_S390
-	tristate "GHASH digest algorithm"
+	tristate "GHASH hash function"
 	depends on S390
 	select CRYPTO_HASH
 	help
-	  This is the s390 hardware accelerated implementation of the
-	  GHASH message digest algorithm for GCM (Galois/Counter Mode).
+	  This is the s390 hardware accelerated implementation of GHASH,
+	  the hash function used in GCM (Galois/Counter mode).
 
 	  It is available as of z196.
 
@@ -234,8 +234,8 @@
 config CRYPTO_DEV_MARVELL_CESA
 	tristate "Marvell's Cryptographic Engine driver"
 	depends on PLAT_ORION || ARCH_MVEBU
-	select CRYPTO_AES
-	select CRYPTO_DES
+	select CRYPTO_LIB_AES
+	select CRYPTO_LIB_DES
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_HASH
 	select SRAM
@@ -247,7 +247,7 @@
 
 config CRYPTO_DEV_NIAGARA2
        tristate "Niagara2 Stream Processing Unit driver"
-       select CRYPTO_DES
+       select CRYPTO_LIB_DES
        select CRYPTO_BLKCIPHER
        select CRYPTO_HASH
        select CRYPTO_MD5
@@ -264,7 +264,7 @@
 
 config CRYPTO_DEV_HIFN_795X
 	tristate "Driver HIFN 795x crypto accelerator chips"
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_BLKCIPHER
 	select HW_RANDOM if CRYPTO_DEV_HIFN_795X_RNG
 	depends on PCI
@@ -320,7 +320,7 @@
 config CRYPTO_DEV_IXP4XX
 	tristate "Driver for IXP4xx crypto hardware acceleration"
 	depends on ARCH_IXP4XX && IXP4XX_QMGR && IXP4XX_NPE
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_AEAD
 	select CRYPTO_AUTHENC
 	select CRYPTO_BLKCIPHER
@@ -332,7 +332,7 @@
 	depends on PPC && 4xx
 	select CRYPTO_HASH
 	select CRYPTO_AEAD
-	select CRYPTO_AES
+	select CRYPTO_LIB_AES
 	select CRYPTO_CCM
 	select CRYPTO_CTR
 	select CRYPTO_GCM
@@ -386,7 +386,7 @@
 config CRYPTO_DEV_OMAP_DES
 	tristate "Support for OMAP DES/3DES hw engine"
 	depends on ARCH_OMAP2PLUS
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_ENGINE
 	help
@@ -404,7 +404,7 @@
 	select CRYPTO_AES
 	select CRYPTO_AUTHENC
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_CBC
 	select CRYPTO_ECB
 	select CRYPTO_SEQIV
@@ -413,7 +413,7 @@
 	  Picochip picoXcell SoC devices. Select this for IPSEC ESP offload
 	  and for 3gpp Layer 2 ciphering support.
 
-	  Saying m here will build a module named pipcoxcell_crypto.
+	  Saying m here will build a module named picoxcell_crypto.
 
 config CRYPTO_DEV_SAHARA
 	tristate "Support for SAHARA crypto accelerator"
@@ -517,7 +517,7 @@
 config CRYPTO_DEV_ATMEL_TDES
 	tristate "Support for Atmel DES/TDES hw accelerator"
 	depends on ARCH_AT91 || COMPILE_TEST
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_BLKCIPHER
 	help
 	  Some Atmel processors have DES/TDES hw accelerator.
@@ -615,7 +615,7 @@
 	depends on ARCH_QCOM || COMPILE_TEST
 	depends on HAS_IOMEM
 	select CRYPTO_AES
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_ECB
 	select CRYPTO_CBC
 	select CRYPTO_XTS
@@ -663,7 +663,7 @@
 	select CRYPTO_MD5
 	select CRYPTO_SHA1
 	select CRYPTO_AES
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_BLKCIPHER
 	help
 	  Some Allwinner SoC have a crypto accelerator named
@@ -686,7 +686,7 @@
 	tristate "Rockchip's Cryptographic Engine driver"
 	depends on OF && ARCH_ROCKCHIP
 	select CRYPTO_AES
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_MD5
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
@@ -723,7 +723,7 @@
 	depends on MAILBOX
 	default m
 	select CRYPTO_AUTHENC
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_MD5
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
@@ -737,12 +737,11 @@
 
 config CRYPTO_DEV_SAFEXCEL
 	tristate "Inside Secure's SafeXcel cryptographic engine driver"
-	depends on OF
-	depends on (ARM64 && ARCH_MVEBU) || (COMPILE_TEST && 64BIT)
-	select CRYPTO_AES
+	depends on OF || PCI || COMPILE_TEST
+	select CRYPTO_LIB_AES
 	select CRYPTO_AUTHENC
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_HASH
 	select CRYPTO_HMAC
 	select CRYPTO_MD5
@@ -750,10 +749,11 @@
 	select CRYPTO_SHA256
 	select CRYPTO_SHA512
 	help
-	  This driver interfaces with the SafeXcel EIP-197 cryptographic engine
-	  designed by Inside Secure. Select this if you want to use CBC/ECB
-	  chain mode, AES cipher mode and SHA1/SHA224/SHA256/SHA512 hash
-	  algorithms.
+	  This driver interfaces with the SafeXcel EIP-97 and EIP-197 cryptographic
+	  engines designed by Inside Secure. It currently accelerates DES, 3DES and
+	  AES block ciphers in ECB and CBC mode, as well as SHA1, SHA224, SHA256,
+	  SHA384 and SHA512 hash algorithms for both basic hash and HMAC.
+	  Additionally, it accelerates combined AES-CBC/HMAC-SHA AEAD operations.
 
 config CRYPTO_DEV_ARTPEC6
 	tristate "Support for Axis ARTPEC-6/7 hardware crypto acceleration."
@@ -780,7 +780,7 @@
 	default n
 	select CRYPTO_HASH
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select CRYPTO_AEAD
 	select CRYPTO_AUTHENC
 	select CRYPTO_SHA1
diff --git a/drivers/crypto/amcc/crypto4xx_alg.c b/drivers/crypto/amcc/crypto4xx_alg.c
index cbfc6072..a42f861 100644
--- a/drivers/crypto/amcc/crypto4xx_alg.c
+++ b/drivers/crypto/amcc/crypto4xx_alg.c
@@ -527,28 +527,20 @@
 static int crypto4xx_compute_gcm_hash_key_sw(__le32 *hash_start, const u8 *key,
 					     unsigned int keylen)
 {
-	struct crypto_cipher *aes_tfm = NULL;
+	struct crypto_aes_ctx ctx;
 	uint8_t src[16] = { 0 };
-	int rc = 0;
+	int rc;
 
-	aes_tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_NEED_FALLBACK);
-	if (IS_ERR(aes_tfm)) {
-		rc = PTR_ERR(aes_tfm);
-		pr_warn("could not load aes cipher driver: %d\n", rc);
+	rc = aes_expandkey(&ctx, key, keylen);
+	if (rc) {
+		pr_err("aes_expandkey() failed: %d\n", rc);
 		return rc;
 	}
 
-	rc = crypto_cipher_setkey(aes_tfm, key, keylen);
-	if (rc) {
-		pr_err("setkey() failed: %d\n", rc);
-		goto out;
-	}
-
-	crypto_cipher_encrypt_one(aes_tfm, src, src);
+	aes_encrypt(&ctx, src, src);
 	crypto4xx_memcpy_to_le32(hash_start, src, 16);
-out:
-	crypto_free_cipher(aes_tfm);
-	return rc;
+	memzero_explicit(&ctx, sizeof(ctx));
+	return 0;
 }
 
 int crypto4xx_setkey_aes_gcm(struct crypto_aead *cipher,
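[Editor's note: crypto4xx no longer allocates a full "aes" cipher transform just to derive the GCM hash subkey; it calls the new AES library (CRYPTO_LIB_AES) directly on a stack context. A minimal sketch of that library usage, with an illustrative function name:

#include <crypto/aes.h>
#include <linux/string.h>
#include <linux/types.h>

static int example_gcm_hash_subkey(u8 hash_subkey[AES_BLOCK_SIZE],
				   const u8 *key, unsigned int keylen)
{
	struct crypto_aes_ctx ctx;
	u8 zeroes[AES_BLOCK_SIZE] = { };
	int rc;

	rc = aes_expandkey(&ctx, key, keylen);	/* also validates keylen */
	if (rc)
		return rc;

	aes_encrypt(&ctx, hash_subkey, zeroes);	/* H = E_K(0^128) */
	memzero_explicit(&ctx, sizeof(ctx));	/* wipe the round keys */
	return 0;
}

— end of note.]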
diff --git a/drivers/crypto/atmel-aes.c b/drivers/crypto/atmel-aes.c
index 2b7af44..026f193 100644
--- a/drivers/crypto/atmel-aes.c
+++ b/drivers/crypto/atmel-aes.c
@@ -2673,7 +2673,6 @@
 	/* Get the IRQ */
 	aes_dd->irq = platform_get_irq(pdev,  0);
 	if (aes_dd->irq < 0) {
-		dev_err(dev, "no IRQ resource info\n");
 		err = aes_dd->irq;
 		goto res_err;
 	}
diff --git a/drivers/crypto/atmel-i2c.c b/drivers/crypto/atmel-i2c.c
index dc876fab..1d33559 100644
--- a/drivers/crypto/atmel-i2c.c
+++ b/drivers/crypto/atmel-i2c.c
@@ -21,6 +21,18 @@
 #include <linux/workqueue.h>
 #include "atmel-i2c.h"
 
+static const struct {
+	u8 value;
+	const char *error_text;
+} error_list[] = {
+	{ 0x01, "CheckMac or Verify miscompare" },
+	{ 0x03, "Parse Error" },
+	{ 0x05, "ECC Fault" },
+	{ 0x0F, "Execution Error" },
+	{ 0xEE, "Watchdog about to expire" },
+	{ 0xFF, "CRC or other communication error" },
+};
+
 /**
  * atmel_i2c_checksum() - Generate 16-bit CRC as required by ATMEL ECC.
  * CRC16 verification of the count, opcode, param1, param2 and data bytes.
diff --git a/drivers/crypto/atmel-i2c.h b/drivers/crypto/atmel-i2c.h
index 21860b99..63b97b1 100644
--- a/drivers/crypto/atmel-i2c.h
+++ b/drivers/crypto/atmel-i2c.h
@@ -62,18 +62,6 @@
 #define STATUS_NOERR			0x00
 #define STATUS_WAKE_SUCCESSFUL		0x11
 
-static const struct {
-	u8 value;
-	const char *error_text;
-} error_list[] = {
-	{ 0x01, "CheckMac or Verify miscompare" },
-	{ 0x03, "Parse Error" },
-	{ 0x05, "ECC Fault" },
-	{ 0x0F, "Execution Error" },
-	{ 0xEE, "Watchdog about to expire" },
-	{ 0xFF, "CRC or other communication error" },
-};
-
 /* Definitions for eeprom organization */
 #define CONFIG_ZONE			0
 
diff --git a/drivers/crypto/atmel-sha.c b/drivers/crypto/atmel-sha.c
index ab0cfe7..84cb874 100644
--- a/drivers/crypto/atmel-sha.c
+++ b/drivers/crypto/atmel-sha.c
@@ -2779,7 +2779,6 @@
 	/* Get the IRQ */
 	sha_dd->irq = platform_get_irq(pdev,  0);
 	if (sha_dd->irq < 0) {
-		dev_err(dev, "no IRQ resource info\n");
 		err = sha_dd->irq;
 		goto res_err;
 	}
diff --git a/drivers/crypto/atmel-sha204a.c b/drivers/crypto/atmel-sha204a.c
index ea0d206..c96c14e7 100644
--- a/drivers/crypto/atmel-sha204a.c
+++ b/drivers/crypto/atmel-sha204a.c
@@ -109,7 +109,7 @@
 	i2c_priv->hwrng.read = atmel_sha204a_rng_read;
 	i2c_priv->hwrng.quality = 1024;
 
-	ret = hwrng_register(&i2c_priv->hwrng);
+	ret = devm_hwrng_register(&client->dev, &i2c_priv->hwrng);
 	if (ret)
 		dev_warn(&client->dev, "failed to register RNG (%d)\n", ret);
 
@@ -127,7 +127,6 @@
 
 	if (i2c_priv->hwrng.priv)
 		kfree((void *)i2c_priv->hwrng.priv);
-	hwrng_unregister(&i2c_priv->hwrng);
 
 	return 0;
 }
diff --git a/drivers/crypto/atmel-tdes.c b/drivers/crypto/atmel-tdes.c
index fa76620..1a6c86a 100644
--- a/drivers/crypto/atmel-tdes.c
+++ b/drivers/crypto/atmel-tdes.c
@@ -33,7 +33,7 @@
 #include <linux/cryptohash.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/algapi.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/hash.h>
 #include <crypto/internal/hash.h>
 #include <linux/platform_data/crypto-atmel.h>
@@ -773,22 +773,12 @@
 static int atmel_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 			   unsigned int keylen)
 {
-	u32 tmp[DES_EXPKEY_WORDS];
-	int err;
-	struct crypto_tfm *ctfm = crypto_ablkcipher_tfm(tfm);
-
 	struct atmel_tdes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
+	int err;
 
-	if (keylen != DES_KEY_SIZE) {
-		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	err = des_ekey(tmp, key);
-	if (err == 0 && (ctfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
-		ctfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
-		return -EINVAL;
-	}
+	err = verify_ablkcipher_des_key(tfm, key);
+	if (err)
+		return err;
 
 	memcpy(ctx->key, key, keylen);
 	ctx->keylen = keylen;
@@ -800,15 +790,11 @@
 			   unsigned int keylen)
 {
 	struct atmel_tdes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
-	u32 flags;
 	int err;
 
-	flags = crypto_ablkcipher_get_flags(tfm);
-	err = __des3_verify_key(&flags, key);
-	if (unlikely(err)) {
-		crypto_ablkcipher_set_flags(tfm, flags);
+	err = verify_ablkcipher_des3_key(tfm, key);
+	if (err)
 		return err;
-	}
 
 	memcpy(ctx->key, key, keylen);
 	ctx->keylen = keylen;
@@ -1281,7 +1267,6 @@
 	/* Get the IRQ */
 	tdes_dd->irq = platform_get_irq(pdev,  0);
 	if (tdes_dd->irq < 0) {
-		dev_err(dev, "no IRQ resource info\n");
 		err = tdes_dd->irq;
 		goto res_err;
 	}
diff --git a/drivers/crypto/axis/artpec6_crypto.c b/drivers/crypto/axis/artpec6_crypto.c
index 80fa04e..4b20606 100644
--- a/drivers/crypto/axis/artpec6_crypto.c
+++ b/drivers/crypto/axis/artpec6_crypto.c
@@ -2854,7 +2854,6 @@
 	struct artpec6_crypto *ac;
 	struct device *dev = &pdev->dev;
 	void __iomem *base;
-	struct resource *res;
 	int irq;
 	int err;
 
@@ -2867,8 +2866,7 @@
 
 	variant = (enum artpec6_crypto_variant)match->data;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	base = devm_ioremap_resource(&pdev->dev, res);
+	base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(base))
 		return PTR_ERR(base);
 
diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c
index 869602f..f85356a 100644
--- a/drivers/crypto/bcm/cipher.c
+++ b/drivers/crypto/bcm/cipher.c
@@ -24,7 +24,7 @@
 #include <crypto/aead.h>
 #include <crypto/internal/aead.h>
 #include <crypto/aes.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/hmac.h>
 #include <crypto/sha.h>
 #include <crypto/md5.h>
@@ -1802,24 +1802,13 @@
 		      unsigned int keylen)
 {
 	struct iproc_ctx_s *ctx = crypto_ablkcipher_ctx(cipher);
-	u32 tmp[DES_EXPKEY_WORDS];
+	int err;
 
-	if (keylen == DES_KEY_SIZE) {
-		if (des_ekey(tmp, key) == 0) {
-			if (crypto_ablkcipher_get_flags(cipher) &
-			    CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) {
-				u32 flags = CRYPTO_TFM_RES_WEAK_KEY;
+	err = verify_ablkcipher_des_key(cipher, key);
+	if (err)
+		return err;
 
-				crypto_ablkcipher_set_flags(cipher, flags);
-				return -EINVAL;
-			}
-		}
-
-		ctx->cipher_type = CIPHER_TYPE_DES;
-	} else {
-		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
+	ctx->cipher_type = CIPHER_TYPE_DES;
 	return 0;
 }
 
@@ -1827,23 +1816,13 @@
 			   unsigned int keylen)
 {
 	struct iproc_ctx_s *ctx = crypto_ablkcipher_ctx(cipher);
+	int err;
 
-	if (keylen == (DES_KEY_SIZE * 3)) {
-		u32 flags;
-		int ret;
+	err = verify_ablkcipher_des3_key(cipher, key);
+	if (err)
+		return err;
 
-		flags = crypto_ablkcipher_get_flags(cipher);
-		ret = __des3_verify_key(&flags, key);
-		if (unlikely(ret)) {
-			crypto_ablkcipher_set_flags(cipher, flags);
-			return ret;
-		}
-
-		ctx->cipher_type = CIPHER_TYPE_3DES;
-	} else {
-		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
+	ctx->cipher_type = CIPHER_TYPE_3DES;
 	return 0;
 }
 
@@ -2629,6 +2608,19 @@
 		return 1;
 	}
 
+	/*
+	 * RFC4106 and RFC4543 cannot handle the case where AAD is other than
+	 * 16 or 20 bytes long. So use fallback in this case.
+	 */
+	if (ctx->cipher.mode == CIPHER_MODE_GCM &&
+	    ctx->cipher.alg == CIPHER_ALG_AES &&
+	    rctx->iv_ctr_len == GCM_RFC4106_IV_SIZE &&
+	    req->assoclen != 16 && req->assoclen != 20) {
+		flow_log("RFC4106/RFC4543 needs fallback for assoclen"
+			 " other than 16 or 20 bytes\n");
+		return 1;
+	}
+
 	payload_len = req->cryptlen;
 	if (spu->spu_type == SPU_TYPE_SPUM)
 		payload_len += req->assoclen;
@@ -2855,40 +2847,16 @@
 
 	switch (ctx->alg->cipher_info.alg) {
 	case CIPHER_ALG_DES:
-		if (ctx->enckeylen == DES_KEY_SIZE) {
-			u32 tmp[DES_EXPKEY_WORDS];
-			u32 flags = CRYPTO_TFM_RES_WEAK_KEY;
+		if (verify_aead_des_key(cipher, keys.enckey, keys.enckeylen))
+			return -EINVAL;
 
-			if (des_ekey(tmp, keys.enckey) == 0) {
-				if (crypto_aead_get_flags(cipher) &
-				    CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) {
-					crypto_aead_set_flags(cipher, flags);
-					return -EINVAL;
-				}
-			}
-
-			ctx->cipher_type = CIPHER_TYPE_DES;
-		} else {
-			goto badkey;
-		}
+		ctx->cipher_type = CIPHER_TYPE_DES;
 		break;
 	case CIPHER_ALG_3DES:
-		if (ctx->enckeylen == (DES_KEY_SIZE * 3)) {
-			u32 flags;
-
-			flags = crypto_aead_get_flags(cipher);
-			ret = __des3_verify_key(&flags, keys.enckey);
-			if (unlikely(ret)) {
-				crypto_aead_set_flags(cipher, flags);
-				return ret;
-			}
-
-			ctx->cipher_type = CIPHER_TYPE_3DES;
-		} else {
-			crypto_aead_set_flags(cipher,
-					      CRYPTO_TFM_RES_BAD_KEY_LEN);
+		if (verify_aead_des3_key(cipher, keys.enckey, keys.enckeylen))
 			return -EINVAL;
-		}
+
+		ctx->cipher_type = CIPHER_TYPE_3DES;
 		break;
 	case CIPHER_ALG_AES:
 		switch (ctx->enckeylen) {
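[Editor's note: the open-coded des_ekey()/__des3_verify_key() checks removed here (and in the other drivers in this pull) are replaced by the new helpers from <crypto/internal/des.h>, which bundle the length check and the weak/degenerate-key policy. A sketch of a converted skcipher setkey, with an illustrative context layout:

#include <crypto/internal/des.h>
#include <crypto/internal/skcipher.h>
#include <linux/string.h>

struct example_ctx {
	u8 key[DES3_EDE_KEY_SIZE];
	unsigned int keylen;
};

static int example_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
			       unsigned int keylen)
{
	struct example_ctx *ctx = crypto_skcipher_ctx(tfm);
	int err;

	/* Rejects wrong lengths and 3DES keys that degrade to single DES. */
	err = verify_skcipher_des3_key(tfm, key);
	if (err)
		return err;

	memcpy(ctx->key, key, keylen);
	ctx->keylen = keylen;
	return 0;
}

— end of note.]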
diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig
index 3720ddab..137ed3d 100644
--- a/drivers/crypto/caam/Kconfig
+++ b/drivers/crypto/caam/Kconfig
@@ -98,7 +98,7 @@
 	select CRYPTO_AEAD
 	select CRYPTO_AUTHENC
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	help
 	  Selecting this will offload crypto for users of the
 	  scatterlist crypto API (such as the linux native IPSec
@@ -111,6 +111,7 @@
 	select CRYPTO_DEV_FSL_CAAM_CRYPTO_API_DESC
 	select CRYPTO_AUTHENC
 	select CRYPTO_BLKCIPHER
+	select CRYPTO_DES
 	help
 	  Selecting this will use CAAM Queue Interface (QI) for sending
 	  & receiving crypto jobs to/from CAAM. This gives better performance
@@ -161,6 +162,7 @@
 	select CRYPTO_AUTHENC
 	select CRYPTO_AEAD
 	select CRYPTO_HASH
+	select CRYPTO_DES
 	help
 	  CAAM driver for QorIQ Data Path Acceleration Architecture 2.
 	  It handles DPSECI DPAA2 objects that sit on the Management Complex
diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile
index 9ab4e81..68d5cc0 100644
--- a/drivers/crypto/caam/Makefile
+++ b/drivers/crypto/caam/Makefile
@@ -30,3 +30,4 @@
 obj-$(CONFIG_CRYPTO_DEV_FSL_DPAA2_CAAM) += dpaa2_caam.o
 
 dpaa2_caam-y    := caamalg_qi2.o dpseci.o
+dpaa2_caam-$(CONFIG_DEBUG_FS) += dpseci-debugfs.o
diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c
index 43f1825..2912006 100644
--- a/drivers/crypto/caam/caamalg.c
+++ b/drivers/crypto/caam/caamalg.c
@@ -74,7 +74,7 @@
 
 #define CHACHAPOLY_DESC_JOB_IO_LEN	(AEAD_DESC_JOB_IO_LEN + CAAM_CMD_SZ * 6)
 
-#define DESC_MAX_USED_BYTES		(CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN)
+#define DESC_MAX_USED_BYTES		(CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN_MIN)
 #define DESC_MAX_USED_LEN		(DESC_MAX_USED_BYTES / CAAM_CMD_SZ)
 
 struct caam_alg_entry {
@@ -205,6 +205,18 @@
 				ctx->cdata.keylen - CTR_RFC3686_NONCE_SIZE);
 	}
 
+	/*
+	 * In case |user key| > |derived key|, using DKP<imm,imm>
+	 * would result in invalid opcodes (last bytes of user key) in
+	 * the resulting descriptor. Use DKP<ptr,imm> instead => both
+	 * virtual and dma key addresses are needed.
+	 */
+	ctx->adata.key_virt = ctx->key;
+	ctx->adata.key_dma = ctx->key_dma;
+
+	ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad;
+	ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
+
 	data_len[0] = ctx->adata.keylen_pad;
 	data_len[1] = ctx->cdata.keylen;
 
@@ -221,16 +233,6 @@
 			      ARRAY_SIZE(data_len)) < 0)
 		return -EINVAL;
 
-	if (inl_mask & 1)
-		ctx->adata.key_virt = ctx->key;
-	else
-		ctx->adata.key_dma = ctx->key_dma;
-
-	if (inl_mask & 2)
-		ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad;
-	else
-		ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
-
 	ctx->adata.key_inline = !!(inl_mask & 1);
 	ctx->cdata.key_inline = !!(inl_mask & 2);
 
@@ -253,16 +255,6 @@
 			      ARRAY_SIZE(data_len)) < 0)
 		return -EINVAL;
 
-	if (inl_mask & 1)
-		ctx->adata.key_virt = ctx->key;
-	else
-		ctx->adata.key_dma = ctx->key_dma;
-
-	if (inl_mask & 2)
-		ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad;
-	else
-		ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
-
 	ctx->adata.key_inline = !!(inl_mask & 1);
 	ctx->cdata.key_inline = !!(inl_mask & 2);
 
@@ -287,16 +279,6 @@
 			      ARRAY_SIZE(data_len)) < 0)
 		return -EINVAL;
 
-	if (inl_mask & 1)
-		ctx->adata.key_virt = ctx->key;
-	else
-		ctx->adata.key_dma = ctx->key_dma;
-
-	if (inl_mask & 2)
-		ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad;
-	else
-		ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
-
 	ctx->adata.key_inline = !!(inl_mask & 1);
 	ctx->cdata.key_inline = !!(inl_mask & 2);
 
@@ -376,6 +358,11 @@
 static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	int err;
+
+	err = crypto_gcm_check_authsize(authsize);
+	if (err)
+		return err;
 
 	ctx->authsize = authsize;
 	gcm_set_sh_desc(authenc);
@@ -439,6 +426,11 @@
 			       unsigned int authsize)
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	int err;
+
+	err = crypto_rfc4106_check_authsize(authsize);
+	if (err)
+		return err;
 
 	ctx->authsize = authsize;
 	rfc4106_set_sh_desc(authenc);
@@ -503,6 +495,9 @@
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
 
+	if (authsize != 16)
+		return -EINVAL;
+
 	ctx->authsize = authsize;
 	rfc4543_set_sh_desc(authenc);
 
@@ -633,33 +628,17 @@
 			    unsigned int keylen)
 {
 	struct crypto_authenc_keys keys;
-	u32 flags;
 	int err;
 
 	err = crypto_authenc_extractkeys(&keys, key, keylen);
 	if (unlikely(err))
-		goto badkey;
+		return err;
 
-	err = -EINVAL;
-	if (keys.enckeylen != DES3_EDE_KEY_SIZE)
-		goto badkey;
+	err = verify_aead_des3_key(aead, keys.enckey, keys.enckeylen) ?:
+	      aead_setkey(aead, key, keylen);
 
-	flags = crypto_aead_get_flags(aead);
-	err = __des3_verify_key(&flags, keys.enckey);
-	if (unlikely(err)) {
-		crypto_aead_set_flags(aead, flags);
-		goto out;
-	}
-
-	err = aead_setkey(aead, key, keylen);
-
-out:
 	memzero_explicit(&keys, sizeof(keys));
 	return err;
-
-badkey:
-	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	goto out;
 }
 
 static int gcm_setkey(struct crypto_aead *aead,
@@ -667,6 +646,13 @@
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
+	int err;
+
+	err = aes_check_keylen(keylen);
+	if (err) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return err;
+	}
 
 	print_hex_dump_debug("key in @"__stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -683,9 +669,13 @@
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
+	int err;
 
-	if (keylen < 4)
-		return -EINVAL;
+	err = aes_check_keylen(keylen - 4);
+	if (err) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return err;
+	}
 
 	print_hex_dump_debug("key in @"__stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -707,9 +697,13 @@
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *jrdev = ctx->jrdev;
+	int err;
 
-	if (keylen < 4)
-		return -EINVAL;
+	err = aes_check_keylen(keylen - 4);
+	if (err) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return err;
+	}
 
 	print_hex_dump_debug("key in @"__stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -727,7 +721,7 @@
 }
 
 static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
-			   unsigned int keylen)
+			   unsigned int keylen, const u32 ctx1_iv_off)
 {
 	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
 	struct caam_skcipher_alg *alg =
@@ -736,30 +730,10 @@
 	struct device *jrdev = ctx->jrdev;
 	unsigned int ivsize = crypto_skcipher_ivsize(skcipher);
 	u32 *desc;
-	u32 ctx1_iv_off = 0;
-	const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) ==
-			       OP_ALG_AAI_CTR_MOD128);
 	const bool is_rfc3686 = alg->caam.rfc3686;
 
 	print_hex_dump_debug("key in @"__stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
-	/*
-	 * AES-CTR needs to load IV in CONTEXT1 reg
-	 * at an offset of 128bits (16bytes)
-	 * CONTEXT1[255:128] = IV
-	 */
-	if (ctr_mode)
-		ctx1_iv_off = 16;
-
-	/*
-	 * RFC3686 specific:
-	 *	| CONTEXT1[255:128] = {NONCE, IV, COUNTER}
-	 *	| *key = {KEY, NONCE}
-	 */
-	if (is_rfc3686) {
-		ctx1_iv_off = 16 + CTR_RFC3686_NONCE_SIZE;
-		keylen -= CTR_RFC3686_NONCE_SIZE;
-	}
 
 	ctx->cdata.keylen = keylen;
 	ctx->cdata.key_virt = key;
@@ -782,25 +756,86 @@
 	return 0;
 }
 
+static int aes_skcipher_setkey(struct crypto_skcipher *skcipher,
+			       const u8 *key, unsigned int keylen)
+{
+	int err;
+
+	err = aes_check_keylen(keylen);
+	if (err) {
+		crypto_skcipher_set_flags(skcipher,
+					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return err;
+	}
+
+	return skcipher_setkey(skcipher, key, keylen, 0);
+}
+
+static int rfc3686_skcipher_setkey(struct crypto_skcipher *skcipher,
+				   const u8 *key, unsigned int keylen)
+{
+	u32 ctx1_iv_off;
+	int err;
+
+	/*
+	 * RFC3686 specific:
+	 *	| CONTEXT1[255:128] = {NONCE, IV, COUNTER}
+	 *	| *key = {KEY, NONCE}
+	 */
+	ctx1_iv_off = 16 + CTR_RFC3686_NONCE_SIZE;
+	keylen -= CTR_RFC3686_NONCE_SIZE;
+
+	err = aes_check_keylen(keylen);
+	if (err) {
+		crypto_skcipher_set_flags(skcipher,
+					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return err;
+	}
+
+	return skcipher_setkey(skcipher, key, keylen, ctx1_iv_off);
+}
+
+static int ctr_skcipher_setkey(struct crypto_skcipher *skcipher,
+			       const u8 *key, unsigned int keylen)
+{
+	u32 ctx1_iv_off;
+	int err;
+
+	/*
+	 * AES-CTR needs to load IV in CONTEXT1 reg
+	 * at an offset of 128bits (16bytes)
+	 * CONTEXT1[255:128] = IV
+	 */
+	ctx1_iv_off = 16;
+
+	err = aes_check_keylen(keylen);
+	if (err) {
+		crypto_skcipher_set_flags(skcipher,
+					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return err;
+	}
+
+	return skcipher_setkey(skcipher, key, keylen, ctx1_iv_off);
+}
+
+static int arc4_skcipher_setkey(struct crypto_skcipher *skcipher,
+				const u8 *key, unsigned int keylen)
+{
+	return skcipher_setkey(skcipher, key, keylen, 0);
+}
+
 static int des_skcipher_setkey(struct crypto_skcipher *skcipher,
 			       const u8 *key, unsigned int keylen)
 {
-	u32 tmp[DES3_EDE_EXPKEY_WORDS];
-	struct crypto_tfm *tfm = crypto_skcipher_tfm(skcipher);
+	return verify_skcipher_des_key(skcipher, key) ?:
+	       skcipher_setkey(skcipher, key, keylen, 0);
+}
 
-	if (keylen == DES3_EDE_KEY_SIZE &&
-	    __des3_ede_setkey(tmp, &tfm->crt_flags, key, DES3_EDE_KEY_SIZE)) {
-		return -EINVAL;
-	}
-
-	if (!des_ekey(tmp, key) && (crypto_skcipher_get_flags(skcipher) &
-	    CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
-		crypto_skcipher_set_flags(skcipher,
-					  CRYPTO_TFM_RES_WEAK_KEY);
-		return -EINVAL;
-	}
-
-	return skcipher_setkey(skcipher, key, keylen);
+static int des3_skcipher_setkey(struct crypto_skcipher *skcipher,
+				const u8 *key, unsigned int keylen)
+{
+	return verify_skcipher_des3_key(skcipher, key) ?:
+	       skcipher_setkey(skcipher, key, keylen, 0);
 }
 
 static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
@@ -930,19 +965,20 @@
 {
 	struct aead_request *req = context;
 	struct aead_edesc *edesc;
+	int ecode = 0;
 
 	dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 
 	edesc = container_of(desc, struct aead_edesc, hw_desc[0]);
 
 	if (err)
-		caam_jr_strstatus(jrdev, err);
+		ecode = caam_jr_strstatus(jrdev, err);
 
 	aead_unmap(jrdev, edesc, req);
 
 	kfree(edesc);
 
-	aead_request_complete(req, err);
+	aead_request_complete(req, ecode);
 }
 
 static void aead_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
@@ -950,25 +986,20 @@
 {
 	struct aead_request *req = context;
 	struct aead_edesc *edesc;
+	int ecode = 0;
 
 	dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 
 	edesc = container_of(desc, struct aead_edesc, hw_desc[0]);
 
 	if (err)
-		caam_jr_strstatus(jrdev, err);
+		ecode = caam_jr_strstatus(jrdev, err);
 
 	aead_unmap(jrdev, edesc, req);
 
-	/*
-	 * verify hw auth check passed else return -EBADMSG
-	 */
-	if ((err & JRSTA_CCBERR_ERRID_MASK) == JRSTA_CCBERR_ERRID_ICVCHK)
-		err = -EBADMSG;
-
 	kfree(edesc);
 
-	aead_request_complete(req, err);
+	aead_request_complete(req, ecode);
 }
 
 static void skcipher_encrypt_done(struct device *jrdev, u32 *desc, u32 err,
@@ -978,13 +1009,14 @@
 	struct skcipher_edesc *edesc;
 	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
 	int ivsize = crypto_skcipher_ivsize(skcipher);
+	int ecode = 0;
 
 	dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 
 	edesc = container_of(desc, struct skcipher_edesc, hw_desc[0]);
 
 	if (err)
-		caam_jr_strstatus(jrdev, err);
+		ecode = caam_jr_strstatus(jrdev, err);
 
 	skcipher_unmap(jrdev, edesc, req);
 
@@ -993,10 +1025,9 @@
 	 * ciphertext block (CBC mode) or last counter (CTR mode).
 	 * This is used e.g. by the CTS mode.
 	 */
-	if (ivsize) {
+	if (ivsize && !ecode) {
 		memcpy(req->iv, (u8 *)edesc->sec4_sg + edesc->sec4_sg_bytes,
 		       ivsize);
-
 		print_hex_dump_debug("dstiv  @"__stringify(__LINE__)": ",
 				     DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
 				     edesc->src_nents > 1 ? 100 : ivsize, 1);
@@ -1008,7 +1039,7 @@
 
 	kfree(edesc);
 
-	skcipher_request_complete(req, err);
+	skcipher_request_complete(req, ecode);
 }
 
 static void skcipher_decrypt_done(struct device *jrdev, u32 *desc, u32 err,
@@ -1018,12 +1049,13 @@
 	struct skcipher_edesc *edesc;
 	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
 	int ivsize = crypto_skcipher_ivsize(skcipher);
+	int ecode = 0;
 
 	dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 
 	edesc = container_of(desc, struct skcipher_edesc, hw_desc[0]);
 	if (err)
-		caam_jr_strstatus(jrdev, err);
+		ecode = caam_jr_strstatus(jrdev, err);
 
 	skcipher_unmap(jrdev, edesc, req);
 
@@ -1032,7 +1064,7 @@
 	 * ciphertext block (CBC mode) or last counter (CTR mode).
 	 * This is used e.g. by the CTS mode.
 	 */
-	if (ivsize) {
+	if (ivsize && !ecode) {
 		memcpy(req->iv, (u8 *)edesc->sec4_sg + edesc->sec4_sg_bytes,
 		       ivsize);
 
@@ -1047,7 +1079,7 @@
 
 	kfree(edesc);
 
-	skcipher_request_complete(req, err);
+	skcipher_request_complete(req, ecode);
 }
 
 /*
@@ -1525,10 +1557,7 @@
 
 static int ipsec_gcm_encrypt(struct aead_request *req)
 {
-	if (req->assoclen < 8)
-		return -EINVAL;
-
-	return gcm_encrypt(req);
+	return crypto_ipsec_check_assoclen(req->assoclen) ? : gcm_encrypt(req);
 }
 
 static int aead_encrypt(struct aead_request *req)
@@ -1602,10 +1631,7 @@
 
 static int ipsec_gcm_decrypt(struct aead_request *req)
 {
-	if (req->assoclen < 8)
-		return -EINVAL;
-
-	return gcm_decrypt(req);
+	return crypto_ipsec_check_assoclen(req->assoclen) ? : gcm_decrypt(req);
 }
 
 static int aead_decrypt(struct aead_request *req)
@@ -1817,6 +1843,9 @@
 	u32 *desc;
 	int ret = 0;
 
+	if (!req->cryptlen)
+		return 0;
+
 	/* allocate extended descriptor */
 	edesc = skcipher_edesc_alloc(req, DESC_JOB_IO_LEN * CAAM_CMD_SZ);
 	if (IS_ERR(edesc))
@@ -1851,6 +1880,9 @@
 	u32 *desc;
 	int ret = 0;
 
+	if (!req->cryptlen)
+		return 0;
+
 	/* allocate extended descriptor */
 	edesc = skcipher_edesc_alloc(req, DESC_JOB_IO_LEN * CAAM_CMD_SZ);
 	if (IS_ERR(edesc))
@@ -1883,7 +1915,7 @@
 				.cra_driver_name = "cbc-aes-caam",
 				.cra_blocksize = AES_BLOCK_SIZE,
 			},
-			.setkey = skcipher_setkey,
+			.setkey = aes_skcipher_setkey,
 			.encrypt = skcipher_encrypt,
 			.decrypt = skcipher_decrypt,
 			.min_keysize = AES_MIN_KEY_SIZE,
@@ -1899,7 +1931,7 @@
 				.cra_driver_name = "cbc-3des-caam",
 				.cra_blocksize = DES3_EDE_BLOCK_SIZE,
 			},
-			.setkey = des_skcipher_setkey,
+			.setkey = des3_skcipher_setkey,
 			.encrypt = skcipher_encrypt,
 			.decrypt = skcipher_decrypt,
 			.min_keysize = DES3_EDE_KEY_SIZE,
@@ -1931,7 +1963,7 @@
 				.cra_driver_name = "ctr-aes-caam",
 				.cra_blocksize = 1,
 			},
-			.setkey = skcipher_setkey,
+			.setkey = ctr_skcipher_setkey,
 			.encrypt = skcipher_encrypt,
 			.decrypt = skcipher_decrypt,
 			.min_keysize = AES_MIN_KEY_SIZE,
@@ -1949,7 +1981,7 @@
 				.cra_driver_name = "rfc3686-ctr-aes-caam",
 				.cra_blocksize = 1,
 			},
-			.setkey = skcipher_setkey,
+			.setkey = rfc3686_skcipher_setkey,
 			.encrypt = skcipher_encrypt,
 			.decrypt = skcipher_decrypt,
 			.min_keysize = AES_MIN_KEY_SIZE +
@@ -2003,7 +2035,7 @@
 				.cra_driver_name = "ecb-aes-caam",
 				.cra_blocksize = AES_BLOCK_SIZE,
 			},
-			.setkey = skcipher_setkey,
+			.setkey = aes_skcipher_setkey,
 			.encrypt = skcipher_encrypt,
 			.decrypt = skcipher_decrypt,
 			.min_keysize = AES_MIN_KEY_SIZE,
@@ -2018,7 +2050,7 @@
 				.cra_driver_name = "ecb-des3-caam",
 				.cra_blocksize = DES3_EDE_BLOCK_SIZE,
 			},
-			.setkey = des_skcipher_setkey,
+			.setkey = des3_skcipher_setkey,
 			.encrypt = skcipher_encrypt,
 			.decrypt = skcipher_decrypt,
 			.min_keysize = DES3_EDE_KEY_SIZE,
@@ -2033,7 +2065,7 @@
 				.cra_driver_name = "ecb-arc4-caam",
 				.cra_blocksize = ARC4_BLOCK_SIZE,
 			},
-			.setkey = skcipher_setkey,
+			.setkey = arc4_skcipher_setkey,
 			.encrypt = skcipher_encrypt,
 			.decrypt = skcipher_decrypt,
 			.min_keysize = ARC4_MIN_KEY_SIZE,
diff --git a/drivers/crypto/caam/caamalg_desc.c b/drivers/crypto/caam/caamalg_desc.c
index 7253183..aa9ccca 100644
--- a/drivers/crypto/caam/caamalg_desc.c
+++ b/drivers/crypto/caam/caamalg_desc.c
@@ -503,6 +503,7 @@
 			       const bool is_qi, int era)
 {
 	u32 geniv, moveiv;
+	u32 *wait_cmd;
 
 	/* Note: Context registers are saved. */
 	init_sh_desc_key_aead(desc, cdata, adata, is_rfc3686, nonce, era);
@@ -598,6 +599,14 @@
 
 	/* Will read cryptlen */
 	append_math_add(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
+
+	/*
+	 * Wait for IV transfer (ofifo -> class2) to finish before starting
+	 * ciphertext transfer (ofifo -> external memory).
+	 */
+	wait_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | JUMP_COND_NIFP);
+	set_jump_tgt_here(desc, wait_cmd);
+
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | KEY_VLF |
 			     FIFOLD_TYPE_MSG1OUT2 | FIFOLD_TYPE_LASTBOTH);
 	append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF);
@@ -843,13 +852,16 @@
  * @ivsize: initialization vector size
  * @icvsize: integrity check value (ICV) size (truncated or full)
  * @is_qi: true when called from caam/qi
+ *
+ * Input sequence: AAD | PTXT
+ * Output sequence: AAD | CTXT | ICV
+ * AAD length (assoclen), which includes the IV length, is available in Math3.
  */
 void cnstr_shdsc_rfc4106_encap(u32 * const desc, struct alginfo *cdata,
 			       unsigned int ivsize, unsigned int icvsize,
 			       const bool is_qi)
 {
-	u32 *key_jump_cmd;
-
+	u32 *key_jump_cmd, *zero_cryptlen_jump_cmd, *skip_instructions;
 	init_sh_desc(desc, HDR_SHARE_SERIAL);
 
 	/* Skip key loading if it is loaded due to sharing */
@@ -892,24 +904,26 @@
 	append_math_sub_imm_u32(desc, VARSEQINLEN, REG3, IMM, ivsize);
 	append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ);
 
-	/* Read assoc data */
+	/* Skip AAD */
+	append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
+
+	/* Read cryptlen and set this value into VARSEQOUTLEN */
+	append_math_sub(desc, VARSEQOUTLEN, SEQINLEN, REG3, CAAM_CMD_SZ);
+
+	/* If cryptlen is ZERO jump to AAD command */
+	zero_cryptlen_jump_cmd = append_jump(desc, JUMP_TEST_ALL |
+					    JUMP_COND_MATH_Z);
+
+	/* Read AAD data */
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
 			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_FLUSH1);
 
+	/* Workaround for erratum A-005473 (simultaneous SEQ FIFO skips) */
+	append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA);
+
 	/* Skip IV */
 	append_seq_fifo_load(desc, ivsize, FIFOLD_CLASS_SKIP);
-
-	/* Will read cryptlen bytes */
-	append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG0, CAAM_CMD_SZ);
-
-	/* Workaround for erratum A-005473 (simultaneous SEQ FIFO skips) */
-	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLD_TYPE_MSG);
-
-	/* Skip assoc data */
-	append_seq_fifo_store(desc, 0, FIFOST_TYPE_SKIP | FIFOLDST_VLF);
-
-	/* cryptlen = seqoutlen - assoclen */
-	append_math_sub(desc, VARSEQOUTLEN, VARSEQINLEN, REG0, CAAM_CMD_SZ);
+	append_math_add(desc, VARSEQINLEN, VARSEQOUTLEN, REG0, CAAM_CMD_SZ);
 
 	/* Write encrypted data */
 	append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | FIFOLDST_VLF);
@@ -918,6 +932,18 @@
 	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
 			     FIFOLD_TYPE_MSG | FIFOLD_TYPE_LAST1);
 
+	/* Jump instructions to avoid double reading of AAD */
+	skip_instructions = append_jump(desc, JUMP_TEST_ALL);
+
+	/* There is no input data, cryptlen = 0 */
+	set_jump_tgt_here(desc, zero_cryptlen_jump_cmd);
+
+	/* Read AAD */
+	append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS1 | FIFOLDST_VLF |
+			     FIFOLD_TYPE_AAD | FIFOLD_TYPE_LAST1);
+
+	set_jump_tgt_here(desc, skip_instructions);
+
 	/* Write ICV */
 	append_seq_store(desc, icvsize, LDST_CLASS_1_CCB |
 			 LDST_SRCDST_BYTE_CONTEXT);
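
The wait_cmd and zero_cryptlen_jump_cmd/skip_instructions pairs above rely on the
driver's two-step forward-jump helpers from desc_constr.h: append_jump() emits a
JUMP command with an unresolved offset and returns a pointer to it, and
set_jump_tgt_here() later patches that offset so the jump lands on whatever command
is appended next. In the givencrypt hunk the target is the very next command, so
the JUMP acts purely as a wait for the NIFP condition. Minimal usage sketch (not a
complete descriptor):

    u32 *jump_cmd;

    /* emit the JUMP now; its target offset is still unknown */
    jump_cmd = append_jump(desc, JUMP_TEST_ALL | JUMP_COND_MATH_Z);

    /* ... commands executed only when the MATH-zero condition does not hold ... */

    /* resolve the jump so it lands on the next command appended after this point */
    set_jump_tgt_here(desc, jump_cmd);
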
diff --git a/drivers/crypto/caam/caamalg_desc.h b/drivers/crypto/caam/caamalg_desc.h
index da4a4ee..f289339 100644
--- a/drivers/crypto/caam/caamalg_desc.h
+++ b/drivers/crypto/caam/caamalg_desc.h
@@ -12,7 +12,7 @@
 #define DESC_AEAD_BASE			(4 * CAAM_CMD_SZ)
 #define DESC_AEAD_ENC_LEN		(DESC_AEAD_BASE + 11 * CAAM_CMD_SZ)
 #define DESC_AEAD_DEC_LEN		(DESC_AEAD_BASE + 15 * CAAM_CMD_SZ)
-#define DESC_AEAD_GIVENC_LEN		(DESC_AEAD_ENC_LEN + 7 * CAAM_CMD_SZ)
+#define DESC_AEAD_GIVENC_LEN		(DESC_AEAD_ENC_LEN + 8 * CAAM_CMD_SZ)
 #define DESC_QI_AEAD_ENC_LEN		(DESC_AEAD_ENC_LEN + 3 * CAAM_CMD_SZ)
 #define DESC_QI_AEAD_DEC_LEN		(DESC_AEAD_DEC_LEN + 3 * CAAM_CMD_SZ)
 #define DESC_QI_AEAD_GIVENC_LEN		(DESC_AEAD_GIVENC_LEN + 3 * CAAM_CMD_SZ)
@@ -31,7 +31,7 @@
 #define DESC_QI_GCM_DEC_LEN		(DESC_GCM_DEC_LEN + 3 * CAAM_CMD_SZ)
 
 #define DESC_RFC4106_BASE		(3 * CAAM_CMD_SZ)
-#define DESC_RFC4106_ENC_LEN		(DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ)
+#define DESC_RFC4106_ENC_LEN		(DESC_RFC4106_BASE + 16 * CAAM_CMD_SZ)
 #define DESC_RFC4106_DEC_LEN		(DESC_RFC4106_BASE + 13 * CAAM_CMD_SZ)
 #define DESC_QI_RFC4106_ENC_LEN		(DESC_RFC4106_ENC_LEN + 5 * CAAM_CMD_SZ)
 #define DESC_QI_RFC4106_DEC_LEN		(DESC_RFC4106_DEC_LEN + 5 * CAAM_CMD_SZ)
diff --git a/drivers/crypto/caam/caamalg_qi.c b/drivers/crypto/caam/caamalg_qi.c
index 32f0f8a..8e34496 100644
--- a/drivers/crypto/caam/caamalg_qi.c
+++ b/drivers/crypto/caam/caamalg_qi.c
@@ -105,6 +105,18 @@
 				ctx->cdata.keylen - CTR_RFC3686_NONCE_SIZE);
 	}
 
+	/*
+	 * In case |user key| > |derived key|, using DKP<imm,imm> would result
+	 * in invalid opcodes (last bytes of user key) in the resulting
+	 * descriptor. Use DKP<ptr,imm> instead => both virtual and dma key
+	 * addresses are needed.
+	 */
+	ctx->adata.key_virt = ctx->key;
+	ctx->adata.key_dma = ctx->key_dma;
+
+	ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad;
+	ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
+
 	data_len[0] = ctx->adata.keylen_pad;
 	data_len[1] = ctx->cdata.keylen;
 
@@ -118,16 +130,6 @@
 			      ARRAY_SIZE(data_len)) < 0)
 		return -EINVAL;
 
-	if (inl_mask & 1)
-		ctx->adata.key_virt = ctx->key;
-	else
-		ctx->adata.key_dma = ctx->key_dma;
-
-	if (inl_mask & 2)
-		ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad;
-	else
-		ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
-
 	ctx->adata.key_inline = !!(inl_mask & 1);
 	ctx->cdata.key_inline = !!(inl_mask & 2);
 
@@ -143,16 +145,6 @@
 			      ARRAY_SIZE(data_len)) < 0)
 		return -EINVAL;
 
-	if (inl_mask & 1)
-		ctx->adata.key_virt = ctx->key;
-	else
-		ctx->adata.key_dma = ctx->key_dma;
-
-	if (inl_mask & 2)
-		ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad;
-	else
-		ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
-
 	ctx->adata.key_inline = !!(inl_mask & 1);
 	ctx->cdata.key_inline = !!(inl_mask & 2);
 
@@ -171,16 +163,6 @@
 			      ARRAY_SIZE(data_len)) < 0)
 		return -EINVAL;
 
-	if (inl_mask & 1)
-		ctx->adata.key_virt = ctx->key;
-	else
-		ctx->adata.key_dma = ctx->key_dma;
-
-	if (inl_mask & 2)
-		ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad;
-	else
-		ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
-
 	ctx->adata.key_inline = !!(inl_mask & 1);
 	ctx->cdata.key_inline = !!(inl_mask & 2);
 
@@ -252,11 +234,10 @@
 	dma_sync_single_for_device(jrdev->parent, ctx->key_dma,
 				   ctx->adata.keylen_pad + keys.enckeylen,
 				   ctx->dir);
-#ifdef DEBUG
-	print_hex_dump(KERN_ERR, "ctx.key@" __stringify(__LINE__)": ",
-		       DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
-		       ctx->adata.keylen_pad + keys.enckeylen, 1);
-#endif
+
+	print_hex_dump_debug("ctx.key@" __stringify(__LINE__)": ",
+			     DUMP_PREFIX_ADDRESS, 16, 4, ctx->key,
+			     ctx->adata.keylen_pad + keys.enckeylen, 1);
 
 skip_split_key:
 	ctx->cdata.keylen = keys.enckeylen;
@@ -296,33 +277,17 @@
 			    unsigned int keylen)
 {
 	struct crypto_authenc_keys keys;
-	u32 flags;
 	int err;
 
 	err = crypto_authenc_extractkeys(&keys, key, keylen);
 	if (unlikely(err))
-		goto badkey;
+		return err;
 
-	err = -EINVAL;
-	if (keys.enckeylen != DES3_EDE_KEY_SIZE)
-		goto badkey;
+	err = verify_aead_des3_key(aead, keys.enckey, keys.enckeylen) ?:
+	      aead_setkey(aead, key, keylen);
 
-	flags = crypto_aead_get_flags(aead);
-	err = __des3_verify_key(&flags, keys.enckey);
-	if (unlikely(err)) {
-		crypto_aead_set_flags(aead, flags);
-		goto out;
-	}
-
-	err = aead_setkey(aead, key, keylen);
-
-out:
 	memzero_explicit(&keys, sizeof(keys));
 	return err;
-
-badkey:
-	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	goto out;
 }
 
 static int gcm_set_sh_desc(struct crypto_aead *aead)
@@ -371,6 +336,11 @@
 static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	int err;
+
+	err = crypto_gcm_check_authsize(authsize);
+	if (err)
+		return err;
 
 	ctx->authsize = authsize;
 	gcm_set_sh_desc(authenc);
@@ -385,6 +355,12 @@
 	struct device *jrdev = ctx->jrdev;
 	int ret;
 
+	ret = aes_check_keylen(keylen);
+	if (ret) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return ret;
+	}
+
 	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
 
@@ -466,6 +442,11 @@
 			       unsigned int authsize)
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	int err;
+
+	err = crypto_rfc4106_check_authsize(authsize);
+	if (err)
+		return err;
 
 	ctx->authsize = authsize;
 	rfc4106_set_sh_desc(authenc);
@@ -480,8 +461,11 @@
 	struct device *jrdev = ctx->jrdev;
 	int ret;
 
-	if (keylen < 4)
-		return -EINVAL;
+	ret = aes_check_keylen(keylen - 4);
+	if (ret) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return ret;
+	}
 
 	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -569,6 +553,9 @@
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
 
+	if (authsize != 16)
+		return -EINVAL;
+
 	ctx->authsize = authsize;
 	rfc4543_set_sh_desc(authenc);
 
@@ -582,8 +569,11 @@
 	struct device *jrdev = ctx->jrdev;
 	int ret;
 
-	if (keylen < 4)
-		return -EINVAL;
+	ret = aes_check_keylen(keylen - 4);
+	if (ret) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return ret;
+	}
 
 	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -624,7 +614,7 @@
 }
 
 static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
-			   unsigned int keylen)
+			   unsigned int keylen, const u32 ctx1_iv_off)
 {
 	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
 	struct caam_skcipher_alg *alg =
@@ -632,33 +622,12 @@
 			     skcipher);
 	struct device *jrdev = ctx->jrdev;
 	unsigned int ivsize = crypto_skcipher_ivsize(skcipher);
-	u32 ctx1_iv_off = 0;
-	const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) ==
-			       OP_ALG_AAI_CTR_MOD128);
 	const bool is_rfc3686 = alg->caam.rfc3686;
 	int ret = 0;
 
 	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
 
-	/*
-	 * AES-CTR needs to load IV in CONTEXT1 reg
-	 * at an offset of 128bits (16bytes)
-	 * CONTEXT1[255:128] = IV
-	 */
-	if (ctr_mode)
-		ctx1_iv_off = 16;
-
-	/*
-	 * RFC3686 specific:
-	 *	| CONTEXT1[255:128] = {NONCE, IV, COUNTER}
-	 *	| *key = {KEY, NONCE}
-	 */
-	if (is_rfc3686) {
-		ctx1_iv_off = 16 + CTR_RFC3686_NONCE_SIZE;
-		keylen -= CTR_RFC3686_NONCE_SIZE;
-	}
-
 	ctx->cdata.keylen = keylen;
 	ctx->cdata.key_virt = key;
 	ctx->cdata.key_inline = true;
@@ -694,11 +663,80 @@
 	return -EINVAL;
 }
 
+static int aes_skcipher_setkey(struct crypto_skcipher *skcipher,
+			       const u8 *key, unsigned int keylen)
+{
+	int err;
+
+	err = aes_check_keylen(keylen);
+	if (err) {
+		crypto_skcipher_set_flags(skcipher,
+					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return err;
+	}
+
+	return skcipher_setkey(skcipher, key, keylen, 0);
+}
+
+static int rfc3686_skcipher_setkey(struct crypto_skcipher *skcipher,
+				   const u8 *key, unsigned int keylen)
+{
+	u32 ctx1_iv_off;
+	int err;
+
+	/*
+	 * RFC3686 specific:
+	 *	| CONTEXT1[255:128] = {NONCE, IV, COUNTER}
+	 *	| *key = {KEY, NONCE}
+	 */
+	ctx1_iv_off = 16 + CTR_RFC3686_NONCE_SIZE;
+	keylen -= CTR_RFC3686_NONCE_SIZE;
+
+	err = aes_check_keylen(keylen);
+	if (err) {
+		crypto_skcipher_set_flags(skcipher,
+					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return err;
+	}
+
+	return skcipher_setkey(skcipher, key, keylen, ctx1_iv_off);
+}
+
+static int ctr_skcipher_setkey(struct crypto_skcipher *skcipher,
+			       const u8 *key, unsigned int keylen)
+{
+	u32 ctx1_iv_off;
+	int err;
+
+	/*
+	 * AES-CTR needs to load IV in CONTEXT1 reg
+	 * at an offset of 128bits (16bytes)
+	 * CONTEXT1[255:128] = IV
+	 */
+	ctx1_iv_off = 16;
+
+	err = aes_check_keylen(keylen);
+	if (err) {
+		crypto_skcipher_set_flags(skcipher,
+					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return err;
+	}
+
+	return skcipher_setkey(skcipher, key, keylen, ctx1_iv_off);
+}
+
 static int des3_skcipher_setkey(struct crypto_skcipher *skcipher,
 				const u8 *key, unsigned int keylen)
 {
-	return unlikely(des3_verify_key(skcipher, key)) ?:
-	       skcipher_setkey(skcipher, key, keylen);
+	return verify_skcipher_des3_key(skcipher, key) ?:
+	       skcipher_setkey(skcipher, key, keylen, 0);
+}
+
+static int des_skcipher_setkey(struct crypto_skcipher *skcipher,
+			       const u8 *key, unsigned int keylen)
+{
+	return verify_skcipher_des_key(skcipher, key) ?:
+	       skcipher_setkey(skcipher, key, keylen, 0);
 }
 
 static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
@@ -884,20 +922,8 @@
 
 	qidev = caam_ctx->qidev;
 
-	if (unlikely(status)) {
-		u32 ssrc = status & JRSTA_SSRC_MASK;
-		u8 err_id = status & JRSTA_CCBERR_ERRID_MASK;
-
-		caam_jr_strstatus(qidev, status);
-		/*
-		 * verify hw auth check passed else return -EBADMSG
-		 */
-		if (ssrc == JRSTA_SSRC_CCB_ERROR &&
-		    err_id == JRSTA_CCBERR_ERRID_ICVCHK)
-			ecode = -EBADMSG;
-		else
-			ecode = -EIO;
-	}
+	if (unlikely(status))
+		ecode = caam_jr_strstatus(qidev, status);
 
 	edesc = container_of(drv_req, typeof(*edesc), drv_req);
 	aead_unmap(qidev, edesc, aead_req);
@@ -1168,18 +1194,14 @@
 
 static int ipsec_gcm_encrypt(struct aead_request *req)
 {
-	if (req->assoclen < 8)
-		return -EINVAL;
-
-	return aead_crypt(req, true);
+	return crypto_ipsec_check_assoclen(req->assoclen) ? : aead_crypt(req,
+					   true);
 }
 
 static int ipsec_gcm_decrypt(struct aead_request *req)
 {
-	if (req->assoclen < 8)
-		return -EINVAL;
-
-	return aead_crypt(req, false);
+	return crypto_ipsec_check_assoclen(req->assoclen) ? : aead_crypt(req,
+					   false);
 }
 
 static void skcipher_done(struct caam_drv_req *drv_req, u32 status)
@@ -1190,13 +1212,14 @@
 	struct caam_ctx *caam_ctx = crypto_skcipher_ctx(skcipher);
 	struct device *qidev = caam_ctx->qidev;
 	int ivsize = crypto_skcipher_ivsize(skcipher);
+	int ecode = 0;
 
 	dev_dbg(qidev, "%s %d: status 0x%x\n", __func__, __LINE__, status);
 
 	edesc = container_of(drv_req, typeof(*edesc), drv_req);
 
 	if (status)
-		caam_jr_strstatus(qidev, status);
+		ecode = caam_jr_strstatus(qidev, status);
 
 	print_hex_dump_debug("dstiv  @" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
@@ -1212,10 +1235,12 @@
 	 * ciphertext block (CBC mode) or last counter (CTR mode).
 	 * This is used e.g. by the CTS mode.
 	 */
-	memcpy(req->iv, (u8 *)&edesc->sgt[0] + edesc->qm_sg_bytes, ivsize);
+	if (!ecode)
+		memcpy(req->iv, (u8 *)&edesc->sgt[0] + edesc->qm_sg_bytes,
+		       ivsize);
 
 	qi_cache_free(edesc);
-	skcipher_request_complete(req, status);
+	skcipher_request_complete(req, ecode);
 }
 
 static struct skcipher_edesc *skcipher_edesc_alloc(struct skcipher_request *req,
@@ -1377,6 +1402,9 @@
 	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
 	int ret;
 
+	if (!req->cryptlen)
+		return 0;
+
 	if (unlikely(caam_congested))
 		return -EAGAIN;
 
@@ -1414,7 +1442,7 @@
 				.cra_driver_name = "cbc-aes-caam-qi",
 				.cra_blocksize = AES_BLOCK_SIZE,
 			},
-			.setkey = skcipher_setkey,
+			.setkey = aes_skcipher_setkey,
 			.encrypt = skcipher_encrypt,
 			.decrypt = skcipher_decrypt,
 			.min_keysize = AES_MIN_KEY_SIZE,
@@ -1446,7 +1474,7 @@
 				.cra_driver_name = "cbc-des-caam-qi",
 				.cra_blocksize = DES_BLOCK_SIZE,
 			},
-			.setkey = skcipher_setkey,
+			.setkey = des_skcipher_setkey,
 			.encrypt = skcipher_encrypt,
 			.decrypt = skcipher_decrypt,
 			.min_keysize = DES_KEY_SIZE,
@@ -1462,7 +1490,7 @@
 				.cra_driver_name = "ctr-aes-caam-qi",
 				.cra_blocksize = 1,
 			},
-			.setkey = skcipher_setkey,
+			.setkey = ctr_skcipher_setkey,
 			.encrypt = skcipher_encrypt,
 			.decrypt = skcipher_decrypt,
 			.min_keysize = AES_MIN_KEY_SIZE,
@@ -1480,7 +1508,7 @@
 				.cra_driver_name = "rfc3686-ctr-aes-caam-qi",
 				.cra_blocksize = 1,
 			},
-			.setkey = skcipher_setkey,
+			.setkey = rfc3686_skcipher_setkey,
 			.encrypt = skcipher_encrypt,
 			.decrypt = skcipher_decrypt,
 			.min_keysize = AES_MIN_KEY_SIZE +
@@ -2523,10 +2551,9 @@
 	unsigned int md_limit = SHA512_DIGEST_SIZE;
 	bool registered = false;
 
-	if (caam_dpaa2) {
-		dev_info(ctrldev, "caam/qi frontend driver not suitable for DPAA 2.x, aborting...\n");
-		return -ENODEV;
-	}
+	/* Make sure this runs only on (DPAA 1.x) QI */
+	if (!priv->qi_present || caam_dpaa2)
+		return 0;
 
 	/*
 	 * Register crypto algorithms the device supports.
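
Several of the converted paths above (the des/des3 setkeys and the ipsec_gcm entry
points) chain the new check with the GNU "?:" operator: the left-hand expression is
evaluated once and, if non-zero, becomes the return value; otherwise the right-hand
call runs. Long-hand equivalent of one of the conversions, as a sketch:

    static int des3_skcipher_setkey(struct crypto_skcipher *skcipher,
                                    const u8 *key, unsigned int keylen)
    {
            int err;

            /* same as: return verify_skcipher_des3_key(...) ?: skcipher_setkey(...); */
            err = verify_skcipher_des3_key(skcipher, key);
            if (err)
                    return err;

            return skcipher_setkey(skcipher, key, keylen, 0);
    }
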
diff --git a/drivers/crypto/caam/caamalg_qi2.c b/drivers/crypto/caam/caamalg_qi2.c
index 06bf32c..3443f6d 100644
--- a/drivers/crypto/caam/caamalg_qi2.c
+++ b/drivers/crypto/caam/caamalg_qi2.c
@@ -15,6 +15,7 @@
 #include "key_gen.h"
 #include "caamalg_desc.h"
 #include "caamhash_desc.h"
+#include "dpseci-debugfs.h"
 #include <linux/fsl/mc.h>
 #include <soc/fsl/dpaa2-io.h>
 #include <soc/fsl/dpaa2-fd.h>
@@ -198,6 +199,18 @@
 				ctx->cdata.keylen - CTR_RFC3686_NONCE_SIZE);
 	}
 
+	/*
+	 * In case |user key| > |derived key|, using DKP<imm,imm> would result
+	 * in invalid opcodes (last bytes of user key) in the resulting
+	 * descriptor. Use DKP<ptr,imm> instead => both virtual and dma key
+	 * addresses are needed.
+	 */
+	ctx->adata.key_virt = ctx->key;
+	ctx->adata.key_dma = ctx->key_dma;
+
+	ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad;
+	ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
+
 	data_len[0] = ctx->adata.keylen_pad;
 	data_len[1] = ctx->cdata.keylen;
 
@@ -209,16 +222,6 @@
 			      ARRAY_SIZE(data_len)) < 0)
 		return -EINVAL;
 
-	if (inl_mask & 1)
-		ctx->adata.key_virt = ctx->key;
-	else
-		ctx->adata.key_dma = ctx->key_dma;
-
-	if (inl_mask & 2)
-		ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad;
-	else
-		ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
-
 	ctx->adata.key_inline = !!(inl_mask & 1);
 	ctx->cdata.key_inline = !!(inl_mask & 2);
 
@@ -247,16 +250,6 @@
 			      ARRAY_SIZE(data_len)) < 0)
 		return -EINVAL;
 
-	if (inl_mask & 1)
-		ctx->adata.key_virt = ctx->key;
-	else
-		ctx->adata.key_dma = ctx->key_dma;
-
-	if (inl_mask & 2)
-		ctx->cdata.key_virt = ctx->key + ctx->adata.keylen_pad;
-	else
-		ctx->cdata.key_dma = ctx->key_dma + ctx->adata.keylen_pad;
-
 	ctx->adata.key_inline = !!(inl_mask & 1);
 	ctx->cdata.key_inline = !!(inl_mask & 2);
 
@@ -329,7 +322,6 @@
 			    unsigned int keylen)
 {
 	struct crypto_authenc_keys keys;
-	u32 flags;
 	int err;
 
 	err = crypto_authenc_extractkeys(&keys, key, keylen);
@@ -340,14 +332,8 @@
 	if (keys.enckeylen != DES3_EDE_KEY_SIZE)
 		goto badkey;
 
-	flags = crypto_aead_get_flags(aead);
-	err = __des3_verify_key(&flags, keys.enckey);
-	if (unlikely(err)) {
-		crypto_aead_set_flags(aead, flags);
-		goto out;
-	}
-
-	err = aead_setkey(aead, key, keylen);
+	err = crypto_des3_ede_verify_key(crypto_aead_tfm(aead), keys.enckey) ?:
+	      aead_setkey(aead, key, keylen);
 
 out:
 	memzero_explicit(&keys, sizeof(keys));
@@ -719,6 +705,11 @@
 static int gcm_setauthsize(struct crypto_aead *authenc, unsigned int authsize)
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	int err;
+
+	err = crypto_gcm_check_authsize(authsize);
+	if (err)
+		return err;
 
 	ctx->authsize = authsize;
 	gcm_set_sh_desc(authenc);
@@ -731,7 +722,13 @@
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *dev = ctx->dev;
+	int ret;
 
+	ret = aes_check_keylen(keylen);
+	if (ret) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return ret;
+	}
 	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
 
@@ -805,6 +802,11 @@
 			       unsigned int authsize)
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
+	int err;
+
+	err = crypto_rfc4106_check_authsize(authsize);
+	if (err)
+		return err;
 
 	ctx->authsize = authsize;
 	rfc4106_set_sh_desc(authenc);
@@ -817,9 +819,13 @@
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *dev = ctx->dev;
+	int ret;
 
-	if (keylen < 4)
-		return -EINVAL;
+	ret = aes_check_keylen(keylen - 4);
+	if (ret) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return ret;
+	}
 
 	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -900,6 +906,9 @@
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(authenc);
 
+	if (authsize != 16)
+		return -EINVAL;
+
 	ctx->authsize = authsize;
 	rfc4543_set_sh_desc(authenc);
 
@@ -911,9 +920,13 @@
 {
 	struct caam_ctx *ctx = crypto_aead_ctx(aead);
 	struct device *dev = ctx->dev;
+	int ret;
 
-	if (keylen < 4)
-		return -EINVAL;
+	ret = aes_check_keylen(keylen - 4);
+	if (ret) {
+		crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return ret;
+	}
 
 	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
@@ -931,7 +944,7 @@
 }
 
 static int skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
-			   unsigned int keylen)
+			   unsigned int keylen, const u32 ctx1_iv_off)
 {
 	struct caam_ctx *ctx = crypto_skcipher_ctx(skcipher);
 	struct caam_skcipher_alg *alg =
@@ -941,34 +954,11 @@
 	struct caam_flc *flc;
 	unsigned int ivsize = crypto_skcipher_ivsize(skcipher);
 	u32 *desc;
-	u32 ctx1_iv_off = 0;
-	const bool ctr_mode = ((ctx->cdata.algtype & OP_ALG_AAI_MASK) ==
-			       OP_ALG_AAI_CTR_MOD128) &&
-			       ((ctx->cdata.algtype & OP_ALG_ALGSEL_MASK) !=
-			       OP_ALG_ALGSEL_CHACHA20);
 	const bool is_rfc3686 = alg->caam.rfc3686;
 
 	print_hex_dump_debug("key in @" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key, keylen, 1);
 
-	/*
-	 * AES-CTR needs to load IV in CONTEXT1 reg
-	 * at an offset of 128bits (16bytes)
-	 * CONTEXT1[255:128] = IV
-	 */
-	if (ctr_mode)
-		ctx1_iv_off = 16;
-
-	/*
-	 * RFC3686 specific:
-	 *	| CONTEXT1[255:128] = {NONCE, IV, COUNTER}
-	 *	| *key = {KEY, NONCE}
-	 */
-	if (is_rfc3686) {
-		ctx1_iv_off = 16 + CTR_RFC3686_NONCE_SIZE;
-		keylen -= CTR_RFC3686_NONCE_SIZE;
-	}
-
 	ctx->cdata.keylen = keylen;
 	ctx->cdata.key_virt = key;
 	ctx->cdata.key_inline = true;
@@ -996,11 +986,92 @@
 	return 0;
 }
 
-static int des3_skcipher_setkey(struct crypto_skcipher *skcipher,
-				const u8 *key, unsigned int keylen)
+static int aes_skcipher_setkey(struct crypto_skcipher *skcipher,
+			       const u8 *key, unsigned int keylen)
 {
-	return unlikely(des3_verify_key(skcipher, key)) ?:
-	       skcipher_setkey(skcipher, key, keylen);
+	int err;
+
+	err = aes_check_keylen(keylen);
+	if (err) {
+		crypto_skcipher_set_flags(skcipher,
+					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return err;
+	}
+
+	return skcipher_setkey(skcipher, key, keylen, 0);
+}
+
+static int rfc3686_skcipher_setkey(struct crypto_skcipher *skcipher,
+				   const u8 *key, unsigned int keylen)
+{
+	u32 ctx1_iv_off;
+	int err;
+
+	/*
+	 * RFC3686 specific:
+	 *	| CONTEXT1[255:128] = {NONCE, IV, COUNTER}
+	 *	| *key = {KEY, NONCE}
+	 */
+	ctx1_iv_off = 16 + CTR_RFC3686_NONCE_SIZE;
+	keylen -= CTR_RFC3686_NONCE_SIZE;
+
+	err = aes_check_keylen(keylen);
+	if (err) {
+		crypto_skcipher_set_flags(skcipher,
+					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return err;
+	}
+
+	return skcipher_setkey(skcipher, key, keylen, ctx1_iv_off);
+}
+
+static int ctr_skcipher_setkey(struct crypto_skcipher *skcipher,
+			       const u8 *key, unsigned int keylen)
+{
+	u32 ctx1_iv_off;
+	int err;
+
+	/*
+	 * AES-CTR needs to load IV in CONTEXT1 reg
+	 * at an offset of 128bits (16bytes)
+	 * CONTEXT1[255:128] = IV
+	 */
+	ctx1_iv_off = 16;
+
+	err = aes_check_keylen(keylen);
+	if (err) {
+		crypto_skcipher_set_flags(skcipher,
+					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return err;
+	}
+
+	return skcipher_setkey(skcipher, key, keylen, ctx1_iv_off);
+}
+
+static int chacha20_skcipher_setkey(struct crypto_skcipher *skcipher,
+				    const u8 *key, unsigned int keylen)
+{
+	if (keylen != CHACHA_KEY_SIZE) {
+		crypto_skcipher_set_flags(skcipher,
+					  CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
+	return skcipher_setkey(skcipher, key, keylen, 0);
+}
+
+static int des_skcipher_setkey(struct crypto_skcipher *skcipher,
+			       const u8 *key, unsigned int keylen)
+{
+	return verify_skcipher_des_key(skcipher, key) ?:
+	       skcipher_setkey(skcipher, key, keylen, 0);
+}
+
+static int des3_skcipher_setkey(struct crypto_skcipher *skcipher,
+			        const u8 *key, unsigned int keylen)
+{
+	return verify_skcipher_des3_key(skcipher, key) ?:
+	       skcipher_setkey(skcipher, key, keylen, 0);
 }
 
 static int xts_skcipher_setkey(struct crypto_skcipher *skcipher, const u8 *key,
@@ -1227,10 +1298,8 @@
 
 	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
 
-	if (unlikely(status)) {
-		caam_qi2_strstatus(ctx->dev, status);
-		ecode = -EIO;
-	}
+	if (unlikely(status))
+		ecode = caam_qi2_strstatus(ctx->dev, status);
 
 	aead_unmap(ctx->dev, edesc, req);
 	qi_cache_free(edesc);
@@ -1250,17 +1319,8 @@
 
 	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
 
-	if (unlikely(status)) {
-		caam_qi2_strstatus(ctx->dev, status);
-		/*
-		 * verify hw auth check passed else return -EBADMSG
-		 */
-		if ((status & JRSTA_CCBERR_ERRID_MASK) ==
-		     JRSTA_CCBERR_ERRID_ICVCHK)
-			ecode = -EBADMSG;
-		else
-			ecode = -EIO;
-	}
+	if (unlikely(status))
+		ecode = caam_qi2_strstatus(ctx->dev, status);
 
 	aead_unmap(ctx->dev, edesc, req);
 	qi_cache_free(edesc);
@@ -1325,18 +1385,12 @@
 
 static int ipsec_gcm_encrypt(struct aead_request *req)
 {
-	if (req->assoclen < 8)
-		return -EINVAL;
-
-	return aead_encrypt(req);
+	return crypto_ipsec_check_assoclen(req->assoclen) ? : aead_encrypt(req);
 }
 
 static int ipsec_gcm_decrypt(struct aead_request *req)
 {
-	if (req->assoclen < 8)
-		return -EINVAL;
-
-	return aead_decrypt(req);
+	return crypto_ipsec_check_assoclen(req->assoclen) ? : aead_decrypt(req);
 }
 
 static void skcipher_encrypt_done(void *cbk_ctx, u32 status)
@@ -1352,10 +1406,8 @@
 
 	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
 
-	if (unlikely(status)) {
-		caam_qi2_strstatus(ctx->dev, status);
-		ecode = -EIO;
-	}
+	if (unlikely(status))
+		ecode = caam_qi2_strstatus(ctx->dev, status);
 
 	print_hex_dump_debug("dstiv  @" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
@@ -1371,7 +1423,9 @@
 	 * ciphertext block (CBC mode) or last counter (CTR mode).
 	 * This is used e.g. by the CTS mode.
 	 */
-	memcpy(req->iv, (u8 *)&edesc->sgt[0] + edesc->qm_sg_bytes, ivsize);
+	if (!ecode)
+		memcpy(req->iv, (u8 *)&edesc->sgt[0] + edesc->qm_sg_bytes,
+		       ivsize);
 
 	qi_cache_free(edesc);
 	skcipher_request_complete(req, ecode);
@@ -1390,10 +1444,8 @@
 
 	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
 
-	if (unlikely(status)) {
-		caam_qi2_strstatus(ctx->dev, status);
-		ecode = -EIO;
-	}
+	if (unlikely(status))
+		ecode = caam_qi2_strstatus(ctx->dev, status);
 
 	print_hex_dump_debug("dstiv  @" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, req->iv,
@@ -1409,7 +1461,9 @@
 	 * ciphertext block (CBC mode) or last counter (CTR mode).
 	 * This is used e.g. by the CTS mode.
 	 */
-	memcpy(req->iv, (u8 *)&edesc->sgt[0] + edesc->qm_sg_bytes, ivsize);
+	if (!ecode)
+		memcpy(req->iv, (u8 *)&edesc->sgt[0] + edesc->qm_sg_bytes,
+		       ivsize);
 
 	qi_cache_free(edesc);
 	skcipher_request_complete(req, ecode);
@@ -1423,6 +1477,9 @@
 	struct caam_request *caam_req = skcipher_request_ctx(req);
 	int ret;
 
+	if (!req->cryptlen)
+		return 0;
+
 	/* allocate extended descriptor */
 	edesc = skcipher_edesc_alloc(req);
 	if (IS_ERR(edesc))
@@ -1451,6 +1508,8 @@
 	struct caam_request *caam_req = skcipher_request_ctx(req);
 	int ret;
 
+	if (!req->cryptlen)
+		return 0;
 	/* allocate extended descriptor */
 	edesc = skcipher_edesc_alloc(req);
 	if (IS_ERR(edesc))
@@ -1545,7 +1604,7 @@
 				.cra_driver_name = "cbc-aes-caam-qi2",
 				.cra_blocksize = AES_BLOCK_SIZE,
 			},
-			.setkey = skcipher_setkey,
+			.setkey = aes_skcipher_setkey,
 			.encrypt = skcipher_encrypt,
 			.decrypt = skcipher_decrypt,
 			.min_keysize = AES_MIN_KEY_SIZE,
@@ -1577,7 +1636,7 @@
 				.cra_driver_name = "cbc-des-caam-qi2",
 				.cra_blocksize = DES_BLOCK_SIZE,
 			},
-			.setkey = skcipher_setkey,
+			.setkey = des_skcipher_setkey,
 			.encrypt = skcipher_encrypt,
 			.decrypt = skcipher_decrypt,
 			.min_keysize = DES_KEY_SIZE,
@@ -1593,7 +1652,7 @@
 				.cra_driver_name = "ctr-aes-caam-qi2",
 				.cra_blocksize = 1,
 			},
-			.setkey = skcipher_setkey,
+			.setkey = ctr_skcipher_setkey,
 			.encrypt = skcipher_encrypt,
 			.decrypt = skcipher_decrypt,
 			.min_keysize = AES_MIN_KEY_SIZE,
@@ -1611,7 +1670,7 @@
 				.cra_driver_name = "rfc3686-ctr-aes-caam-qi2",
 				.cra_blocksize = 1,
 			},
-			.setkey = skcipher_setkey,
+			.setkey = rfc3686_skcipher_setkey,
 			.encrypt = skcipher_encrypt,
 			.decrypt = skcipher_decrypt,
 			.min_keysize = AES_MIN_KEY_SIZE +
@@ -1650,7 +1709,7 @@
 				.cra_driver_name = "chacha20-caam-qi2",
 				.cra_blocksize = 1,
 			},
-			.setkey = skcipher_setkey,
+			.setkey = chacha20_skcipher_setkey,
 			.encrypt = skcipher_encrypt,
 			.decrypt = skcipher_decrypt,
 			.min_keysize = CHACHA_KEY_SIZE,
@@ -2918,6 +2977,7 @@
 /**
  * caam_hash_ctx - ahash per-session context
  * @flc: Flow Contexts array
+ * @key: authentication key
  * @flc_dma: I/O virtual addresses of the Flow Contexts
  * @dev: dpseci device
  * @ctx_len: size of Context Register
@@ -2925,6 +2985,7 @@
  */
 struct caam_hash_ctx {
 	struct caam_flc flc[HASH_NUM_OP];
+	u8 key[CAAM_MAX_HASH_BLOCK_SIZE] ____cacheline_aligned;
 	dma_addr_t flc_dma[HASH_NUM_OP];
 	struct device *dev;
 	int ctx_len;
@@ -3094,10 +3155,7 @@
 
 	dev_dbg(res->dev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 
-	if (err)
-		caam_qi2_strstatus(res->dev, err);
-
-	res->err = err;
+	res->err = err ? caam_qi2_strstatus(res->dev, err) : 0;
 	complete(&res->completion);
 }
 
@@ -3228,6 +3286,19 @@
 	ctx->adata.key_virt = key;
 	ctx->adata.key_inline = true;
 
+	/*
+	 * In case |user key| > |derived key|, using DKP<imm,imm> would result
+	 * in invalid opcodes (last bytes of user key) in the resulting
+	 * descriptor. Use DKP<ptr,imm> instead => both virtual and dma key
+	 * addresses are needed.
+	 */
+	if (keylen > ctx->adata.keylen_pad) {
+		memcpy(ctx->key, key, keylen);
+		dma_sync_single_for_device(ctx->dev, ctx->adata.key_dma,
+					   ctx->adata.keylen_pad,
+					   DMA_TO_DEVICE);
+	}
+
 	ret = ahash_set_sh_desc(ahash);
 	kfree(hashed_key);
 	return ret;
@@ -3282,10 +3353,8 @@
 
 	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
 
-	if (unlikely(status)) {
-		caam_qi2_strstatus(ctx->dev, status);
-		ecode = -EIO;
-	}
+	if (unlikely(status))
+		ecode = caam_qi2_strstatus(ctx->dev, status);
 
 	ahash_unmap_ctx(ctx->dev, edesc, req, DMA_FROM_DEVICE);
 	memcpy(req->result, state->caam_ctx, digestsize);
@@ -3310,10 +3379,8 @@
 
 	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
 
-	if (unlikely(status)) {
-		caam_qi2_strstatus(ctx->dev, status);
-		ecode = -EIO;
-	}
+	if (unlikely(status))
+		ecode = caam_qi2_strstatus(ctx->dev, status);
 
 	ahash_unmap_ctx(ctx->dev, edesc, req, DMA_BIDIRECTIONAL);
 	switch_buf(state);
@@ -3343,10 +3410,8 @@
 
 	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
 
-	if (unlikely(status)) {
-		caam_qi2_strstatus(ctx->dev, status);
-		ecode = -EIO;
-	}
+	if (unlikely(status))
+		ecode = caam_qi2_strstatus(ctx->dev, status);
 
 	ahash_unmap_ctx(ctx->dev, edesc, req, DMA_BIDIRECTIONAL);
 	memcpy(req->result, state->caam_ctx, digestsize);
@@ -3371,10 +3436,8 @@
 
 	dev_dbg(ctx->dev, "%s %d: err 0x%x\n", __func__, __LINE__, status);
 
-	if (unlikely(status)) {
-		caam_qi2_strstatus(ctx->dev, status);
-		ecode = -EIO;
-	}
+	if (unlikely(status))
+		ecode = caam_qi2_strstatus(ctx->dev, status);
 
 	ahash_unmap_ctx(ctx->dev, edesc, req, DMA_FROM_DEVICE);
 	switch_buf(state);
@@ -4466,11 +4529,27 @@
 
 	ctx->dev = caam_hash->dev;
 
+	if (alg->setkey) {
+		ctx->adata.key_dma = dma_map_single_attrs(ctx->dev, ctx->key,
+							  ARRAY_SIZE(ctx->key),
+							  DMA_TO_DEVICE,
+							  DMA_ATTR_SKIP_CPU_SYNC);
+		if (dma_mapping_error(ctx->dev, ctx->adata.key_dma)) {
+			dev_err(ctx->dev, "unable to map key\n");
+			return -ENOMEM;
+		}
+	}
+
 	dma_addr = dma_map_single_attrs(ctx->dev, ctx->flc, sizeof(ctx->flc),
 					DMA_BIDIRECTIONAL,
 					DMA_ATTR_SKIP_CPU_SYNC);
 	if (dma_mapping_error(ctx->dev, dma_addr)) {
 		dev_err(ctx->dev, "unable to map shared descriptors\n");
+		if (ctx->adata.key_dma)
+			dma_unmap_single_attrs(ctx->dev, ctx->adata.key_dma,
+					       ARRAY_SIZE(ctx->key),
+					       DMA_TO_DEVICE,
+					       DMA_ATTR_SKIP_CPU_SYNC);
 		return -ENOMEM;
 	}
 
@@ -4496,6 +4575,10 @@
 
 	dma_unmap_single_attrs(ctx->dev, ctx->flc_dma[0], sizeof(ctx->flc),
 			       DMA_BIDIRECTIONAL, DMA_ATTR_SKIP_CPU_SYNC);
+	if (ctx->adata.key_dma)
+		dma_unmap_single_attrs(ctx->dev, ctx->adata.key_dma,
+				       ARRAY_SIZE(ctx->key), DMA_TO_DEVICE,
+				       DMA_ATTR_SKIP_CPU_SYNC);
 }
 
 static struct caam_hash_alg *caam_hash_alloc(struct device *dev,
@@ -4700,7 +4783,7 @@
 
 	fd_err = dpaa2_fd_get_ctrl(fd) & FD_CTRL_ERR_MASK;
 	if (unlikely(fd_err))
-		dev_err(priv->dev, "FD error: %08x\n", fd_err);
+		dev_err_ratelimited(priv->dev, "FD error: %08x\n", fd_err);
 
 	/*
 	 * FD[ADDR] is guaranteed to be valid, irrespective of errors reported
@@ -5098,6 +5181,8 @@
 		goto err_bind;
 	}
 
+	dpaa2_dpseci_debugfs_init(priv);
+
 	/* register crypto algorithms the device supports */
 	for (i = 0; i < ARRAY_SIZE(driver_algs); i++) {
 		struct caam_skcipher_alg *t_alg = driver_algs + i;
@@ -5265,6 +5350,8 @@
 	dev = &ls_dev->dev;
 	priv = dev_get_drvdata(dev);
 
+	dpaa2_dpseci_debugfs_exit(priv);
+
 	for (i = 0; i < ARRAY_SIZE(driver_aeads); i++) {
 		struct caam_aead_alg *t_alg = driver_aeads + i;
 
diff --git a/drivers/crypto/caam/caamalg_qi2.h b/drivers/crypto/caam/caamalg_qi2.h
index be50854..7067367 100644
--- a/drivers/crypto/caam/caamalg_qi2.h
+++ b/drivers/crypto/caam/caamalg_qi2.h
@@ -10,12 +10,13 @@
 #include <soc/fsl/dpaa2-io.h>
 #include <soc/fsl/dpaa2-fd.h>
 #include <linux/threads.h>
+#include <linux/netdevice.h>
 #include "dpseci.h"
 #include "desc_constr.h"
 
 #define DPAA2_CAAM_STORE_SIZE	16
 /* NAPI weight *must* be a multiple of the store size. */
-#define DPAA2_CAAM_NAPI_WEIGHT	64
+#define DPAA2_CAAM_NAPI_WEIGHT	512
 
 /* The congestion entrance threshold was chosen so that on LS2088
  * we support the maximum throughput for the available memory
@@ -64,6 +65,7 @@
 	struct iommu_domain *domain;
 
 	struct dpaa2_caam_priv_per_cpu __percpu *ppriv;
+	struct dentry *dfs_root;
 };
 
 /**
@@ -90,33 +92,6 @@
 	struct dpaa2_io *dpio;
 };
 
-/*
- * The CAAM QI hardware constructs a job descriptor which points
- * to shared descriptor (as pointed by context_a of FQ to CAAM).
- * When the job descriptor is executed by deco, the whole job
- * descriptor together with shared descriptor gets loaded in
- * deco buffer which is 64 words long (each 32-bit).
- *
- * The job descriptor constructed by QI hardware has layout:
- *
- *	HEADER		(1 word)
- *	Shdesc ptr	(1 or 2 words)
- *	SEQ_OUT_PTR	(1 word)
- *	Out ptr		(1 or 2 words)
- *	Out length	(1 word)
- *	SEQ_IN_PTR	(1 word)
- *	In ptr		(1 or 2 words)
- *	In length	(1 word)
- *
- * The shdesc ptr is used to fetch shared descriptor contents
- * into deco buffer.
- *
- * Apart from shdesc contents, the total number of words that
- * get loaded in deco buffer are '8' or '11'. The remaining words
- * in deco buffer can be used for storing shared descriptor.
- */
-#define MAX_SDLEN	((CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN) / CAAM_CMD_SZ)
-
 /* Length of a single buffer in the QI driver memory cache */
 #define CAAM_QI_MEMCACHE_SIZE	512
 
diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c
index e4ac5d5..65399cb 100644
--- a/drivers/crypto/caam/caamhash.c
+++ b/drivers/crypto/caam/caamhash.c
@@ -95,8 +95,8 @@
 	dma_addr_t sh_desc_update_first_dma;
 	dma_addr_t sh_desc_fin_dma;
 	dma_addr_t sh_desc_digest_dma;
-	dma_addr_t key_dma;
 	enum dma_data_direction dir;
+	enum dma_data_direction key_dir;
 	struct device *jrdev;
 	int ctx_len;
 	struct alginfo adata;
@@ -282,13 +282,10 @@
 	struct device *jrdev = ctx->jrdev;
 	u32 *desc;
 
-	/* key is loaded from memory for UPDATE and FINALIZE states */
-	ctx->adata.key_dma = ctx->key_dma;
-
 	/* shared descriptor for ahash_update */
 	desc = ctx->sh_desc_update;
 	cnstr_shdsc_sk_hash(desc, &ctx->adata, OP_ALG_AS_UPDATE,
-			    ctx->ctx_len, ctx->ctx_len, 0);
+			    ctx->ctx_len, ctx->ctx_len);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_update_dma,
 				   desc_bytes(desc), ctx->dir);
 	print_hex_dump_debug("axcbc update shdesc@" __stringify(__LINE__)" : ",
@@ -298,7 +295,7 @@
 	/* shared descriptor for ahash_{final,finup} */
 	desc = ctx->sh_desc_fin;
 	cnstr_shdsc_sk_hash(desc, &ctx->adata, OP_ALG_AS_FINALIZE,
-			    digestsize, ctx->ctx_len, 0);
+			    digestsize, ctx->ctx_len);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_fin_dma,
 				   desc_bytes(desc), ctx->dir);
 	print_hex_dump_debug("axcbc finup shdesc@" __stringify(__LINE__)" : ",
@@ -311,7 +308,7 @@
 	/* shared descriptor for first invocation of ahash_update */
 	desc = ctx->sh_desc_update_first;
 	cnstr_shdsc_sk_hash(desc, &ctx->adata, OP_ALG_AS_INIT, ctx->ctx_len,
-			    ctx->ctx_len, ctx->key_dma);
+			    ctx->ctx_len);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_update_first_dma,
 				   desc_bytes(desc), ctx->dir);
 	print_hex_dump_debug("axcbc update first shdesc@" __stringify(__LINE__)
@@ -321,7 +318,7 @@
 	/* shared descriptor for ahash_digest */
 	desc = ctx->sh_desc_digest;
 	cnstr_shdsc_sk_hash(desc, &ctx->adata, OP_ALG_AS_INITFINAL,
-			    digestsize, ctx->ctx_len, 0);
+			    digestsize, ctx->ctx_len);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_digest_dma,
 				   desc_bytes(desc), ctx->dir);
 	print_hex_dump_debug("axcbc digest shdesc@" __stringify(__LINE__)" : ",
@@ -340,7 +337,7 @@
 	/* shared descriptor for ahash_update */
 	desc = ctx->sh_desc_update;
 	cnstr_shdsc_sk_hash(desc, &ctx->adata, OP_ALG_AS_UPDATE,
-			    ctx->ctx_len, ctx->ctx_len, 0);
+			    ctx->ctx_len, ctx->ctx_len);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_update_dma,
 				   desc_bytes(desc), ctx->dir);
 	print_hex_dump_debug("acmac update shdesc@" __stringify(__LINE__)" : ",
@@ -350,7 +347,7 @@
 	/* shared descriptor for ahash_{final,finup} */
 	desc = ctx->sh_desc_fin;
 	cnstr_shdsc_sk_hash(desc, &ctx->adata, OP_ALG_AS_FINALIZE,
-			    digestsize, ctx->ctx_len, 0);
+			    digestsize, ctx->ctx_len);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_fin_dma,
 				   desc_bytes(desc), ctx->dir);
 	print_hex_dump_debug("acmac finup shdesc@" __stringify(__LINE__)" : ",
@@ -360,7 +357,7 @@
 	/* shared descriptor for first invocation of ahash_update */
 	desc = ctx->sh_desc_update_first;
 	cnstr_shdsc_sk_hash(desc, &ctx->adata, OP_ALG_AS_INIT, ctx->ctx_len,
-			    ctx->ctx_len, 0);
+			    ctx->ctx_len);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_update_first_dma,
 				   desc_bytes(desc), ctx->dir);
 	print_hex_dump_debug("acmac update first shdesc@" __stringify(__LINE__)
@@ -370,7 +367,7 @@
 	/* shared descriptor for ahash_digest */
 	desc = ctx->sh_desc_digest;
 	cnstr_shdsc_sk_hash(desc, &ctx->adata, OP_ALG_AS_INITFINAL,
-			    digestsize, ctx->ctx_len, 0);
+			    digestsize, ctx->ctx_len);
 	dma_sync_single_for_device(jrdev, ctx->sh_desc_digest_dma,
 				   desc_bytes(desc), ctx->dir);
 	print_hex_dump_debug("acmac digest shdesc@" __stringify(__LINE__)" : ",
@@ -480,6 +477,18 @@
 			goto bad_free_key;
 
 		memcpy(ctx->key, key, keylen);
+
+		/*
+		 * In case |user key| > |derived key|, using DKP<imm,imm>
+		 * would result in invalid opcodes (last bytes of user key) in
+		 * the resulting descriptor. Use DKP<ptr,imm> instead => both
+		 * virtual and dma key addresses are needed.
+		 */
+		if (keylen > ctx->adata.keylen_pad)
+			dma_sync_single_for_device(ctx->jrdev,
+						   ctx->adata.key_dma,
+						   ctx->adata.keylen_pad,
+						   DMA_TO_DEVICE);
 	} else {
 		ret = gen_split_key(ctx->jrdev, ctx->key, &ctx->adata, key,
 				    keylen, CAAM_MAX_HASH_KEY_SIZE);
@@ -501,8 +510,14 @@
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct device *jrdev = ctx->jrdev;
 
+	if (keylen != AES_KEYSIZE_128) {
+		crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+
 	memcpy(ctx->key, key, keylen);
-	dma_sync_single_for_device(jrdev, ctx->key_dma, keylen, DMA_TO_DEVICE);
+	dma_sync_single_for_device(jrdev, ctx->adata.key_dma, keylen,
+				   DMA_TO_DEVICE);
 	ctx->adata.keylen = keylen;
 
 	print_hex_dump_debug("axcbc ctx.key@" __stringify(__LINE__)" : ",
@@ -515,6 +530,13 @@
 			unsigned int keylen)
 {
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	int err;
+
+	err = aes_check_keylen(keylen);
+	if (err) {
+		crypto_ahash_set_flags(ahash, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return err;
+	}
 
 	/* key is immediate data for all cmac shared descriptors */
 	ctx->adata.key_virt = key;
@@ -538,7 +560,7 @@
 	dma_addr_t sec4_sg_dma;
 	int src_nents;
 	int sec4_sg_bytes;
-	u32 hw_desc[DESC_JOB_IO_LEN / sizeof(u32)] ____cacheline_aligned;
+	u32 hw_desc[DESC_JOB_IO_LEN_MAX / sizeof(u32)] ____cacheline_aligned;
 	struct sec4_sg_entry sec4_sg[0];
 };
 
@@ -584,12 +606,13 @@
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	int ecode = 0;
 
 	dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 
 	edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
 	if (err)
-		caam_jr_strstatus(jrdev, err);
+		ecode = caam_jr_strstatus(jrdev, err);
 
 	ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_FROM_DEVICE);
 	memcpy(req->result, state->caam_ctx, digestsize);
@@ -599,7 +622,7 @@
 			     DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
 			     ctx->ctx_len, 1);
 
-	req->base.complete(&req->base, err);
+	req->base.complete(&req->base, ecode);
 }
 
 static void ahash_done_bi(struct device *jrdev, u32 *desc, u32 err,
@@ -611,12 +634,13 @@
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	int digestsize = crypto_ahash_digestsize(ahash);
+	int ecode = 0;
 
 	dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 
 	edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
 	if (err)
-		caam_jr_strstatus(jrdev, err);
+		ecode = caam_jr_strstatus(jrdev, err);
 
 	ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_BIDIRECTIONAL);
 	switch_buf(state);
@@ -630,7 +654,7 @@
 				     DUMP_PREFIX_ADDRESS, 16, 4, req->result,
 				     digestsize, 1);
 
-	req->base.complete(&req->base, err);
+	req->base.complete(&req->base, ecode);
 }
 
 static void ahash_done_ctx_src(struct device *jrdev, u32 *desc, u32 err,
@@ -642,12 +666,13 @@
 	int digestsize = crypto_ahash_digestsize(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
+	int ecode = 0;
 
 	dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 
 	edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
 	if (err)
-		caam_jr_strstatus(jrdev, err);
+		ecode = caam_jr_strstatus(jrdev, err);
 
 	ahash_unmap_ctx(jrdev, edesc, req, digestsize, DMA_BIDIRECTIONAL);
 	memcpy(req->result, state->caam_ctx, digestsize);
@@ -657,7 +682,7 @@
 			     DUMP_PREFIX_ADDRESS, 16, 4, state->caam_ctx,
 			     ctx->ctx_len, 1);
 
-	req->base.complete(&req->base, err);
+	req->base.complete(&req->base, ecode);
 }
 
 static void ahash_done_ctx_dst(struct device *jrdev, u32 *desc, u32 err,
@@ -669,12 +694,13 @@
 	struct caam_hash_ctx *ctx = crypto_ahash_ctx(ahash);
 	struct caam_hash_state *state = ahash_request_ctx(req);
 	int digestsize = crypto_ahash_digestsize(ahash);
+	int ecode = 0;
 
 	dev_dbg(jrdev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 
 	edesc = container_of(desc, struct ahash_edesc, hw_desc[0]);
 	if (err)
-		caam_jr_strstatus(jrdev, err);
+		ecode = caam_jr_strstatus(jrdev, err);
 
 	ahash_unmap_ctx(jrdev, edesc, req, ctx->ctx_len, DMA_FROM_DEVICE);
 	switch_buf(state);
@@ -688,7 +714,7 @@
 				     DUMP_PREFIX_ADDRESS, 16, 4, req->result,
 				     digestsize, 1);
 
-	req->base.complete(&req->base, err);
+	req->base.complete(&req->base, ecode);
 }
 
 /*
@@ -1812,40 +1838,50 @@
 
 	if (is_xcbc_aes(caam_hash->alg_type)) {
 		ctx->dir = DMA_TO_DEVICE;
+		ctx->key_dir = DMA_BIDIRECTIONAL;
 		ctx->adata.algtype = OP_TYPE_CLASS1_ALG | caam_hash->alg_type;
 		ctx->ctx_len = 48;
-
-		ctx->key_dma = dma_map_single_attrs(ctx->jrdev, ctx->key,
-						    ARRAY_SIZE(ctx->key),
-						    DMA_BIDIRECTIONAL,
-						    DMA_ATTR_SKIP_CPU_SYNC);
-		if (dma_mapping_error(ctx->jrdev, ctx->key_dma)) {
-			dev_err(ctx->jrdev, "unable to map key\n");
-			caam_jr_free(ctx->jrdev);
-			return -ENOMEM;
-		}
 	} else if (is_cmac_aes(caam_hash->alg_type)) {
 		ctx->dir = DMA_TO_DEVICE;
+		ctx->key_dir = DMA_NONE;
 		ctx->adata.algtype = OP_TYPE_CLASS1_ALG | caam_hash->alg_type;
 		ctx->ctx_len = 32;
 	} else {
-		ctx->dir = priv->era >= 6 ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+		if (priv->era >= 6) {
+			ctx->dir = DMA_BIDIRECTIONAL;
+			ctx->key_dir = alg->setkey ? DMA_TO_DEVICE : DMA_NONE;
+		} else {
+			ctx->dir = DMA_TO_DEVICE;
+			ctx->key_dir = DMA_NONE;
+		}
 		ctx->adata.algtype = OP_TYPE_CLASS2_ALG | caam_hash->alg_type;
 		ctx->ctx_len = runninglen[(ctx->adata.algtype &
 					   OP_ALG_ALGSEL_SUBMASK) >>
 					  OP_ALG_ALGSEL_SHIFT];
 	}
 
+	if (ctx->key_dir != DMA_NONE) {
+		ctx->adata.key_dma = dma_map_single_attrs(ctx->jrdev, ctx->key,
+							  ARRAY_SIZE(ctx->key),
+							  ctx->key_dir,
+							  DMA_ATTR_SKIP_CPU_SYNC);
+		if (dma_mapping_error(ctx->jrdev, ctx->adata.key_dma)) {
+			dev_err(ctx->jrdev, "unable to map key\n");
+			caam_jr_free(ctx->jrdev);
+			return -ENOMEM;
+		}
+	}
+
 	dma_addr = dma_map_single_attrs(ctx->jrdev, ctx->sh_desc_update,
 					offsetof(struct caam_hash_ctx, key),
 					ctx->dir, DMA_ATTR_SKIP_CPU_SYNC);
 	if (dma_mapping_error(ctx->jrdev, dma_addr)) {
 		dev_err(ctx->jrdev, "unable to map shared descriptors\n");
 
-		if (is_xcbc_aes(caam_hash->alg_type))
-			dma_unmap_single_attrs(ctx->jrdev, ctx->key_dma,
+		if (ctx->key_dir != DMA_NONE)
+			dma_unmap_single_attrs(ctx->jrdev, ctx->adata.key_dma,
 					       ARRAY_SIZE(ctx->key),
-					       DMA_BIDIRECTIONAL,
+					       ctx->key_dir,
 					       DMA_ATTR_SKIP_CPU_SYNC);
 
 		caam_jr_free(ctx->jrdev);
@@ -1878,9 +1914,9 @@
 	dma_unmap_single_attrs(ctx->jrdev, ctx->sh_desc_update_dma,
 			       offsetof(struct caam_hash_ctx, key),
 			       ctx->dir, DMA_ATTR_SKIP_CPU_SYNC);
-	if (is_xcbc_aes(ctx->adata.algtype))
-		dma_unmap_single_attrs(ctx->jrdev, ctx->key_dma,
-				       ARRAY_SIZE(ctx->key), DMA_BIDIRECTIONAL,
+	if (ctx->key_dir != DMA_NONE)
+		dma_unmap_single_attrs(ctx->jrdev, ctx->adata.key_dma,
+				       ARRAY_SIZE(ctx->key), ctx->key_dir,
 				       DMA_ATTR_SKIP_CPU_SYNC);
 	caam_jr_free(ctx->jrdev);
 }
@@ -1971,7 +2007,7 @@
 	 * is not present.
 	 */
 	if (!md_inst)
-		return -ENODEV;
+		return 0;
 
 	/* Limit digest size based on LP256 */
 	if (md_vid == CHA_VER_VID_MD_LP256)
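
Across these hunks the authentication key buffer is mapped once at tfm init time
with DMA_ATTR_SKIP_CPU_SYNC, and the CPU-to-device handoff happens explicitly
whenever the CPU rewrites the key. Condensed sketch of that pairing (error handling
and the matching unmap omitted):

    /* cra_init: map the key buffer once, skipping the initial CPU->device sync */
    ctx->adata.key_dma = dma_map_single_attrs(dev, ctx->key, sizeof(ctx->key),
                                              DMA_TO_DEVICE,
                                              DMA_ATTR_SKIP_CPU_SYNC);

    /* setkey: after the CPU writes the key, push it out to the device */
    memcpy(ctx->key, key, keylen);
    dma_sync_single_for_device(dev, ctx->adata.key_dma, ctx->adata.keylen_pad,
                               DMA_TO_DEVICE);
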
diff --git a/drivers/crypto/caam/caamhash_desc.c b/drivers/crypto/caam/caamhash_desc.c
index 71d0183..78383d7 100644
--- a/drivers/crypto/caam/caamhash_desc.c
+++ b/drivers/crypto/caam/caamhash_desc.c
@@ -83,10 +83,9 @@
  * @state: algorithm state OP_ALG_AS_{INIT, FINALIZE, INITFINALIZE, UPDATE}
  * @digestsize: algorithm's digest size
  * @ctx_len: size of Context Register
- * @key_dma: I/O Virtual Address of the key
  */
 void cnstr_shdsc_sk_hash(u32 * const desc, struct alginfo *adata, u32 state,
-			 int digestsize, int ctx_len, dma_addr_t key_dma)
+			 int digestsize, int ctx_len)
 {
 	u32 *skip_key_load;
 
@@ -136,7 +135,7 @@
 			 LDST_SRCDST_BYTE_CONTEXT);
 	if (is_xcbc_aes(adata->algtype) && state == OP_ALG_AS_INIT)
 		/* Save K1 */
-		append_fifo_store(desc, key_dma, adata->keylen,
+		append_fifo_store(desc, adata->key_dma, adata->keylen,
 				  LDST_CLASS_1_CCB | FIFOST_TYPE_KEY_KEK);
 }
 EXPORT_SYMBOL(cnstr_shdsc_sk_hash);
diff --git a/drivers/crypto/caam/caamhash_desc.h b/drivers/crypto/caam/caamhash_desc.h
index 6947ee1..4f369b8 100644
--- a/drivers/crypto/caam/caamhash_desc.h
+++ b/drivers/crypto/caam/caamhash_desc.h
@@ -25,5 +25,5 @@
 		       int digestsize, int ctx_len, bool import_ctx, int era);
 
 void cnstr_shdsc_sk_hash(u32 * const desc, struct alginfo *adata, u32 state,
-			 int digestsize, int ctx_len, dma_addr_t key_dma);
+			 int digestsize, int ctx_len);
 #endif /* _CAAMHASH_DESC_H_ */
diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
index 8057410..83f96d4 100644
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -17,18 +17,29 @@
 #include "sg_sw_sec4.h"
 #include "caampkc.h"
 
-#define DESC_RSA_PUB_LEN	(2 * CAAM_CMD_SZ + sizeof(struct rsa_pub_pdb))
+#define DESC_RSA_PUB_LEN	(2 * CAAM_CMD_SZ + SIZEOF_RSA_PUB_PDB)
 #define DESC_RSA_PRIV_F1_LEN	(2 * CAAM_CMD_SZ + \
-				 sizeof(struct rsa_priv_f1_pdb))
+				 SIZEOF_RSA_PRIV_F1_PDB)
 #define DESC_RSA_PRIV_F2_LEN	(2 * CAAM_CMD_SZ + \
-				 sizeof(struct rsa_priv_f2_pdb))
+				 SIZEOF_RSA_PRIV_F2_PDB)
 #define DESC_RSA_PRIV_F3_LEN	(2 * CAAM_CMD_SZ + \
-				 sizeof(struct rsa_priv_f3_pdb))
+				 SIZEOF_RSA_PRIV_F3_PDB)
 #define CAAM_RSA_MAX_INPUT_SIZE	512 /* for a 4096-bit modulus */
 
 /* buffer filled with zeros, used for padding */
 static u8 *zero_buffer;
 
+/*
+ * variable used to avoid double free of resources in case
+ * algorithm registration was unsuccessful
+ */
+static bool init_done;
+
+struct caam_akcipher_alg {
+	struct akcipher_alg akcipher;
+	bool registered;
+};
+
 static void rsa_io_unmap(struct device *dev, struct rsa_edesc *edesc,
 			 struct akcipher_request *req)
 {
@@ -107,9 +118,10 @@
 {
 	struct akcipher_request *req = context;
 	struct rsa_edesc *edesc;
+	int ecode = 0;
 
 	if (err)
-		caam_jr_strstatus(dev, err);
+		ecode = caam_jr_strstatus(dev, err);
 
 	edesc = container_of(desc, struct rsa_edesc, hw_desc[0]);
 
@@ -117,7 +129,7 @@
 	rsa_io_unmap(dev, edesc, req);
 	kfree(edesc);
 
-	akcipher_request_complete(req, err);
+	akcipher_request_complete(req, ecode);
 }
 
 static void rsa_priv_f1_done(struct device *dev, u32 *desc, u32 err,
@@ -125,9 +137,10 @@
 {
 	struct akcipher_request *req = context;
 	struct rsa_edesc *edesc;
+	int ecode = 0;
 
 	if (err)
-		caam_jr_strstatus(dev, err);
+		ecode = caam_jr_strstatus(dev, err);
 
 	edesc = container_of(desc, struct rsa_edesc, hw_desc[0]);
 
@@ -135,7 +148,7 @@
 	rsa_io_unmap(dev, edesc, req);
 	kfree(edesc);
 
-	akcipher_request_complete(req, err);
+	akcipher_request_complete(req, ecode);
 }
 
 static void rsa_priv_f2_done(struct device *dev, u32 *desc, u32 err,
@@ -143,9 +156,10 @@
 {
 	struct akcipher_request *req = context;
 	struct rsa_edesc *edesc;
+	int ecode = 0;
 
 	if (err)
-		caam_jr_strstatus(dev, err);
+		ecode = caam_jr_strstatus(dev, err);
 
 	edesc = container_of(desc, struct rsa_edesc, hw_desc[0]);
 
@@ -153,7 +167,7 @@
 	rsa_io_unmap(dev, edesc, req);
 	kfree(edesc);
 
-	akcipher_request_complete(req, err);
+	akcipher_request_complete(req, ecode);
 }
 
 static void rsa_priv_f3_done(struct device *dev, u32 *desc, u32 err,
@@ -161,9 +175,10 @@
 {
 	struct akcipher_request *req = context;
 	struct rsa_edesc *edesc;
+	int ecode = 0;
 
 	if (err)
-		caam_jr_strstatus(dev, err);
+		ecode = caam_jr_strstatus(dev, err);
 
 	edesc = container_of(desc, struct rsa_edesc, hw_desc[0]);
 
@@ -171,7 +186,7 @@
 	rsa_io_unmap(dev, edesc, req);
 	kfree(edesc);
 
-	akcipher_request_complete(req, err);
+	akcipher_request_complete(req, ecode);
 }
 
 /**
@@ -867,7 +882,7 @@
 		return ret;
 
 	/* Copy key in DMA zone */
-	rsa_key->e = kzalloc(raw_key.e_sz, GFP_DMA | GFP_KERNEL);
+	rsa_key->e = kmemdup(raw_key.e, raw_key.e_sz, GFP_DMA | GFP_KERNEL);
 	if (!rsa_key->e)
 		goto err;
 
@@ -889,8 +904,6 @@
 	rsa_key->e_sz = raw_key.e_sz;
 	rsa_key->n_sz = raw_key.n_sz;
 
-	memcpy(rsa_key->e, raw_key.e, raw_key.e_sz);
-
 	return 0;
 err:
 	caam_rsa_free_key(rsa_key);
@@ -971,11 +984,11 @@
 		return ret;
 
 	/* Copy key in DMA zone */
-	rsa_key->d = kzalloc(raw_key.d_sz, GFP_DMA | GFP_KERNEL);
+	rsa_key->d = kmemdup(raw_key.d, raw_key.d_sz, GFP_DMA | GFP_KERNEL);
 	if (!rsa_key->d)
 		goto err;
 
-	rsa_key->e = kzalloc(raw_key.e_sz, GFP_DMA | GFP_KERNEL);
+	rsa_key->e = kmemdup(raw_key.e, raw_key.e_sz, GFP_DMA | GFP_KERNEL);
 	if (!rsa_key->e)
 		goto err;
 
@@ -998,9 +1011,6 @@
 	rsa_key->e_sz = raw_key.e_sz;
 	rsa_key->n_sz = raw_key.n_sz;
 
-	memcpy(rsa_key->d, raw_key.d, raw_key.d_sz);
-	memcpy(rsa_key->e, raw_key.e, raw_key.e_sz);
-
 	caam_rsa_set_priv_key_form(ctx, &raw_key);
 
 	return 0;
@@ -1053,22 +1063,24 @@
 	caam_jr_free(ctx->dev);
 }
 
-static struct akcipher_alg caam_rsa = {
-	.encrypt = caam_rsa_enc,
-	.decrypt = caam_rsa_dec,
-	.set_pub_key = caam_rsa_set_pub_key,
-	.set_priv_key = caam_rsa_set_priv_key,
-	.max_size = caam_rsa_max_size,
-	.init = caam_rsa_init_tfm,
-	.exit = caam_rsa_exit_tfm,
-	.reqsize = sizeof(struct caam_rsa_req_ctx),
-	.base = {
-		.cra_name = "rsa",
-		.cra_driver_name = "rsa-caam",
-		.cra_priority = 3000,
-		.cra_module = THIS_MODULE,
-		.cra_ctxsize = sizeof(struct caam_rsa_ctx),
-	},
+static struct caam_akcipher_alg caam_rsa = {
+	.akcipher = {
+		.encrypt = caam_rsa_enc,
+		.decrypt = caam_rsa_dec,
+		.set_pub_key = caam_rsa_set_pub_key,
+		.set_priv_key = caam_rsa_set_priv_key,
+		.max_size = caam_rsa_max_size,
+		.init = caam_rsa_init_tfm,
+		.exit = caam_rsa_exit_tfm,
+		.reqsize = sizeof(struct caam_rsa_req_ctx),
+		.base = {
+			.cra_name = "rsa",
+			.cra_driver_name = "rsa-caam",
+			.cra_priority = 3000,
+			.cra_module = THIS_MODULE,
+			.cra_ctxsize = sizeof(struct caam_rsa_ctx),
+		},
+	}
 };
 
 /* Public Key Cryptography module initialization handler */
@@ -1077,6 +1089,7 @@
 	struct caam_drv_private *priv = dev_get_drvdata(ctrldev);
 	u32 pk_inst;
 	int err;
+	init_done = false;
 
 	/* Determine public key hardware accelerator presence. */
 	if (priv->era < 10)
@@ -1095,12 +1108,15 @@
 	if (!zero_buffer)
 		return -ENOMEM;
 
-	err = crypto_register_akcipher(&caam_rsa);
+	err = crypto_register_akcipher(&caam_rsa.akcipher);
+
 	if (err) {
 		kfree(zero_buffer);
 		dev_warn(ctrldev, "%s alg registration failed\n",
-			 caam_rsa.base.cra_driver_name);
+			 caam_rsa.akcipher.base.cra_driver_name);
 	} else {
+		init_done = true;
+		caam_rsa.registered = true;
 		dev_info(ctrldev, "caam pkc algorithms registered in /proc/crypto\n");
 	}
 
@@ -1109,6 +1125,11 @@
 
 void caam_pkc_exit(void)
 {
+	if (!init_done)
+		return;
+
+	if (caam_rsa.registered)
+		crypto_unregister_akcipher(&caam_rsa.akcipher);
+
 	kfree(zero_buffer);
-	crypto_unregister_akcipher(&caam_rsa);
 }
diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c
index 561bcb5..e8baaca 100644
--- a/drivers/crypto/caam/caamrng.c
+++ b/drivers/crypto/caam/caamrng.c
@@ -53,7 +53,7 @@
 					 L1_CACHE_BYTES)
 
 /* length of descriptors */
-#define DESC_JOB_O_LEN			(CAAM_CMD_SZ * 2 + CAAM_PTR_SZ * 2)
+#define DESC_JOB_O_LEN			(CAAM_CMD_SZ * 2 + CAAM_PTR_SZ_MAX * 2)
 #define DESC_RNG_LEN			(3 * CAAM_CMD_SZ)
 
 /* Buffer, its dma address and lock */
@@ -80,6 +80,12 @@
 
 static struct caam_rng_ctx *rng_ctx;
 
+/*
+ * Variable used to avoid double free of resources in case
+ * algorithm registration was unsuccessful
+ */
+static bool init_done;
+
 static inline void rng_unmap_buf(struct device *jrdev, struct buf_data *bd)
 {
 	if (bd->addr)
@@ -296,6 +302,9 @@
 
 void caam_rng_exit(void)
 {
+	if (!init_done)
+		return;
+
 	caam_jr_free(rng_ctx->jrdev);
 	hwrng_unregister(&caam_rng);
 	kfree(rng_ctx);
@@ -307,6 +316,7 @@
 	u32 rng_inst;
 	struct caam_drv_private *priv = dev_get_drvdata(ctrldev);
 	int err;
+	init_done = false;
 
 	/* Check for an instantiated RNG before registration */
 	if (priv->era < 10)
@@ -333,7 +343,12 @@
 		goto free_rng_ctx;
 
 	dev_info(dev, "registering rng-caam\n");
-	return hwrng_register(&caam_rng);
+
+	err = hwrng_register(&caam_rng);
+	if (!err) {
+		init_done = true;
+		return err;
+	}
 
 free_rng_ctx:
 	kfree(rng_ctx);
diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h
index 8639b2df..60e2a54 100644
--- a/drivers/crypto/caam/compat.h
+++ b/drivers/crypto/caam/compat.h
@@ -32,7 +32,7 @@
 #include <crypto/null.h>
 #include <crypto/aes.h>
 #include <crypto/ctr.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/gcm.h>
 #include <crypto/sha.h>
 #include <crypto/md5.h>
diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c
index 4e43ca4..db22777d5 100644
--- a/drivers/crypto/caam/ctrl.c
+++ b/drivers/crypto/caam/ctrl.c
@@ -26,16 +26,6 @@
 #endif
 
 /*
- * i.MX targets tend to have clock control subsystems that can
- * enable/disable clocking to our device.
- */
-static inline struct clk *caam_drv_identify_clk(struct device *dev,
-						char *clk_name)
-{
-	return caam_imx ? devm_clk_get(dev, clk_name) : NULL;
-}
-
-/*
  * Descriptor to instantiate RNG State Handle 0 in normal mode and
  * load the JDKEK, TDKEK and TDSK registers
  */
@@ -107,7 +97,12 @@
 	int i;
 
 
-	if (ctrlpriv->virt_en == 1) {
+	if (ctrlpriv->virt_en == 1 ||
+	    /*
+	     * Apparently on i.MX8MQ it doesn't matter if virt_en == 1
+	     * and the following steps should be performed regardless
+	     */
+	    of_machine_is_compatible("fsl,imx8mq")) {
 		clrsetbits_32(&ctrl->deco_rsr, 0, DECORSR_JR0);
 
 		while (!(rd_reg32(&ctrl->deco_rsr) & DECORSR_VALID) &&
@@ -342,13 +337,6 @@
 	/* Unmap controller region */
 	iounmap(ctrl);
 
-	/* shut clocks off before finalizing shutdown */
-	clk_disable_unprepare(ctrlpriv->caam_ipg);
-	if (ctrlpriv->caam_mem)
-		clk_disable_unprepare(ctrlpriv->caam_mem);
-	clk_disable_unprepare(ctrlpriv->caam_aclk);
-	if (ctrlpriv->caam_emi_slow)
-		clk_disable_unprepare(ctrlpriv->caam_emi_slow);
 	return 0;
 }
 
@@ -497,20 +485,99 @@
 };
 MODULE_DEVICE_TABLE(of, caam_match);
 
+struct caam_imx_data {
+	const struct clk_bulk_data *clks;
+	int num_clks;
+};
+
+static const struct clk_bulk_data caam_imx6_clks[] = {
+	{ .id = "ipg" },
+	{ .id = "mem" },
+	{ .id = "aclk" },
+	{ .id = "emi_slow" },
+};
+
+static const struct caam_imx_data caam_imx6_data = {
+	.clks = caam_imx6_clks,
+	.num_clks = ARRAY_SIZE(caam_imx6_clks),
+};
+
+static const struct clk_bulk_data caam_imx7_clks[] = {
+	{ .id = "ipg" },
+	{ .id = "aclk" },
+};
+
+static const struct caam_imx_data caam_imx7_data = {
+	.clks = caam_imx7_clks,
+	.num_clks = ARRAY_SIZE(caam_imx7_clks),
+};
+
+static const struct clk_bulk_data caam_imx6ul_clks[] = {
+	{ .id = "ipg" },
+	{ .id = "mem" },
+	{ .id = "aclk" },
+};
+
+static const struct caam_imx_data caam_imx6ul_data = {
+	.clks = caam_imx6ul_clks,
+	.num_clks = ARRAY_SIZE(caam_imx6ul_clks),
+};
+
+static const struct soc_device_attribute caam_imx_soc_table[] = {
+	{ .soc_id = "i.MX6UL", .data = &caam_imx6ul_data },
+	{ .soc_id = "i.MX6*",  .data = &caam_imx6_data },
+	{ .soc_id = "i.MX7*",  .data = &caam_imx7_data },
+	{ .soc_id = "i.MX8MQ", .data = &caam_imx7_data },
+	{ .family = "Freescale i.MX" },
+	{ /* sentinel */ }
+};
+
+static void disable_clocks(void *data)
+{
+	struct caam_drv_private *ctrlpriv = data;
+
+	clk_bulk_disable_unprepare(ctrlpriv->num_clks, ctrlpriv->clks);
+}
+
+static int init_clocks(struct device *dev, const struct caam_imx_data *data)
+{
+	struct caam_drv_private *ctrlpriv = dev_get_drvdata(dev);
+	int ret;
+
+	ctrlpriv->num_clks = data->num_clks;
+	ctrlpriv->clks = devm_kmemdup(dev, data->clks,
+				      data->num_clks * sizeof(data->clks[0]),
+				      GFP_KERNEL);
+	if (!ctrlpriv->clks)
+		return -ENOMEM;
+
+	ret = devm_clk_bulk_get(dev, ctrlpriv->num_clks, ctrlpriv->clks);
+	if (ret) {
+		dev_err(dev,
+			"Failed to request all necessary clocks\n");
+		return ret;
+	}
+
+	ret = clk_bulk_prepare_enable(ctrlpriv->num_clks, ctrlpriv->clks);
+	if (ret) {
+		dev_err(dev,
+			"Failed to prepare/enable all necessary clocks\n");
+		return ret;
+	}
+
+	return devm_add_action_or_reset(dev, disable_clocks, ctrlpriv);
+}
+
 /* Probe routine for CAAM top (controller) level */
 static int caam_probe(struct platform_device *pdev)
 {
 	int ret, ring, gen_sk, ent_delay = RTSDCTL_ENT_DLY_MIN;
 	u64 caam_id;
-	static const struct soc_device_attribute imx_soc[] = {
-		{.family = "Freescale i.MX"},
-		{},
-	};
+	const struct soc_device_attribute *imx_soc_match;
 	struct device *dev;
 	struct device_node *nprop, *np;
 	struct caam_ctrl __iomem *ctrl;
 	struct caam_drv_private *ctrlpriv;
-	struct clk *clk;
 #ifdef CONFIG_DEBUG_FS
 	struct caam_perfmon *perfmon;
 #endif
@@ -527,103 +594,68 @@
 	dev_set_drvdata(dev, ctrlpriv);
 	nprop = pdev->dev.of_node;
 
-	caam_imx = (bool)soc_device_match(imx_soc);
+	imx_soc_match = soc_device_match(caam_imx_soc_table);
+	caam_imx = (bool)imx_soc_match;
 
-	/* Enable clocking */
-	clk = caam_drv_identify_clk(&pdev->dev, "ipg");
-	if (IS_ERR(clk)) {
-		ret = PTR_ERR(clk);
-		dev_err(&pdev->dev,
-			"can't identify CAAM ipg clk: %d\n", ret);
-		return ret;
-	}
-	ctrlpriv->caam_ipg = clk;
+	if (imx_soc_match) {
+		if (!imx_soc_match->data) {
+			dev_err(dev, "No clock data provided for i.MX SoC");
+			return -EINVAL;
+		}
 
-	if (!of_machine_is_compatible("fsl,imx7d") &&
-	    !of_machine_is_compatible("fsl,imx7s") &&
-	    !of_machine_is_compatible("fsl,imx7ulp")) {
-		clk = caam_drv_identify_clk(&pdev->dev, "mem");
-		if (IS_ERR(clk)) {
-			ret = PTR_ERR(clk);
-			dev_err(&pdev->dev,
-				"can't identify CAAM mem clk: %d\n", ret);
+		ret = init_clocks(dev, imx_soc_match->data);
+		if (ret)
 			return ret;
-		}
-		ctrlpriv->caam_mem = clk;
 	}
 
-	clk = caam_drv_identify_clk(&pdev->dev, "aclk");
-	if (IS_ERR(clk)) {
-		ret = PTR_ERR(clk);
-		dev_err(&pdev->dev,
-			"can't identify CAAM aclk clk: %d\n", ret);
-		return ret;
-	}
-	ctrlpriv->caam_aclk = clk;
-
-	if (!of_machine_is_compatible("fsl,imx6ul") &&
-	    !of_machine_is_compatible("fsl,imx7d") &&
-	    !of_machine_is_compatible("fsl,imx7s") &&
-	    !of_machine_is_compatible("fsl,imx7ulp")) {
-		clk = caam_drv_identify_clk(&pdev->dev, "emi_slow");
-		if (IS_ERR(clk)) {
-			ret = PTR_ERR(clk);
-			dev_err(&pdev->dev,
-				"can't identify CAAM emi_slow clk: %d\n", ret);
-			return ret;
-		}
-		ctrlpriv->caam_emi_slow = clk;
-	}
-
-	ret = clk_prepare_enable(ctrlpriv->caam_ipg);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "can't enable CAAM ipg clock: %d\n", ret);
-		return ret;
-	}
-
-	if (ctrlpriv->caam_mem) {
-		ret = clk_prepare_enable(ctrlpriv->caam_mem);
-		if (ret < 0) {
-			dev_err(&pdev->dev, "can't enable CAAM secure mem clock: %d\n",
-				ret);
-			goto disable_caam_ipg;
-		}
-	}
-
-	ret = clk_prepare_enable(ctrlpriv->caam_aclk);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "can't enable CAAM aclk clock: %d\n", ret);
-		goto disable_caam_mem;
-	}
-
-	if (ctrlpriv->caam_emi_slow) {
-		ret = clk_prepare_enable(ctrlpriv->caam_emi_slow);
-		if (ret < 0) {
-			dev_err(&pdev->dev, "can't enable CAAM emi slow clock: %d\n",
-				ret);
-			goto disable_caam_aclk;
-		}
-	}
 
 	/* Get configuration properties from device tree */
 	/* First, get register page */
 	ctrl = of_iomap(nprop, 0);
-	if (ctrl == NULL) {
+	if (!ctrl) {
 		dev_err(dev, "caam: of_iomap() failed\n");
-		ret = -ENOMEM;
-		goto disable_caam_emi_slow;
+		return -ENOMEM;
 	}
 
 	caam_little_end = !(bool)(rd_reg32(&ctrl->perfmon.status) &
 				  (CSTA_PLEND | CSTA_ALT_PLEND));
-
-	/* Finding the page size for using the CTPR_MS register */
 	comp_params = rd_reg32(&ctrl->perfmon.comp_parms_ms);
-	pg_size = (comp_params & CTPR_MS_PG_SZ_MASK) >> CTPR_MS_PG_SZ_SHIFT;
+	if (comp_params & CTPR_MS_PS && rd_reg32(&ctrl->mcr) & MCFGR_LONG_PTR)
+		caam_ptr_sz = sizeof(u64);
+	else
+		caam_ptr_sz = sizeof(u32);
+	caam_dpaa2 = !!(comp_params & CTPR_MS_DPAA2);
+	ctrlpriv->qi_present = !!(comp_params & CTPR_MS_QI_MASK);
+
+#ifdef CONFIG_CAAM_QI
+	/* If (DPAA 1.x) QI present, check whether dependencies are available */
+	if (ctrlpriv->qi_present && !caam_dpaa2) {
+		ret = qman_is_probed();
+		if (!ret) {
+			ret = -EPROBE_DEFER;
+			goto iounmap_ctrl;
+		} else if (ret < 0) {
+			dev_err(dev, "failing probe due to qman probe error\n");
+			ret = -ENODEV;
+			goto iounmap_ctrl;
+		}
+
+		ret = qman_portals_probed();
+		if (!ret) {
+			ret = -EPROBE_DEFER;
+			goto iounmap_ctrl;
+		} else if (ret < 0) {
+			dev_err(dev, "failing probe due to qman portals probe error\n");
+			ret = -ENODEV;
+			goto iounmap_ctrl;
+		}
+	}
+#endif
 
 	/* Allocating the BLOCK_OFFSET based on the supported page size on
 	 * the platform
 	 */
+	pg_size = (comp_params & CTPR_MS_PG_SZ_MASK) >> CTPR_MS_PG_SZ_SHIFT;
 	if (pg_size == 0)
 		BLOCK_OFFSET = PG_SIZE_4K;
 	else
@@ -648,7 +680,6 @@
 	 * In case of SoCs with Management Complex, MC f/w performs
 	 * the configuration.
 	 */
-	caam_dpaa2 = !!(comp_params & CTPR_MS_DPAA2);
 	np = of_find_compatible_node(NULL, NULL, "fsl,qoriq-mc");
 	ctrlpriv->mc_en = !!np;
 	of_node_put(np);
@@ -688,16 +719,7 @@
 			      JRSTART_JR1_START | JRSTART_JR2_START |
 			      JRSTART_JR3_START);
 
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
-		if (caam_dpaa2)
-			ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(49));
-		else if (of_device_is_compatible(nprop, "fsl,sec-v5.0"))
-			ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(40));
-		else
-			ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(36));
-	} else {
-		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
-	}
+	ret = dma_set_mask_and_coherent(dev, caam_get_dma_mask(dev));
 	if (ret) {
 		dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n", ret);
 		goto iounmap_ctrl;
@@ -719,7 +741,6 @@
 #endif
 
 	/* Check to see if (DPAA 1.x) QI present. If so, enable */
-	ctrlpriv->qi_present = !!(comp_params & CTPR_MS_QI_MASK);
 	if (ctrlpriv->qi_present && !caam_dpaa2) {
 		ctrlpriv->qi = (struct caam_queue_if __iomem __force *)
 			       ((__force uint8_t *)ctrl +
@@ -908,16 +929,6 @@
 #endif
 iounmap_ctrl:
 	iounmap(ctrl);
-disable_caam_emi_slow:
-	if (ctrlpriv->caam_emi_slow)
-		clk_disable_unprepare(ctrlpriv->caam_emi_slow);
-disable_caam_aclk:
-	clk_disable_unprepare(ctrlpriv->caam_aclk);
-disable_caam_mem:
-	if (ctrlpriv->caam_mem)
-		clk_disable_unprepare(ctrlpriv->caam_mem);
-disable_caam_ipg:
-	clk_disable_unprepare(ctrlpriv->caam_ipg);
 	return ret;
 }
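
The per-SoC tables above feed devm_clk_bulk_get()/clk_bulk_prepare_enable(), with
disable_clocks() registered via devm_add_action_or_reset() so the error and remove
paths need no explicit clk_disable_unprepare() calls. soc_device_match() returns the
first matching entry, so the exact "i.MX6UL" id must stay ahead of the "i.MX6*" glob,
and the bare family entry (no .data) makes probe fail with "No clock data provided
for i.MX SoC". As a sketch, wiring up an additional SoC would look roughly like this
(hypothetical "i.MXNN" names, not part of the patch):

	static const struct clk_bulk_data caam_imxNN_clks[] = {
		{ .id = "ipg" },
		{ .id = "aclk" },
	};

	static const struct caam_imx_data caam_imxNN_data = {
		.clks = caam_imxNN_clks,
		.num_clks = ARRAY_SIZE(caam_imxNN_clks),
	};

	/* and, in caam_imx_soc_table[], before any glob that also matches it:
	 *	{ .soc_id = "i.MXNN", .data = &caam_imxNN_data },
	 */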
 
diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h
index 5988a26..62ce642 100644
--- a/drivers/crypto/caam/desc_constr.h
+++ b/drivers/crypto/caam/desc_constr.h
@@ -14,9 +14,41 @@
 
 #define IMMEDIATE (1 << 23)
 #define CAAM_CMD_SZ sizeof(u32)
-#define CAAM_PTR_SZ sizeof(dma_addr_t)
+#define CAAM_PTR_SZ caam_ptr_sz
+#define CAAM_PTR_SZ_MAX sizeof(dma_addr_t)
+#define CAAM_PTR_SZ_MIN sizeof(u32)
 #define CAAM_DESC_BYTES_MAX (CAAM_CMD_SZ * MAX_CAAM_DESCSIZE)
-#define DESC_JOB_IO_LEN (CAAM_CMD_SZ * 5 + CAAM_PTR_SZ * 3)
+#define __DESC_JOB_IO_LEN(n) (CAAM_CMD_SZ * 5 + (n) * 3)
+#define DESC_JOB_IO_LEN __DESC_JOB_IO_LEN(CAAM_PTR_SZ)
+#define DESC_JOB_IO_LEN_MAX __DESC_JOB_IO_LEN(CAAM_PTR_SZ_MAX)
+#define DESC_JOB_IO_LEN_MIN __DESC_JOB_IO_LEN(CAAM_PTR_SZ_MIN)
+
+/*
+ * The CAAM QI hardware constructs a job descriptor which points
+ * to shared descriptor (as pointed by context_a of FQ to CAAM).
+ * When the job descriptor is executed by deco, the whole job
+ * descriptor together with shared descriptor gets loaded in
+ * deco buffer which is 64 words long (each 32-bit).
+ *
+ * The job descriptor constructed by QI hardware has layout:
+ *
+ *	HEADER		(1 word)
+ *	Shdesc ptr	(1 or 2 words)
+ *	SEQ_OUT_PTR	(1 word)
+ *	Out ptr		(1 or 2 words)
+ *	Out length	(1 word)
+ *	SEQ_IN_PTR	(1 word)
+ *	In ptr		(1 or 2 words)
+ *	In length	(1 word)
+ *
+ * The shdesc ptr is used to fetch shared descriptor contents
+ * into deco buffer.
+ *
+ * Apart from shdesc contents, the total number of words that
+ * get loaded in the deco buffer is '8' or '11'. The remaining words
+ * in the deco buffer can be used for storing the shared descriptor.
+ */
+#define MAX_SDLEN	((CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN_MIN) / CAAM_CMD_SZ)
 
 #ifdef DEBUG
 #define PRINT_POS do { printk(KERN_DEBUG "%02d: %s\n", desc_len(desc),\
@@ -37,6 +69,7 @@
 			       (LDOFF_ENABLE_AUTO_NFIFO << LDST_OFFSET_SHIFT))
 
 extern bool caam_little_end;
+extern size_t caam_ptr_sz;
 
 /*
  * HW fetches 4 S/G table entries at a time, irrespective of how many entries
@@ -103,9 +136,15 @@
 
 static inline void append_ptr(u32 * const desc, dma_addr_t ptr)
 {
-	dma_addr_t *offset = (dma_addr_t *)desc_end(desc);
+	if (caam_ptr_sz == sizeof(dma_addr_t)) {
+		dma_addr_t *offset = (dma_addr_t *)desc_end(desc);
 
-	*offset = cpu_to_caam_dma(ptr);
+		*offset = cpu_to_caam_dma(ptr);
+	} else {
+		u32 *offset = (u32 *)desc_end(desc);
+
+		*offset = cpu_to_caam_dma(ptr);
+	}
 
 	(*desc) = cpu_to_caam32(caam32_to_cpu(*desc) +
 				CAAM_PTR_SZ / CAAM_CMD_SZ);
@@ -457,8 +496,8 @@
  *           functions where it is used.
  * @keylen: length of the provided algorithm key, in bytes
  * @keylen_pad: padded length of the provided algorithm key, in bytes
- * @key: address where algorithm key resides; virtual address if key_inline
- *       is true, dma (bus) address if key_inline is false.
+ * @key_dma: dma (bus) address where algorithm key resides
+ * @key_virt: virtual address where algorithm key resides
  * @key_inline: true - key can be inlined in the descriptor; false - key is
  *              referenced by the descriptor
  */
@@ -466,10 +505,8 @@
 	u32 algtype;
 	unsigned int keylen;
 	unsigned int keylen_pad;
-	union {
-		dma_addr_t key_dma;
-		const void *key_virt;
-	};
+	dma_addr_t key_dma;
+	const void *key_virt;
 	bool key_inline;
 };
 
@@ -535,14 +572,26 @@
 	if (adata->key_inline) {
 		int words;
 
-		append_operation(desc, OP_TYPE_UNI_PROTOCOL | protid |
-				 OP_PCL_DKP_SRC_IMM | OP_PCL_DKP_DST_IMM |
-				 adata->keylen);
-		append_data(desc, adata->key_virt, adata->keylen);
+		if (adata->keylen > adata->keylen_pad) {
+			append_operation(desc, OP_TYPE_UNI_PROTOCOL | protid |
+					 OP_PCL_DKP_SRC_PTR |
+					 OP_PCL_DKP_DST_IMM | adata->keylen);
+			append_ptr(desc, adata->key_dma);
+
+			words = (ALIGN(adata->keylen_pad, CAAM_CMD_SZ) -
+				 CAAM_PTR_SZ) / CAAM_CMD_SZ;
+		} else {
+			append_operation(desc, OP_TYPE_UNI_PROTOCOL | protid |
+					 OP_PCL_DKP_SRC_IMM |
+					 OP_PCL_DKP_DST_IMM | adata->keylen);
+			append_data(desc, adata->key_virt, adata->keylen);
+
+			words = (ALIGN(adata->keylen_pad, CAAM_CMD_SZ) -
+				 ALIGN(adata->keylen, CAAM_CMD_SZ)) /
+				CAAM_CMD_SZ;
+		}
 
 		/* Reserve space in descriptor buffer for the derived key */
-		words = (ALIGN(adata->keylen_pad, CAAM_CMD_SZ) -
-			 ALIGN(adata->keylen, CAAM_CMD_SZ)) / CAAM_CMD_SZ;
 		if (words)
 			(*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + words);
 	} else {
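
The DESC_JOB_IO_LEN_{MIN,MAX} variants make the "8 or 11 words" figure from the
comment above explicit. A worked check of the arithmetic (a sketch assuming
MAX_CAAM_DESCSIZE is 64 words, i.e. the 64-word deco buffer the comment describes):

	/*
	 * __DESC_JOB_IO_LEN(n) = CAAM_CMD_SZ * 5 + n * 3, with CAAM_CMD_SZ = 4:
	 *
	 *   DESC_JOB_IO_LEN_MIN = 5 * 4 + 4 * 3 = 32 bytes =  8 words (32-bit pointers)
	 *   DESC_JOB_IO_LEN_MAX = 5 * 4 + 8 * 3 = 44 bytes = 11 words (64-bit pointers)
	 *
	 *   CAAM_DESC_BYTES_MAX = 4 * 64 = 256 bytes
	 *   MAX_SDLEN = (256 - 32) / 4 = 56 words left for the shared descriptor
	 */
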
diff --git a/drivers/crypto/caam/dpseci-debugfs.c b/drivers/crypto/caam/dpseci-debugfs.c
new file mode 100644
index 0000000..c5bfc92
--- /dev/null
+++ b/drivers/crypto/caam/dpseci-debugfs.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2019 NXP */
+
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/debugfs.h>
+#include "dpseci-debugfs.h"
+
+static int dpseci_dbg_fqs_show(struct seq_file *file, void *offset)
+{
+	struct dpaa2_caam_priv *priv = (struct dpaa2_caam_priv *)file->private;
+	u32 fqid, fcnt, bcnt;
+	int i, err;
+
+	seq_printf(file, "FQ stats for %s:\n", dev_name(priv->dev));
+	seq_printf(file, "%s%16s%16s\n",
+		   "Rx-VFQID",
+		   "Pending frames",
+		   "Pending bytes");
+
+	for (i = 0; i < priv->num_pairs; i++) {
+		fqid = priv->rx_queue_attr[i].fqid;
+		err = dpaa2_io_query_fq_count(NULL, fqid, &fcnt, &bcnt);
+		if (err)
+			continue;
+
+		seq_printf(file, "%5d%16u%16u\n", fqid, fcnt, bcnt);
+	}
+
+	seq_printf(file, "%s%16s%16s\n",
+		   "Tx-VFQID",
+		   "Pending frames",
+		   "Pending bytes");
+
+	for (i = 0; i < priv->num_pairs; i++) {
+		fqid = priv->tx_queue_attr[i].fqid;
+		err = dpaa2_io_query_fq_count(NULL, fqid, &fcnt, &bcnt);
+		if (err)
+			continue;
+
+		seq_printf(file, "%5d%16u%16u\n", fqid, fcnt, bcnt);
+	}
+
+	return 0;
+}
+
+static int dpseci_dbg_fqs_open(struct inode *inode, struct file *file)
+{
+	int err;
+	struct dpaa2_caam_priv *priv;
+
+	priv = (struct dpaa2_caam_priv *)inode->i_private;
+
+	err = single_open(file, dpseci_dbg_fqs_show, priv);
+	if (err < 0)
+		dev_err(priv->dev, "single_open() failed\n");
+
+	return err;
+}
+
+static const struct file_operations dpseci_dbg_fq_ops = {
+	.open = dpseci_dbg_fqs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void dpaa2_dpseci_debugfs_init(struct dpaa2_caam_priv *priv)
+{
+	priv->dfs_root = debugfs_create_dir(dev_name(priv->dev), NULL);
+
+	debugfs_create_file("fq_stats", 0444, priv->dfs_root, priv,
+			    &dpseci_dbg_fq_ops);
+}
+
+void dpaa2_dpseci_debugfs_exit(struct dpaa2_caam_priv *priv)
+{
+	debugfs_remove_recursive(priv->dfs_root);
+}
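
A usage note on the new entries (assuming debugfs is mounted in its usual location):
debugfs_create_dir(dev_name(priv->dev), NULL) puts the directory directly under the
debugfs root, so the statistics should show up as
/sys/kernel/debug/<dpseci device name>/fq_stats, readable only (mode 0444), with each
read re-querying the pending frame and byte counts via dpaa2_io_query_fq_count().
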
diff --git a/drivers/crypto/caam/dpseci-debugfs.h b/drivers/crypto/caam/dpseci-debugfs.h
new file mode 100644
index 0000000..bc22af7b
--- /dev/null
+++ b/drivers/crypto/caam/dpseci-debugfs.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
+/* Copyright 2019 NXP */
+
+#ifndef DPSECI_DEBUGFS_H
+#define DPSECI_DEBUGFS_H
+
+#include <linux/dcache.h>
+#include "caamalg_qi2.h"
+
+#ifdef CONFIG_DEBUG_FS
+void dpaa2_dpseci_debugfs_init(struct dpaa2_caam_priv *priv);
+void dpaa2_dpseci_debugfs_exit(struct dpaa2_caam_priv *priv);
+#else
+static inline void dpaa2_dpseci_debugfs_init(struct dpaa2_caam_priv *priv) {}
+static inline void dpaa2_dpseci_debugfs_exit(struct dpaa2_caam_priv *priv) {}
+#endif /* CONFIG_DEBUG_FS */
+
+#endif /* DPSECI_DEBUGFS_H */
diff --git a/drivers/crypto/caam/error.c b/drivers/crypto/caam/error.c
index 4f0d458..17c6108 100644
--- a/drivers/crypto/caam/error.c
+++ b/drivers/crypto/caam/error.c
@@ -56,6 +56,9 @@
 bool caam_imx;
 EXPORT_SYMBOL(caam_imx);
 
+size_t caam_ptr_sz;
+EXPORT_SYMBOL(caam_ptr_sz);
+
 static const struct {
 	u8 value;
 	const char *error_text;
@@ -118,6 +121,7 @@
 	u8 value;
 	const char *error_text;
 } qi_error_list[] = {
+	{ 0x00, "No error" },
 	{ 0x1F, "Job terminated by FQ or ICID flush" },
 	{ 0x20, "FD format error"},
 	{ 0x21, "FD command format error"},
@@ -210,8 +214,8 @@
 	"Secure key generation",
 };
 
-static void report_ccb_status(struct device *jrdev, const u32 status,
-			      const char *error)
+static int report_ccb_status(struct device *jrdev, const u32 status,
+			     const char *error)
 {
 	u8 cha_id = (status & JRSTA_CCBERR_CHAID_MASK) >>
 		    JRSTA_CCBERR_CHAID_SHIFT;
@@ -247,22 +251,27 @@
 	 * CCB ICV check failures are part of normal operation life;
 	 * we leave the upper layers to do what they want with them.
 	 */
-	if (err_id != JRSTA_CCBERR_ERRID_ICVCHK)
-		dev_err(jrdev, "%08x: %s: %s %d: %s%s: %s%s\n",
-			status, error, idx_str, idx,
-			cha_str, cha_err_code,
-			err_str, err_err_code);
+	if (err_id == JRSTA_CCBERR_ERRID_ICVCHK)
+		return -EBADMSG;
+
+	dev_err_ratelimited(jrdev, "%08x: %s: %s %d: %s%s: %s%s\n", status,
+			    error, idx_str, idx, cha_str, cha_err_code,
+			    err_str, err_err_code);
+
+	return -EINVAL;
 }
 
-static void report_jump_status(struct device *jrdev, const u32 status,
-			       const char *error)
+static int report_jump_status(struct device *jrdev, const u32 status,
+			      const char *error)
 {
 	dev_err(jrdev, "%08x: %s: %s() not implemented\n",
 		status, error, __func__);
+
+	return -EINVAL;
 }
 
-static void report_deco_status(struct device *jrdev, const u32 status,
-			       const char *error)
+static int report_deco_status(struct device *jrdev, const u32 status,
+			      const char *error)
 {
 	u8 err_id = status & JRSTA_DECOERR_ERROR_MASK;
 	u8 idx = (status & JRSTA_DECOERR_INDEX_MASK) >>
@@ -288,10 +297,12 @@
 
 	dev_err(jrdev, "%08x: %s: %s %d: %s%s\n",
 		status, error, idx_str, idx, err_str, err_err_code);
+
+	return -EINVAL;
 }
 
-static void report_qi_status(struct device *qidev, const u32 status,
-			     const char *error)
+static int report_qi_status(struct device *qidev, const u32 status,
+			    const char *error)
 {
 	u8 err_id = status & JRSTA_QIERR_ERROR_MASK;
 	const char *err_str = "unidentified error value 0x";
@@ -309,27 +320,33 @@
 
 	dev_err(qidev, "%08x: %s: %s%s\n",
 		status, error, err_str, err_err_code);
+
+	return -EINVAL;
 }
 
-static void report_jr_status(struct device *jrdev, const u32 status,
-			     const char *error)
+static int report_jr_status(struct device *jrdev, const u32 status,
+			    const char *error)
 {
 	dev_err(jrdev, "%08x: %s: %s() not implemented\n",
 		status, error, __func__);
+
+	return -EINVAL;
 }
 
-static void report_cond_code_status(struct device *jrdev, const u32 status,
-				    const char *error)
+static int report_cond_code_status(struct device *jrdev, const u32 status,
+				   const char *error)
 {
 	dev_err(jrdev, "%08x: %s: %s() not implemented\n",
 		status, error, __func__);
+
+	return -EINVAL;
 }
 
-void caam_strstatus(struct device *jrdev, u32 status, bool qi_v2)
+int caam_strstatus(struct device *jrdev, u32 status, bool qi_v2)
 {
 	static const struct stat_src {
-		void (*report_ssed)(struct device *jrdev, const u32 status,
-				    const char *error);
+		int (*report_ssed)(struct device *jrdev, const u32 status,
+				   const char *error);
 		const char *error;
 	} status_src[16] = {
 		{ NULL, "No error" },
@@ -357,11 +374,14 @@
 	 * Otherwise print the error source name.
 	 */
 	if (status_src[ssrc].report_ssed)
-		status_src[ssrc].report_ssed(jrdev, status, error);
-	else if (error)
+		return status_src[ssrc].report_ssed(jrdev, status, error);
+
+	if (error)
 		dev_err(jrdev, "%d: %s\n", ssrc, error);
 	else
 		dev_err(jrdev, "%d: unknown error source\n", ssrc);
+
+	return -EINVAL;
 }
 EXPORT_SYMBOL(caam_strstatus);
 
diff --git a/drivers/crypto/caam/error.h b/drivers/crypto/caam/error.h
index d9726e6..16809fa 100644
--- a/drivers/crypto/caam/error.h
+++ b/drivers/crypto/caam/error.h
@@ -12,7 +12,7 @@
 
 #define CAAM_ERROR_STR_MAX 302
 
-void caam_strstatus(struct device *dev, u32 status, bool qi_v2);
+int caam_strstatus(struct device *dev, u32 status, bool qi_v2);
 
 #define caam_jr_strstatus(jrdev, status) caam_strstatus(jrdev, status, false)
 #define caam_qi2_strstatus(qidev, status) caam_strstatus(qidev, status, true)
diff --git a/drivers/crypto/caam/intern.h b/drivers/crypto/caam/intern.h
index 6af84bb..731b06b 100644
--- a/drivers/crypto/caam/intern.h
+++ b/drivers/crypto/caam/intern.h
@@ -10,6 +10,8 @@
 #ifndef INTERN_H
 #define INTERN_H
 
+#include "ctrl.h"
+
 /* Currently comes from Kconfig param as a ^2 (driver-required) */
 #define JOBR_DEPTH (1 << CONFIG_CRYPTO_DEV_FSL_CAAM_RINGSIZE)
 
@@ -53,10 +55,11 @@
 	spinlock_t inplock ____cacheline_aligned; /* Input ring index lock */
 	u32 inpring_avail;	/* Number of free entries in input ring */
 	int head;			/* entinfo (s/w ring) head index */
-	dma_addr_t *inpring;	/* Base of input ring, alloc DMA-safe */
+	void *inpring;			/* Base of input ring, alloc
+					 * DMA-safe */
 	int out_ring_read_index;	/* Output index "tail" */
 	int tail;			/* entinfo (s/w ring) tail index */
-	struct jr_outentry *outring;	/* Base of output ring, DMA-safe */
+	void *outring;			/* Base of output ring, DMA-safe */
 };
 
 /*
@@ -92,11 +95,8 @@
 				   Handles of the RNG4 block are initialized
 				   by this driver */
 
-	struct clk *caam_ipg;
-	struct clk *caam_mem;
-	struct clk *caam_aclk;
-	struct clk *caam_emi_slow;
-
+	struct clk_bulk_data *clks;
+	int num_clks;
 	/*
 	 * debugfs entries for developer view into driver/device
 	 * variables at runtime.
@@ -215,4 +215,22 @@
 DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u64_ro, caam_debugfs_u64_get, NULL, "%llu\n");
 #endif
 
+static inline u64 caam_get_dma_mask(struct device *dev)
+{
+	struct device_node *nprop = dev->of_node;
+
+	if (caam_ptr_sz != sizeof(u64))
+		return DMA_BIT_MASK(32);
+
+	if (caam_dpaa2)
+		return DMA_BIT_MASK(49);
+
+	if (of_device_is_compatible(nprop, "fsl,sec-v5.0-job-ring") ||
+	    of_device_is_compatible(nprop, "fsl,sec-v5.0"))
+		return DMA_BIT_MASK(40);
+
+	return DMA_BIT_MASK(36);
+}
+
 #endif /* INTERN_H */
diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c
index cea811f..fc97cde2 100644
--- a/drivers/crypto/caam/jr.c
+++ b/drivers/crypto/caam/jr.c
@@ -108,25 +108,12 @@
 static int caam_jr_shutdown(struct device *dev)
 {
 	struct caam_drv_private_jr *jrp = dev_get_drvdata(dev);
-	dma_addr_t inpbusaddr, outbusaddr;
 	int ret;
 
 	ret = caam_reset_hw_jr(dev);
 
 	tasklet_kill(&jrp->irqtask);
 
-	/* Release interrupt */
-	free_irq(jrp->irq, dev);
-
-	/* Free rings */
-	inpbusaddr = rd_reg64(&jrp->rregs->inpring_base);
-	outbusaddr = rd_reg64(&jrp->rregs->outring_base);
-	dma_free_coherent(dev, sizeof(dma_addr_t) * JOBR_DEPTH,
-			  jrp->inpring, inpbusaddr);
-	dma_free_coherent(dev, sizeof(struct jr_outentry) * JOBR_DEPTH,
-			  jrp->outring, outbusaddr);
-	kfree(jrp->entinfo);
-
 	return ret;
 }
 
@@ -159,7 +146,6 @@
 	ret = caam_jr_shutdown(jrdev);
 	if (ret)
 		dev_err(jrdev, "Failed to shut down job ring\n");
-	irq_dispose_mapping(jrpriv->irq);
 
 	return ret;
 }
@@ -224,7 +210,7 @@
 		for (i = 0; CIRC_CNT(head, tail + i, JOBR_DEPTH) >= 1; i++) {
 			sw_idx = (tail + i) & (JOBR_DEPTH - 1);
 
-			if (jrp->outring[hw_idx].desc ==
+			if (jr_outentry_desc(jrp->outring, hw_idx) ==
 			    caam_dma_to_cpu(jrp->entinfo[sw_idx].desc_addr_dma))
 				break; /* found */
 		}
@@ -233,7 +219,8 @@
 
 		/* Unmap just-run descriptor so we can post-process */
 		dma_unmap_single(dev,
-				 caam_dma_to_cpu(jrp->outring[hw_idx].desc),
+				 caam_dma_to_cpu(jr_outentry_desc(jrp->outring,
+								  hw_idx)),
 				 jrp->entinfo[sw_idx].desc_size,
 				 DMA_TO_DEVICE);
 
@@ -244,7 +231,8 @@
 		usercall = jrp->entinfo[sw_idx].callbk;
 		userarg = jrp->entinfo[sw_idx].cbkarg;
 		userdesc = jrp->entinfo[sw_idx].desc_addr_virt;
-		userstatus = caam32_to_cpu(jrp->outring[hw_idx].jrstatus);
+		userstatus = caam32_to_cpu(jr_outentry_jrstatus(jrp->outring,
+								hw_idx));
 
 		/*
 		 * Make sure all information from the job has been obtained
@@ -399,7 +387,7 @@
 	head_entry->cbkarg = areq;
 	head_entry->desc_addr_dma = desc_dma;
 
-	jrp->inpring[head] = cpu_to_caam_dma(desc_dma);
+	jr_inpentry_set(jrp->inpring, head, cpu_to_caam_dma(desc_dma));
 
 	/*
 	 * Guarantee that the descriptor's DMA address has been written to
@@ -441,35 +429,26 @@
 
 	jrp = dev_get_drvdata(dev);
 
-	tasklet_init(&jrp->irqtask, caam_jr_dequeue, (unsigned long)dev);
-
-	/* Connect job ring interrupt handler. */
-	error = request_irq(jrp->irq, caam_jr_interrupt, IRQF_SHARED,
-			    dev_name(dev), dev);
-	if (error) {
-		dev_err(dev, "can't connect JobR %d interrupt (%d)\n",
-			jrp->ridx, jrp->irq);
-		goto out_kill_deq;
-	}
-
 	error = caam_reset_hw_jr(dev);
 	if (error)
-		goto out_free_irq;
+		return error;
 
-	error = -ENOMEM;
-	jrp->inpring = dma_alloc_coherent(dev, sizeof(*jrp->inpring) *
-					  JOBR_DEPTH, &inpbusaddr, GFP_KERNEL);
+	jrp->inpring = dmam_alloc_coherent(dev, SIZEOF_JR_INPENTRY *
+					   JOBR_DEPTH, &inpbusaddr,
+					   GFP_KERNEL);
 	if (!jrp->inpring)
-		goto out_free_irq;
+		return -ENOMEM;
 
-	jrp->outring = dma_alloc_coherent(dev, sizeof(*jrp->outring) *
-					  JOBR_DEPTH, &outbusaddr, GFP_KERNEL);
+	jrp->outring = dmam_alloc_coherent(dev, SIZEOF_JR_OUTENTRY *
+					   JOBR_DEPTH, &outbusaddr,
+					   GFP_KERNEL);
 	if (!jrp->outring)
-		goto out_free_inpring;
+		return -ENOMEM;
 
-	jrp->entinfo = kcalloc(JOBR_DEPTH, sizeof(*jrp->entinfo), GFP_KERNEL);
+	jrp->entinfo = devm_kcalloc(dev, JOBR_DEPTH, sizeof(*jrp->entinfo),
+				    GFP_KERNEL);
 	if (!jrp->entinfo)
-		goto out_free_outring;
+		return -ENOMEM;
 
 	for (i = 0; i < JOBR_DEPTH; i++)
 		jrp->entinfo[i].desc_addr_dma = !0;
@@ -493,22 +472,24 @@
 		      (JOBR_INTC_COUNT_THLD << JRCFG_ICDCT_SHIFT) |
 		      (JOBR_INTC_TIME_THLD << JRCFG_ICTT_SHIFT));
 
-	return 0;
+	tasklet_init(&jrp->irqtask, caam_jr_dequeue, (unsigned long)dev);
 
-out_free_outring:
-	dma_free_coherent(dev, sizeof(struct jr_outentry) * JOBR_DEPTH,
-			  jrp->outring, outbusaddr);
-out_free_inpring:
-	dma_free_coherent(dev, sizeof(dma_addr_t) * JOBR_DEPTH,
-			  jrp->inpring, inpbusaddr);
-	dev_err(dev, "can't allocate job rings for %d\n", jrp->ridx);
-out_free_irq:
-	free_irq(jrp->irq, dev);
-out_kill_deq:
-	tasklet_kill(&jrp->irqtask);
+	/* Connect job ring interrupt handler. */
+	error = devm_request_irq(dev, jrp->irq, caam_jr_interrupt, IRQF_SHARED,
+				 dev_name(dev), dev);
+	if (error) {
+		dev_err(dev, "can't connect JobR %d interrupt (%d)\n",
+			jrp->ridx, jrp->irq);
+		tasklet_kill(&jrp->irqtask);
+	}
+
 	return error;
 }
 
+static void caam_jr_irq_dispose_mapping(void *data)
+{
+	irq_dispose_mapping((unsigned long)data);
+}
 
 /*
  * Probe routine for each detected JobR subsystem.
@@ -520,6 +501,7 @@
 	struct caam_job_ring __iomem *ctrl;
 	struct caam_drv_private_jr *jrpriv;
 	static int total_jobrs;
+	struct resource *r;
 	int error;
 
 	jrdev = &pdev->dev;
@@ -535,45 +517,43 @@
 	nprop = pdev->dev.of_node;
 	/* Get configuration properties from device tree */
 	/* First, get register page */
-	ctrl = of_iomap(nprop, 0);
+	r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!r) {
+		dev_err(jrdev, "platform_get_resource() failed\n");
+		return -ENOMEM;
+	}
+
+	ctrl = devm_ioremap(jrdev, r->start, resource_size(r));
 	if (!ctrl) {
-		dev_err(jrdev, "of_iomap() failed\n");
+		dev_err(jrdev, "devm_ioremap() failed\n");
 		return -ENOMEM;
 	}
 
 	jrpriv->rregs = (struct caam_job_ring __iomem __force *)ctrl;
 
-	if (sizeof(dma_addr_t) == sizeof(u64)) {
-		if (caam_dpaa2)
-			error = dma_set_mask_and_coherent(jrdev,
-							  DMA_BIT_MASK(49));
-		else if (of_device_is_compatible(nprop,
-						 "fsl,sec-v5.0-job-ring"))
-			error = dma_set_mask_and_coherent(jrdev,
-							  DMA_BIT_MASK(40));
-		else
-			error = dma_set_mask_and_coherent(jrdev,
-							  DMA_BIT_MASK(36));
-	} else {
-		error = dma_set_mask_and_coherent(jrdev, DMA_BIT_MASK(32));
-	}
+	error = dma_set_mask_and_coherent(jrdev, caam_get_dma_mask(jrdev));
 	if (error) {
 		dev_err(jrdev, "dma_set_mask_and_coherent failed (%d)\n",
 			error);
-		iounmap(ctrl);
 		return error;
 	}
 
 	/* Identify the interrupt */
 	jrpriv->irq = irq_of_parse_and_map(nprop, 0);
+	if (!jrpriv->irq) {
+		dev_err(jrdev, "irq_of_parse_and_map failed\n");
+		return -EINVAL;
+	}
+
+	error = devm_add_action_or_reset(jrdev, caam_jr_irq_dispose_mapping,
+					 (void *)(unsigned long)jrpriv->irq);
+	if (error)
+		return error;
 
 	/* Now do the platform independent part */
 	error = caam_jr_init(jrdev); /* now turn on hardware */
-	if (error) {
-		irq_dispose_mapping(jrpriv->irq);
-		iounmap(ctrl);
+	if (error)
 		return error;
-	}
 
 	jrpriv->dev = jrdev;
 	spin_lock(&driver_data.jr_alloc_lock);
diff --git a/drivers/crypto/caam/key_gen.c b/drivers/crypto/caam/key_gen.c
index 48dd353..5a851dd 100644
--- a/drivers/crypto/caam/key_gen.c
+++ b/drivers/crypto/caam/key_gen.c
@@ -15,13 +15,14 @@
 			   void *context)
 {
 	struct split_key_result *res = context;
+	int ecode = 0;
 
 	dev_dbg(dev, "%s %d: err 0x%x\n", __func__, __LINE__, err);
 
 	if (err)
-		caam_jr_strstatus(dev, err);
+		ecode = caam_jr_strstatus(dev, err);
 
-	res->err = err;
+	res->err = ecode;
 
 	complete(&res->completion);
 }
@@ -47,18 +48,20 @@
 	u32 *desc;
 	struct split_key_result result;
 	dma_addr_t dma_addr;
+	unsigned int local_max;
 	int ret = -ENOMEM;
 
 	adata->keylen = split_key_len(adata->algtype & OP_ALG_ALGSEL_MASK);
 	adata->keylen_pad = split_key_pad_len(adata->algtype &
 					      OP_ALG_ALGSEL_MASK);
+	local_max = max(keylen, adata->keylen_pad);
 
 	dev_dbg(jrdev, "split keylen %d split keylen padded %d\n",
 		adata->keylen, adata->keylen_pad);
 	print_hex_dump_debug("ctx.key@" __stringify(__LINE__)": ",
 			     DUMP_PREFIX_ADDRESS, 16, 4, key_in, keylen, 1);
 
-	if (adata->keylen_pad > max_keylen)
+	if (local_max > max_keylen)
 		return -EINVAL;
 
 	desc = kmalloc(CAAM_CMD_SZ * 6 + CAAM_PTR_SZ * 2, GFP_KERNEL | GFP_DMA);
@@ -69,8 +72,7 @@
 
 	memcpy(key_out, key_in, keylen);
 
-	dma_addr = dma_map_single(jrdev, key_out, adata->keylen_pad,
-				  DMA_BIDIRECTIONAL);
+	dma_addr = dma_map_single(jrdev, key_out, local_max, DMA_BIDIRECTIONAL);
 	if (dma_mapping_error(jrdev, dma_addr)) {
 		dev_err(jrdev, "unable to map key memory\n");
 		goto out_free;
@@ -116,7 +118,7 @@
 				     adata->keylen_pad, 1);
 	}
 
-	dma_unmap_single(jrdev, dma_addr, adata->keylen_pad, DMA_BIDIRECTIONAL);
+	dma_unmap_single(jrdev, dma_addr, local_max, DMA_BIDIRECTIONAL);
 out_free:
 	kfree(desc);
 	return ret;
diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h
index 810f0be..68c1fd5 100644
--- a/drivers/crypto/caam/pdb.h
+++ b/drivers/crypto/caam/pdb.h
@@ -512,7 +512,9 @@
 	dma_addr_t	n_dma;
 	dma_addr_t	e_dma;
 	u32		f_len;
-} __packed;
+};
+
+#define SIZEOF_RSA_PUB_PDB	(2 * sizeof(u32) + 4 * caam_ptr_sz)
 
 /**
  * RSA Decrypt PDB - Private Key Form #1
@@ -528,7 +530,9 @@
 	dma_addr_t	f_dma;
 	dma_addr_t	n_dma;
 	dma_addr_t	d_dma;
-} __packed;
+};
+
+#define SIZEOF_RSA_PRIV_F1_PDB	(sizeof(u32) + 4 * caam_ptr_sz)
 
 /**
  * RSA Decrypt PDB - Private Key Form #2
@@ -554,7 +558,9 @@
 	dma_addr_t	tmp1_dma;
 	dma_addr_t	tmp2_dma;
 	u32		p_q_len;
-} __packed;
+};
+
+#define SIZEOF_RSA_PRIV_F2_PDB	(2 * sizeof(u32) + 7 * caam_ptr_sz)
 
 /**
  * RSA Decrypt PDB - Private Key Form #3
@@ -586,6 +592,8 @@
 	dma_addr_t	tmp1_dma;
 	dma_addr_t	tmp2_dma;
 	u32		p_q_len;
-} __packed;
+};
+
+#define SIZEOF_RSA_PRIV_F3_PDB	(2 * sizeof(u32) + 9 * caam_ptr_sz)
 
 #endif
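
Dropping __packed and sizing the PDBs with caam_ptr_sz reflects that the pointer
width CAAM parses is now a runtime property rather than sizeof(dma_addr_t);
pkc_desc.c below switches from sizeof(*pdb) to these macros for the same reason.
A worked example for the public-key PDB (assuming only that caam_ptr_sz is 4 or 8):

	/*
	 * SIZEOF_RSA_PUB_PDB = 2 * sizeof(u32) + 4 * caam_ptr_sz
	 *
	 *   caam_ptr_sz == 4:  2 * 4 + 4 * 4 = 24 bytes
	 *   caam_ptr_sz == 8:  2 * 4 + 4 * 8 = 40 bytes
	 *
	 * sizeof(struct rsa_pub_pdb) would instead follow the host dma_addr_t width
	 * (plus struct padding), which need not match the device view any more.
	 */
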
diff --git a/drivers/crypto/caam/pkc_desc.c b/drivers/crypto/caam/pkc_desc.c
index 2a8d87e..0d5ee76 100644
--- a/drivers/crypto/caam/pkc_desc.c
+++ b/drivers/crypto/caam/pkc_desc.c
@@ -13,7 +13,7 @@
 /* Descriptor for RSA Public operation */
 void init_rsa_pub_desc(u32 *desc, struct rsa_pub_pdb *pdb)
 {
-	init_job_desc_pdb(desc, 0, sizeof(*pdb));
+	init_job_desc_pdb(desc, 0, SIZEOF_RSA_PUB_PDB);
 	append_cmd(desc, pdb->sgf);
 	append_ptr(desc, pdb->f_dma);
 	append_ptr(desc, pdb->g_dma);
@@ -26,7 +26,7 @@
 /* Descriptor for RSA Private operation - Private Key Form #1 */
 void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb)
 {
-	init_job_desc_pdb(desc, 0, sizeof(*pdb));
+	init_job_desc_pdb(desc, 0, SIZEOF_RSA_PRIV_F1_PDB);
 	append_cmd(desc, pdb->sgf);
 	append_ptr(desc, pdb->g_dma);
 	append_ptr(desc, pdb->f_dma);
@@ -39,7 +39,7 @@
 /* Descriptor for RSA Private operation - Private Key Form #2 */
 void init_rsa_priv_f2_desc(u32 *desc, struct rsa_priv_f2_pdb *pdb)
 {
-	init_job_desc_pdb(desc, 0, sizeof(*pdb));
+	init_job_desc_pdb(desc, 0, SIZEOF_RSA_PRIV_F2_PDB);
 	append_cmd(desc, pdb->sgf);
 	append_ptr(desc, pdb->g_dma);
 	append_ptr(desc, pdb->f_dma);
@@ -56,7 +56,7 @@
 /* Descriptor for RSA Private operation - Private Key Form #3 */
 void init_rsa_priv_f3_desc(u32 *desc, struct rsa_priv_f3_pdb *pdb)
 {
-	init_job_desc_pdb(desc, 0, sizeof(*pdb));
+	init_job_desc_pdb(desc, 0, SIZEOF_RSA_PRIV_F3_PDB);
 	append_cmd(desc, pdb->sgf);
 	append_ptr(desc, pdb->g_dma);
 	append_ptr(desc, pdb->f_dma);
diff --git a/drivers/crypto/caam/qi.c b/drivers/crypto/caam/qi.c
index 0fe618e..378f627 100644
--- a/drivers/crypto/caam/qi.c
+++ b/drivers/crypto/caam/qi.c
@@ -163,7 +163,10 @@
 	dma_unmap_single(drv_req->drv_ctx->qidev, qm_fd_addr(fd),
 			 sizeof(drv_req->fd_sgt), DMA_BIDIRECTIONAL);
 
-	drv_req->cbk(drv_req, -EIO);
+	if (fd->status)
+		drv_req->cbk(drv_req, be32_to_cpu(fd->status));
+	else
+		drv_req->cbk(drv_req, JRSTA_SSRC_QI);
 }
 
 static struct qman_fq *create_caam_req_fq(struct device *qidev,
@@ -574,8 +577,9 @@
 
 		if (ssrc != JRSTA_SSRC_CCB_ERROR ||
 		    err_id != JRSTA_CCBERR_ERRID_ICVCHK)
-			dev_err(qidev, "Error: %#x in CAAM response FD\n",
-				status);
+			dev_err_ratelimited(qidev,
+					    "Error: %#x in CAAM response FD\n",
+					    status);
 	}
 
 	if (unlikely(qm_fd_get_format(fd) != qm_fd_compound)) {
diff --git a/drivers/crypto/caam/qi.h b/drivers/crypto/caam/qi.h
index f93c9c7..db054954 100644
--- a/drivers/crypto/caam/qi.h
+++ b/drivers/crypto/caam/qi.h
@@ -14,32 +14,6 @@
 #include "desc.h"
 #include "desc_constr.h"
 
-/*
- * CAAM hardware constructs a job descriptor which points to a shared descriptor
- * (as pointed by context_a of to-CAAM FQ).
- * When the job descriptor is executed by DECO, the whole job descriptor
- * together with shared descriptor gets loaded in DECO buffer, which is
- * 64 words (each 32-bit) long.
- *
- * The job descriptor constructed by CAAM hardware has the following layout:
- *
- *	HEADER		(1 word)
- *	Shdesc ptr	(1 or 2 words)
- *	SEQ_OUT_PTR	(1 word)
- *	Out ptr		(1 or 2 words)
- *	Out length	(1 word)
- *	SEQ_IN_PTR	(1 word)
- *	In ptr		(1 or 2 words)
- *	In length	(1 word)
- *
- * The shdesc ptr is used to fetch shared descriptor contents into DECO buffer.
- *
- * Apart from shdesc contents, the total number of words that get loaded in DECO
- * buffer are '8' or '11'. The remaining words in DECO buffer can be used for
- * storing shared descriptor.
- */
-#define MAX_SDLEN	((CAAM_DESC_BYTES_MAX - DESC_JOB_IO_LEN) / CAAM_CMD_SZ)
-
 /* Length of a single buffer in the QI driver memory cache */
 #define CAAM_QI_MEMCACHE_SIZE	768
 
diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h
index 8591914..05127b7 100644
--- a/drivers/crypto/caam/regs.h
+++ b/drivers/crypto/caam/regs.h
@@ -12,6 +12,7 @@
 #include <linux/types.h>
 #include <linux/bitops.h>
 #include <linux/io.h>
+#include <linux/io-64-nonatomic-hi-lo.h>
 
 /*
  * Architecture-specific register access methods
@@ -70,6 +71,7 @@
 
 extern bool caam_little_end;
 extern bool caam_imx;
+extern size_t caam_ptr_sz;
 
 #define caam_to_cpu(len)						\
 static inline u##len caam##len ## _to_cpu(u##len val)			\
@@ -137,45 +139,37 @@
  *    base + 0x0000 : least-significant 32 bits
  *    base + 0x0004 : most-significant 32 bits
  */
-#ifdef CONFIG_64BIT
 static inline void wr_reg64(void __iomem *reg, u64 data)
 {
-	if (caam_little_end)
-		iowrite64(data, reg);
-	else
-		iowrite64be(data, reg);
-}
-
-static inline u64 rd_reg64(void __iomem *reg)
-{
-	if (caam_little_end)
-		return ioread64(reg);
-	else
-		return ioread64be(reg);
-}
-
-#else /* CONFIG_64BIT */
-static inline void wr_reg64(void __iomem *reg, u64 data)
-{
-	if (!caam_imx && caam_little_end) {
-		wr_reg32((u32 __iomem *)(reg) + 1, data >> 32);
-		wr_reg32((u32 __iomem *)(reg), data);
+	if (caam_little_end) {
+		if (caam_imx) {
+			iowrite32(data >> 32, (u32 __iomem *)(reg));
+			iowrite32(data, (u32 __iomem *)(reg) + 1);
+		} else {
+			iowrite64(data, reg);
+		}
 	} else {
-		wr_reg32((u32 __iomem *)(reg), data >> 32);
-		wr_reg32((u32 __iomem *)(reg) + 1, data);
+		iowrite64be(data, reg);
 	}
 }
 
 static inline u64 rd_reg64(void __iomem *reg)
 {
-	if (!caam_imx && caam_little_end)
-		return ((u64)rd_reg32((u32 __iomem *)(reg) + 1) << 32 |
-			(u64)rd_reg32((u32 __iomem *)(reg)));
+	if (caam_little_end) {
+		if (caam_imx) {
+			u32 low, high;
 
-	return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 |
-		(u64)rd_reg32((u32 __iomem *)(reg) + 1));
+			high = ioread32(reg);
+			low  = ioread32(reg + sizeof(u32));
+
+			return low + ((u64)high << 32);
+		} else {
+			return ioread64(reg);
+		}
+	} else {
+		return ioread64be(reg);
+	}
 }
-#endif /* CONFIG_64BIT  */
 
 static inline u64 cpu_to_caam_dma64(dma_addr_t value)
 {
@@ -195,22 +189,89 @@
 	return caam64_to_cpu(value);
 }
 
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-#define cpu_to_caam_dma(value) cpu_to_caam_dma64(value)
-#define caam_dma_to_cpu(value) caam_dma64_to_cpu(value)
-#else
-#define cpu_to_caam_dma(value) cpu_to_caam32(value)
-#define caam_dma_to_cpu(value) caam32_to_cpu(value)
-#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */
+static inline u64 cpu_to_caam_dma(u64 value)
+{
+	if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) &&
+	    caam_ptr_sz == sizeof(u64))
+		return cpu_to_caam_dma64(value);
+	else
+		return cpu_to_caam32(value);
+}
+
+static inline u64 caam_dma_to_cpu(u64 value)
+{
+	if (IS_ENABLED(CONFIG_ARCH_DMA_ADDR_T_64BIT) &&
+	    caam_ptr_sz == sizeof(u64))
+		return caam_dma64_to_cpu(value);
+	else
+		return caam32_to_cpu(value);
+}
 
 /*
  * jr_outentry
  * Represents each entry in a JobR output ring
  */
-struct jr_outentry {
-	dma_addr_t desc;/* Pointer to completed descriptor */
-	u32 jrstatus;	/* Status for completed descriptor */
-} __packed;
+
+static inline void jr_outentry_get(void *outring, int hw_idx, dma_addr_t *desc,
+				   u32 *jrstatus)
+{
+
+	if (caam_ptr_sz == sizeof(u32)) {
+		struct {
+			u32 desc;
+			u32 jrstatus;
+		} __packed *outentry = outring;
+
+		*desc = outentry[hw_idx].desc;
+		*jrstatus = outentry[hw_idx].jrstatus;
+	} else {
+		struct {
+			dma_addr_t desc;/* Pointer to completed descriptor */
+			u32 jrstatus;	/* Status for completed descriptor */
+		} __packed *outentry = outring;
+
+		*desc = outentry[hw_idx].desc;
+		*jrstatus = outentry[hw_idx].jrstatus;
+	}
+}
+
+#define SIZEOF_JR_OUTENTRY	(caam_ptr_sz + sizeof(u32))
+
+static inline dma_addr_t jr_outentry_desc(void *outring, int hw_idx)
+{
+	dma_addr_t desc;
+	u32 unused;
+
+	jr_outentry_get(outring, hw_idx, &desc, &unused);
+
+	return desc;
+}
+
+static inline u32 jr_outentry_jrstatus(void *outring, int hw_idx)
+{
+	dma_addr_t unused;
+	u32 jrstatus;
+
+	jr_outentry_get(outring, hw_idx, &unused, &jrstatus);
+
+	return jrstatus;
+}
+
+static inline void jr_inpentry_set(void *inpring, int hw_idx, dma_addr_t val)
+{
+	if (caam_ptr_sz == sizeof(u32)) {
+		u32 *inpentry = inpring;
+
+		inpentry[hw_idx] = val;
+	} else {
+		dma_addr_t *inpentry = inpring;
+
+		inpentry[hw_idx] = val;
+	}
+}
+
+#define SIZEOF_JR_INPENTRY	caam_ptr_sz
 
 /* Version registers (Era 10+)	e80-eff */
 struct version_regs {
@@ -338,6 +399,7 @@
 	u32 cha_rev_ls;		/* CRNR - CHA Rev No. Least significant half*/
 #define CTPR_MS_QI_SHIFT	25
 #define CTPR_MS_QI_MASK		(0x1ull << CTPR_MS_QI_SHIFT)
+#define CTPR_MS_PS		BIT(17)
 #define CTPR_MS_DPAA2		BIT(13)
 #define CTPR_MS_VIRT_EN_INCL	0x00000001
 #define CTPR_MS_VIRT_EN_POR	0x00000002
@@ -641,6 +703,7 @@
 #define JRSTA_SSRC_CCB_ERROR        0x20000000
 #define JRSTA_SSRC_JUMP_HALT_USER   0x30000000
 #define JRSTA_SSRC_DECO             0x40000000
+#define JRSTA_SSRC_QI               0x50000000
 #define JRSTA_SSRC_JRERROR          0x60000000
 #define JRSTA_SSRC_JUMP_HALT_CC     0x70000000
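
With struct jr_outentry gone, the job ring entry layout follows caam_ptr_sz at run
time, and jr.c sizes its DMA rings with SIZEOF_JR_INPENTRY/SIZEOF_JR_OUTENTRY
instead of sizeof(). The resulting per-entry footprint, as a quick sketch:

	/*
	 *   caam_ptr_sz == 4:  SIZEOF_JR_INPENTRY = 4,  SIZEOF_JR_OUTENTRY = 4 + 4 = 8
	 *   caam_ptr_sz == 8:  SIZEOF_JR_INPENTRY = 8,  SIZEOF_JR_OUTENTRY = 8 + 4 = 12
	 *
	 * so caam_jr_init() allocates SIZEOF_JR_*ENTRY * JOBR_DEPTH bytes per ring,
	 * matching what the hardware actually reads and writes for each entry.
	 */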
 
diff --git a/drivers/crypto/cavium/cpt/cptvf_algs.c b/drivers/crypto/cavium/cpt/cptvf_algs.c
index ff3cb1f..596ce28 100644
--- a/drivers/crypto/cavium/cpt/cptvf_algs.c
+++ b/drivers/crypto/cavium/cpt/cptvf_algs.c
@@ -7,7 +7,7 @@
 #include <crypto/aes.h>
 #include <crypto/algapi.h>
 #include <crypto/authenc.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/xts.h>
 #include <linux/crypto.h>
 #include <linux/err.h>
@@ -322,31 +322,15 @@
 static int cvm_cbc_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 			       u32 keylen)
 {
-	u32 flags = crypto_ablkcipher_get_flags(cipher);
-	int err;
-
-	err = __des3_verify_key(&flags, key);
-	if (unlikely(err)) {
-		crypto_ablkcipher_set_flags(cipher, flags);
-		return err;
-	}
-
-	return cvm_setkey(cipher, key, keylen, DES3_CBC);
+	return verify_ablkcipher_des3_key(cipher, key) ?:
+	       cvm_setkey(cipher, key, keylen, DES3_CBC);
 }
 
 static int cvm_ecb_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 			       u32 keylen)
 {
-	u32 flags = crypto_ablkcipher_get_flags(cipher);
-	int err;
-
-	err = __des3_verify_key(&flags, key);
-	if (unlikely(err)) {
-		crypto_ablkcipher_set_flags(cipher, flags);
-		return err;
-	}
-
-	return cvm_setkey(cipher, key, keylen, DES3_ECB);
+	return verify_ablkcipher_des3_key(cipher, key) ?:
+	       cvm_setkey(cipher, key, keylen, DES3_ECB);
 }
 
 static int cvm_enc_dec_init(struct crypto_tfm *tfm)
diff --git a/drivers/crypto/cavium/nitrox/Kconfig b/drivers/crypto/cavium/nitrox/Kconfig
index dab162a..7b1e751 100644
--- a/drivers/crypto/cavium/nitrox/Kconfig
+++ b/drivers/crypto/cavium/nitrox/Kconfig
@@ -6,7 +6,7 @@
 	tristate
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AES
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select FW_LOADER
 
 config CRYPTO_DEV_NITROX_CNN55XX
diff --git a/drivers/crypto/cavium/nitrox/nitrox_csr.h b/drivers/crypto/cavium/nitrox/nitrox_csr.h
index a2a4526..1c8715a 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_csr.h
+++ b/drivers/crypto/cavium/nitrox/nitrox_csr.h
@@ -40,9 +40,77 @@
 #define EMU_FUSE_MAPX(_i)	(0x1402708 + ((_i) * 0x40000))
 
 /* UCD registers */
+#define UCD_SE_EID_UCODE_BLOCK_NUMX(_i)	(0x12C0000 + ((_i) * 0x1000))
+#define UCD_AE_EID_UCODE_BLOCK_NUMX(_i)	(0x12C0008 + ((_i) * 0x800))
 #define UCD_UCODE_LOAD_BLOCK_NUM	0x12C0010
 #define UCD_UCODE_LOAD_IDX_DATAX(_i)	(0x12C0018 + ((_i) * 0x20))
-#define UCD_SE_EID_UCODE_BLOCK_NUMX(_i)	(0x12C0000 + ((_i) * 0x1000))
+#define UCD_SE_CNTX(_i)			(0x12C0040 + ((_i) * 0x1000))
+#define UCD_AE_CNTX(_i)			(0x12C0048 + ((_i) * 0x800))
+
+/* AQM registers */
+#define AQM_CTL                         0x1300000
+#define AQM_INT                         0x1300008
+#define AQM_DBELL_OVF_LO                0x1300010
+#define AQM_DBELL_OVF_HI                0x1300018
+#define AQM_DBELL_OVF_LO_W1S            0x1300020
+#define AQM_DBELL_OVF_LO_ENA_W1C        0x1300028
+#define AQM_DBELL_OVF_LO_ENA_W1S        0x1300030
+#define AQM_DBELL_OVF_HI_W1S            0x1300038
+#define AQM_DBELL_OVF_HI_ENA_W1C        0x1300040
+#define AQM_DBELL_OVF_HI_ENA_W1S        0x1300048
+#define AQM_DMA_RD_ERR_LO               0x1300050
+#define AQM_DMA_RD_ERR_HI               0x1300058
+#define AQM_DMA_RD_ERR_LO_W1S           0x1300060
+#define AQM_DMA_RD_ERR_LO_ENA_W1C       0x1300068
+#define AQM_DMA_RD_ERR_LO_ENA_W1S       0x1300070
+#define AQM_DMA_RD_ERR_HI_W1S           0x1300078
+#define AQM_DMA_RD_ERR_HI_ENA_W1C       0x1300080
+#define AQM_DMA_RD_ERR_HI_ENA_W1S       0x1300088
+#define AQM_EXEC_NA_LO                  0x1300090
+#define AQM_EXEC_NA_HI                  0x1300098
+#define AQM_EXEC_NA_LO_W1S              0x13000A0
+#define AQM_EXEC_NA_LO_ENA_W1C          0x13000A8
+#define AQM_EXEC_NA_LO_ENA_W1S          0x13000B0
+#define AQM_EXEC_NA_HI_W1S              0x13000B8
+#define AQM_EXEC_NA_HI_ENA_W1C          0x13000C0
+#define AQM_EXEC_NA_HI_ENA_W1S          0x13000C8
+#define AQM_EXEC_ERR_LO                 0x13000D0
+#define AQM_EXEC_ERR_HI                 0x13000D8
+#define AQM_EXEC_ERR_LO_W1S             0x13000E0
+#define AQM_EXEC_ERR_LO_ENA_W1C         0x13000E8
+#define AQM_EXEC_ERR_LO_ENA_W1S         0x13000F0
+#define AQM_EXEC_ERR_HI_W1S             0x13000F8
+#define AQM_EXEC_ERR_HI_ENA_W1C         0x1300100
+#define AQM_EXEC_ERR_HI_ENA_W1S         0x1300108
+#define AQM_ECC_INT                     0x1300110
+#define AQM_ECC_INT_W1S                 0x1300118
+#define AQM_ECC_INT_ENA_W1C             0x1300120
+#define AQM_ECC_INT_ENA_W1S             0x1300128
+#define AQM_ECC_CTL                     0x1300130
+#define AQM_BIST_STATUS                 0x1300138
+#define AQM_CMD_INF_THRX(x)             (0x1300400 + ((x) * 0x8))
+#define AQM_CMD_INFX(x)                 (0x1300800 + ((x) * 0x8))
+#define AQM_GRP_EXECMSK_LOX(x)          (0x1300C00 + ((x) * 0x10))
+#define AQM_GRP_EXECMSK_HIX(x)          (0x1300C08 + ((x) * 0x10))
+#define AQM_ACTIVITY_STAT_LO            0x1300C80
+#define AQM_ACTIVITY_STAT_HI            0x1300C88
+#define AQM_Q_CMD_PROCX(x)              (0x1301000 + ((x) * 0x8))
+#define AQM_PERF_CTL_LO                 0x1301400
+#define AQM_PERF_CTL_HI                 0x1301408
+#define AQM_PERF_CNT                    0x1301410
+
+#define AQMQ_DRBLX(x)                   (0x20000 + ((x) * 0x40000))
+#define AQMQ_QSZX(x)                    (0x20008 + ((x) * 0x40000))
+#define AQMQ_BADRX(x)                   (0x20010 + ((x) * 0x40000))
+#define AQMQ_NXT_CMDX(x)                (0x20018 + ((x) * 0x40000))
+#define AQMQ_CMD_CNTX(x)                (0x20020 + ((x) * 0x40000))
+#define AQMQ_CMP_THRX(x)                (0x20028 + ((x) * 0x40000))
+#define AQMQ_CMP_CNTX(x)                (0x20030 + ((x) * 0x40000))
+#define AQMQ_TIM_LDX(x)                 (0x20038 + ((x) * 0x40000))
+#define AQMQ_TIMERX(x)                  (0x20040 + ((x) * 0x40000))
+#define AQMQ_ENX(x)                     (0x20048 + ((x) * 0x40000))
+#define AQMQ_ACTIVITY_STATX(x)          (0x20050 + ((x) * 0x40000))
+#define AQM_VF_CMP_STATX(x)             (0x28000 + ((x) * 0x40000))
 
 /* NPS core registers */
 #define NPS_CORE_GBL_VFCFG	0x1000000
@@ -135,6 +203,171 @@
 #define PEM0_INT 0x1080428
 
 /**
+ * struct ucd_core_eid_ucode_block_num - Core Eid to Ucode Blk Mapping Registers
+ * @ucode_len: Ucode length identifier 32KB or 64KB
+ * @ucode_blk: Ucode Block Number
+ */
+union ucd_core_eid_ucode_block_num {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_4_63 : 60;
+		u64 ucode_len : 1;
+		u64 ucode_blk : 3;
+#else
+		u64 ucode_blk : 3;
+		u64 ucode_len : 1;
+		u64 raz_4_63 : 60;
+#endif
+	};
+};
+
+/**
+ * struct aqm_grp_execmsk_lo - Available AE engines for the group
+ * @exec_0_to_39: AE engines 0 to 39 status
+ */
+union aqm_grp_execmsk_lo {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_40_63 : 24;
+		u64 exec_0_to_39 : 40;
+#else
+		u64 exec_0_to_39 : 40;
+		u64 raz_40_63 : 24;
+#endif
+	};
+};
+
+/**
+ * struct aqm_grp_execmsk_hi - Available AE engines for the group
+ * @exec_40_to_79: AE engines 40 to 79 status
+ */
+union aqm_grp_execmsk_hi {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_40_63 : 24;
+		u64 exec_40_to_79 : 40;
+#else
+		u64 exec_40_to_79 : 40;
+		u64 raz_40_63 : 24;
+#endif
+	};
+};
+
+/**
+ * struct aqmq_drbl - AQM Queue Doorbell Counter Registers
+ * @dbell_count: Doorbell Counter
+ */
+union aqmq_drbl {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_32_63 : 32;
+		u64 dbell_count : 32;
+#else
+		u64 dbell_count : 32;
+		u64 raz_32_63 : 32;
+#endif
+	};
+};
+
+/**
+ * struct aqmq_qsz - AQM Queue Host Queue Size Registers
+ * @host_queue_size: Size of the Host Ring, in number of
+ * 'aqmq_command_s' commands.
+ */
+union aqmq_qsz {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_32_63 : 32;
+		u64 host_queue_size : 32;
+#else
+		u64 host_queue_size : 32;
+		u64 raz_32_63 : 32;
+#endif
+	};
+};
+
+/**
+ * struct aqmq_cmp_thr - AQM Queue Commands Completed Threshold Registers
+ * @commands_completed_threshold: Count of 'aqmq_command_s' commands executed
+ * by AE engines for which completion interrupt is asserted.
+ */
+union aqmq_cmp_thr {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_32_63 : 32;
+		u64 commands_completed_threshold : 32;
+#else
+		u64 commands_completed_threshold : 32;
+		u64 raz_32_63 : 32;
+#endif
+	};
+};
+
+/**
+ * struct aqmq_cmp_cnt - AQM Queue Commands Completed Count Registers
+ * @resend: Bit to request completion interrupt Resend.
+ * @completion_status: Command completion status of the ring.
+ * @commands_completed_count: Count of 'aqmq_command_s' commands executed by
+ * AE engines.
+ */
+union aqmq_cmp_cnt {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_34_63 : 30;
+		u64 resend : 1;
+		u64 completion_status : 1;
+		u64 commands_completed_count : 32;
+#else
+		u64 commands_completed_count : 32;
+		u64 completion_status : 1;
+		u64 resend : 1;
+		u64 raz_34_63 : 30;
+#endif
+	};
+};
+
+/**
+ * struct aqmq_en - AQM Queue Enable Registers
+ * @queue_enable: 1 = AQMQ is enabled, 0 = AQMQ is disabled
+ */
+union aqmq_en {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_1_63 : 63;
+		u64 queue_enable : 1;
+#else
+		u64 queue_enable : 1;
+		u64 raz_1_63 : 63;
+#endif
+	};
+};
+
+/**
+ * struct aqmq_activity_stat - AQM Queue Activity Status Registers
+ * @queue_active: 1 = AQMQ is active, 0 = AQMQ is quiescent
+ */
+union aqmq_activity_stat {
+	u64 value;
+	struct {
+#if (defined(__BIG_ENDIAN_BITFIELD))
+		u64 raz_1_63 : 63;
+		u64 queue_active : 1;
+#else
+		u64 queue_active : 1;
+		u64 raz_1_63 : 63;
+#endif
+	};
+};
+
+/**
  * struct emu_fuse_map - EMU Fuse Map Registers
  * @ae_fuse: Fuse settings for AE 19..0
  * @se_fuse: Fuse settings for SE 15..0
diff --git a/drivers/crypto/cavium/nitrox/nitrox_debugfs.c b/drivers/crypto/cavium/nitrox/nitrox_debugfs.c
index 848ec93..16f7d0bd 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_debugfs.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_debugfs.c
@@ -9,7 +9,8 @@
 {
 	struct nitrox_device *ndev = s->private;
 
-	seq_printf(s, "Version: %s\n", ndev->hw.fw_name);
+	seq_printf(s, "Version: %s\n", ndev->hw.fw_name[0]);
+	seq_printf(s, "Version: %s\n", ndev->hw.fw_name[1]);
 	return 0;
 }
 
diff --git a/drivers/crypto/cavium/nitrox/nitrox_dev.h b/drivers/crypto/cavium/nitrox/nitrox_dev.h
index 0338877..2217a27 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_dev.h
+++ b/drivers/crypto/cavium/nitrox/nitrox_dev.h
@@ -10,6 +10,10 @@
 #define VERSION_LEN 32
 /* Maximum queues in PF mode */
 #define MAX_PF_QUEUES	64
+/* Maximum device queues */
+#define MAX_DEV_QUEUES (MAX_PF_QUEUES)
+/* Maximum UCD Blocks */
+#define CNN55XX_MAX_UCD_BLOCKS	8
 
 /**
  * struct nitrox_cmdq - NITROX command queue
@@ -74,7 +78,7 @@
  */
 struct nitrox_hw {
 	char partname[IFNAMSIZ * 2];
-	char fw_name[VERSION_LEN];
+	char fw_name[CNN55XX_MAX_UCD_BLOCKS][VERSION_LEN];
 
 	int freq;
 	u16 vendor_id;
@@ -206,6 +210,7 @@
  * @mode: Device mode PF/VF
  * @ctx_pool: DMA pool for crypto context
  * @pkt_inq: Packet input rings
+ * @aqmq: AQM command queues
  * @qvec: MSI-X queue vectors information
  * @iov: SR-IOV information
  * @num_vecs: number of MSI-X vectors
@@ -232,6 +237,7 @@
 
 	struct dma_pool *ctx_pool;
 	struct nitrox_cmdq *pkt_inq;
+	struct nitrox_cmdq *aqmq[MAX_DEV_QUEUES] ____cacheline_aligned_in_smp;
 
 	struct nitrox_q_vector *qvec;
 	struct nitrox_iov iov;
diff --git a/drivers/crypto/cavium/nitrox/nitrox_hal.c b/drivers/crypto/cavium/nitrox/nitrox_hal.c
index 3f0df60..34a2f4f 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_hal.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_hal.c
@@ -241,12 +241,12 @@
 }
 
 /**
- * enable_nps_interrupts - enable NPS interrutps
+ * enable_nps_core_interrupts - enable NPS core interrupts
  * @ndev: NITROX device.
  *
- * This includes NPS core, packet in and slc interrupts.
+ * This includes NPS core interrupts.
  */
-static void enable_nps_interrupts(struct nitrox_device *ndev)
+static void enable_nps_core_interrupts(struct nitrox_device *ndev)
 {
 	union nps_core_int_ena_w1s core_int;
 
@@ -258,18 +258,9 @@
 	core_int.s.npco_dma_malform = 1;
 	core_int.s.host_nps_wr_err = 1;
 	nitrox_write_csr(ndev, NPS_CORE_INT_ENA_W1S, core_int.value);
-
-	/* NPS packet in ring interrupts */
-	nitrox_write_csr(ndev, NPS_PKT_IN_RERR_LO_ENA_W1S, (~0ULL));
-	nitrox_write_csr(ndev, NPS_PKT_IN_RERR_HI_ENA_W1S, (~0ULL));
-	nitrox_write_csr(ndev, NPS_PKT_IN_ERR_TYPE_ENA_W1S, (~0ULL));
-	/* NPS packet slc port interrupts */
-	nitrox_write_csr(ndev, NPS_PKT_SLC_RERR_HI_ENA_W1S, (~0ULL));
-	nitrox_write_csr(ndev, NPS_PKT_SLC_RERR_LO_ENA_W1S, (~0ULL));
-	nitrox_write_csr(ndev, NPS_PKT_SLC_ERR_TYPE_ENA_W1S, (~0uLL));
 }
 
-void nitrox_config_nps_unit(struct nitrox_device *ndev)
+void nitrox_config_nps_core_unit(struct nitrox_device *ndev)
 {
 	union nps_core_gbl_vfcfg core_gbl_vfcfg;
 
@@ -281,12 +272,149 @@
 	core_gbl_vfcfg.s.ilk_disable = 1;
 	core_gbl_vfcfg.s.cfg = __NDEV_MODE_PF;
 	nitrox_write_csr(ndev, NPS_CORE_GBL_VFCFG, core_gbl_vfcfg.value);
+
+	/* enable nps core interrupts */
+	enable_nps_core_interrupts(ndev);
+}
+
+/**
+ * enable_nps_pkt_interrupts - enable NPS packet interrupts
+ * @ndev: NITROX device.
+ *
+ * This includes NPS packet in and slc interrupts.
+ */
+static void enable_nps_pkt_interrupts(struct nitrox_device *ndev)
+{
+	/* NPS packet in ring interrupts */
+	nitrox_write_csr(ndev, NPS_PKT_IN_RERR_LO_ENA_W1S, (~0ULL));
+	nitrox_write_csr(ndev, NPS_PKT_IN_RERR_HI_ENA_W1S, (~0ULL));
+	nitrox_write_csr(ndev, NPS_PKT_IN_ERR_TYPE_ENA_W1S, (~0ULL));
+	/* NPS packet slc port interrupts */
+	nitrox_write_csr(ndev, NPS_PKT_SLC_RERR_HI_ENA_W1S, (~0ULL));
+	nitrox_write_csr(ndev, NPS_PKT_SLC_RERR_LO_ENA_W1S, (~0ULL));
+	nitrox_write_csr(ndev, NPS_PKT_SLC_ERR_TYPE_ENA_W1S, (~0uLL));
+}
+
+void nitrox_config_nps_pkt_unit(struct nitrox_device *ndev)
+{
 	/* config input and solicit ports */
 	nitrox_config_pkt_input_rings(ndev);
 	nitrox_config_pkt_solicit_ports(ndev);
 
-	/* enable interrupts */
-	enable_nps_interrupts(ndev);
+	/* enable nps packet interrupts */
+	enable_nps_pkt_interrupts(ndev);
+}
+
+static void reset_aqm_ring(struct nitrox_device *ndev, int ring)
+{
+	union aqmq_en aqmq_en_reg;
+	union aqmq_activity_stat activity_stat;
+	union aqmq_cmp_cnt cmp_cnt;
+	int max_retries = MAX_CSR_RETRIES;
+	u64 offset;
+
+	/* step 1: disable the queue */
+	offset = AQMQ_ENX(ring);
+	aqmq_en_reg.value = 0;
+	aqmq_en_reg.queue_enable = 0;
+	nitrox_write_csr(ndev, offset, aqmq_en_reg.value);
+
+	/* step 2: wait for AQMQ_ACTIVITY_STATX[QUEUE_ACTIVE] to clear */
+	usleep_range(100, 150);
+	offset = AQMQ_ACTIVITY_STATX(ring);
+	do {
+		activity_stat.value = nitrox_read_csr(ndev, offset);
+		if (!activity_stat.queue_active)
+			break;
+		udelay(50);
+	} while (max_retries--);
+
+	/* step 3: clear commands completed count */
+	offset = AQMQ_CMP_CNTX(ring);
+	cmp_cnt.value = nitrox_read_csr(ndev, offset);
+	nitrox_write_csr(ndev, offset, cmp_cnt.value);
+	usleep_range(50, 100);
+}
+
+void enable_aqm_ring(struct nitrox_device *ndev, int ring)
+{
+	union aqmq_en aqmq_en_reg;
+	u64 offset;
+
+	offset = AQMQ_ENX(ring);
+	aqmq_en_reg.value = 0;
+	aqmq_en_reg.queue_enable = 1;
+	nitrox_write_csr(ndev, offset, aqmq_en_reg.value);
+	usleep_range(50, 100);
+}
+
+void nitrox_config_aqm_rings(struct nitrox_device *ndev)
+{
+	int ring;
+
+	for (ring = 0; ring < ndev->nr_queues; ring++) {
+		struct nitrox_cmdq *cmdq = ndev->aqmq[ring];
+		union aqmq_drbl drbl;
+		union aqmq_qsz qsize;
+		union aqmq_cmp_thr cmp_thr;
+		u64 offset;
+
+		/* steps 1 - 3 */
+		reset_aqm_ring(ndev, ring);
+
+		/* step 4: clear doorbell count of ring */
+		offset = AQMQ_DRBLX(ring);
+		drbl.value = 0;
+		drbl.dbell_count = 0xFFFFFFFF;
+		nitrox_write_csr(ndev, offset, drbl.value);
+
+		/* step 5: configure host ring details */
+
+		/* set host address for next command of ring */
+		offset = AQMQ_NXT_CMDX(ring);
+		nitrox_write_csr(ndev, offset, 0ULL);
+
+		/* set host address of ring base */
+		offset = AQMQ_BADRX(ring);
+		nitrox_write_csr(ndev, offset, cmdq->dma);
+
+		/* set ring size */
+		offset = AQMQ_QSZX(ring);
+		qsize.value = 0;
+		qsize.host_queue_size = ndev->qlen;
+		nitrox_write_csr(ndev, offset, qsize.value);
+
+		/* set command completion threshold */
+		offset = AQMQ_CMP_THRX(ring);
+		cmp_thr.value = 0;
+		cmp_thr.commands_completed_threshold = 1;
+		nitrox_write_csr(ndev, offset, cmp_thr.value);
+
+		/* step 6: enable the queue */
+		enable_aqm_ring(ndev, ring);
+	}
+}
+
+static void enable_aqm_interrupts(struct nitrox_device *ndev)
+{
+	/* clear interrupt enable bits */
+	nitrox_write_csr(ndev, AQM_DBELL_OVF_LO_ENA_W1S, (~0ULL));
+	nitrox_write_csr(ndev, AQM_DBELL_OVF_HI_ENA_W1S, (~0ULL));
+	nitrox_write_csr(ndev, AQM_DMA_RD_ERR_LO_ENA_W1S, (~0ULL));
+	nitrox_write_csr(ndev, AQM_DMA_RD_ERR_HI_ENA_W1S, (~0ULL));
+	nitrox_write_csr(ndev, AQM_EXEC_NA_LO_ENA_W1S, (~0ULL));
+	nitrox_write_csr(ndev, AQM_EXEC_NA_HI_ENA_W1S, (~0ULL));
+	nitrox_write_csr(ndev, AQM_EXEC_ERR_LO_ENA_W1S, (~0ULL));
+	nitrox_write_csr(ndev, AQM_EXEC_ERR_HI_ENA_W1S, (~0ULL));
+}
+
+void nitrox_config_aqm_unit(struct nitrox_device *ndev)
+{
+	/* config aqm command queues */
+	nitrox_config_aqm_rings(ndev);
+
+	/* enable aqm interrupts */
+	enable_aqm_interrupts(ndev);
 }
 
 void nitrox_config_pom_unit(struct nitrox_device *ndev)
diff --git a/drivers/crypto/cavium/nitrox/nitrox_hal.h b/drivers/crypto/cavium/nitrox/nitrox_hal.h
index d660641..48b0af0 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_hal.h
+++ b/drivers/crypto/cavium/nitrox/nitrox_hal.h
@@ -4,10 +4,13 @@
 
 #include "nitrox_dev.h"
 
+void nitrox_config_aqm_rings(struct nitrox_device *ndev);
+void nitrox_config_aqm_unit(struct nitrox_device *ndev);
 void nitrox_config_emu_unit(struct nitrox_device *ndev);
 void nitrox_config_pkt_input_rings(struct nitrox_device *ndev);
 void nitrox_config_pkt_solicit_ports(struct nitrox_device *ndev);
-void nitrox_config_nps_unit(struct nitrox_device *ndev);
+void nitrox_config_nps_core_unit(struct nitrox_device *ndev);
+void nitrox_config_nps_pkt_unit(struct nitrox_device *ndev);
 void nitrox_config_pom_unit(struct nitrox_device *ndev);
 void nitrox_config_rand_unit(struct nitrox_device *ndev);
 void nitrox_config_efl_unit(struct nitrox_device *ndev);
@@ -15,6 +18,7 @@
 void nitrox_config_bmo_unit(struct nitrox_device *ndev);
 void nitrox_config_lbc_unit(struct nitrox_device *ndev);
 void invalidate_lbc(struct nitrox_device *ndev);
+void enable_aqm_ring(struct nitrox_device *ndev, int qno);
 void enable_pkt_input_ring(struct nitrox_device *ndev, int ring);
 void enable_pkt_solicit_port(struct nitrox_device *ndev, int port);
 void config_nps_core_vfcfg_mode(struct nitrox_device *ndev, enum vf_mode mode);
diff --git a/drivers/crypto/cavium/nitrox/nitrox_lib.c b/drivers/crypto/cavium/nitrox/nitrox_lib.c
index 4ace9bc..5cbc64b 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_lib.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_lib.c
@@ -19,6 +19,8 @@
 
 /* packet input ring alignments */
 #define PKTIN_Q_ALIGN_BYTES 16
+/* AQM Queue input alignments */
+#define AQM_Q_ALIGN_BYTES 32
 
 static int nitrox_cmdq_init(struct nitrox_cmdq *cmdq, int align_bytes)
 {
@@ -57,11 +59,15 @@
 
 static void nitrox_cmdq_cleanup(struct nitrox_cmdq *cmdq)
 {
-	struct nitrox_device *ndev = cmdq->ndev;
+	struct nitrox_device *ndev;
+
+	if (!cmdq)
+		return;
 
 	if (!cmdq->unalign_base)
 		return;
 
+	ndev = cmdq->ndev;
 	cancel_work_sync(&cmdq->backlog_qflush);
 
 	dma_free_coherent(DEV(ndev), cmdq->qsize,
@@ -78,6 +84,57 @@
 	cmdq->instr_size = 0;
 }
 
+static void nitrox_free_aqm_queues(struct nitrox_device *ndev)
+{
+	int i;
+
+	for (i = 0; i < ndev->nr_queues; i++) {
+		nitrox_cmdq_cleanup(ndev->aqmq[i]);
+		kzfree(ndev->aqmq[i]);
+		ndev->aqmq[i] = NULL;
+	}
+}
+
+static int nitrox_alloc_aqm_queues(struct nitrox_device *ndev)
+{
+	int i, err;
+
+	for (i = 0; i < ndev->nr_queues; i++) {
+		struct nitrox_cmdq *cmdq;
+		u64 offset;
+
+		cmdq = kzalloc_node(sizeof(*cmdq), GFP_KERNEL, ndev->node);
+		if (!cmdq) {
+			err = -ENOMEM;
+			goto aqmq_fail;
+		}
+
+		cmdq->ndev = ndev;
+		cmdq->qno = i;
+		cmdq->instr_size = sizeof(struct aqmq_command_s);
+
+		/* AQM Queue Doorbell Counter Register Address */
+		offset = AQMQ_DRBLX(i);
+		cmdq->dbell_csr_addr = NITROX_CSR_ADDR(ndev, offset);
+		/* AQM Queue Commands Completed Count Register Address */
+		offset = AQMQ_CMD_CNTX(i);
+		cmdq->compl_cnt_csr_addr = NITROX_CSR_ADDR(ndev, offset);
+
+		err = nitrox_cmdq_init(cmdq, AQM_Q_ALIGN_BYTES);
+		if (err) {
+			kzfree(cmdq);
+			goto aqmq_fail;
+		}
+		ndev->aqmq[i] = cmdq;
+	}
+
+	return 0;
+
+aqmq_fail:
+	nitrox_free_aqm_queues(ndev);
+	return err;
+}
+
 static void nitrox_free_pktin_queues(struct nitrox_device *ndev)
 {
 	int i;
@@ -222,6 +279,12 @@
 	if (err)
 		destroy_crypto_dma_pool(ndev);
 
+	err = nitrox_alloc_aqm_queues(ndev);
+	if (err) {
+		nitrox_free_pktin_queues(ndev);
+		destroy_crypto_dma_pool(ndev);
+	}
+
 	return err;
 }
 
@@ -231,6 +294,7 @@
  */
 void nitrox_common_sw_cleanup(struct nitrox_device *ndev)
 {
+	nitrox_free_aqm_queues(ndev);
 	nitrox_free_pktin_queues(ndev);
 	destroy_crypto_dma_pool(ndev);
 }
diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c
index fe825d0..bc92498 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_main.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_main.c
@@ -17,12 +17,17 @@
 
 #define CNN55XX_DEV_ID	0x12
 #define UCODE_HLEN 48
-#define SE_GROUP 0
+#define DEFAULT_SE_GROUP 0
+#define DEFAULT_AE_GROUP 0
 
-#define DRIVER_VERSION "1.1"
+#define DRIVER_VERSION "1.2"
+#define CNN55XX_UCD_BLOCK_SIZE 32768
+#define CNN55XX_MAX_UCODE_SIZE (CNN55XX_UCD_BLOCK_SIZE * 2)
 #define FW_DIR "cavium/"
 /* SE microcode */
 #define SE_FW	FW_DIR "cnn55xx_se.fw"
+/* AE microcode */
+#define AE_FW	FW_DIR "cnn55xx_ae.fw"
 
 static const char nitrox_driver_name[] = "CNN55XX";
 
@@ -72,10 +77,10 @@
 /**
  * write_to_ucd_unit - Write Firmware to NITROX UCD unit
  */
-static void write_to_ucd_unit(struct nitrox_device *ndev,
-			      struct ucode *ucode)
+static void write_to_ucd_unit(struct nitrox_device *ndev, u32 ucode_size,
+			      u64 *ucode_data, int block_num)
 {
-	u32 code_size = be32_to_cpu(ucode->code_size) * 2;
+	u32 code_size;
 	u64 offset, data;
 	int i = 0;
 
@@ -96,11 +101,12 @@
 
 	/* set the block number */
 	offset = UCD_UCODE_LOAD_BLOCK_NUM;
-	nitrox_write_csr(ndev, offset, 0);
+	nitrox_write_csr(ndev, offset, block_num);
 
+	code_size = ucode_size;
 	code_size = roundup(code_size, 8);
 	while (code_size) {
-		data = ucode->code[i];
+		data = ucode_data[i];
 		/* write 8 bytes at a time */
 		offset = UCD_UCODE_LOAD_IDX_DATAX(i);
 		nitrox_write_csr(ndev, offset, data);
@@ -108,29 +114,23 @@
 		i++;
 	}
 
-	/* put all SE cores in group 0 */
-	offset = POM_GRP_EXECMASKX(SE_GROUP);
-	nitrox_write_csr(ndev, offset, (~0ULL));
-
-	for (i = 0; i < ndev->hw.se_cores; i++) {
-		/*
-		 * write block number and firware length
-		 * bit:<2:0> block number
-		 * bit:3 is set SE uses 32KB microcode
-		 * bit:3 is clear SE uses 64KB microcode
-		 */
-		offset = UCD_SE_EID_UCODE_BLOCK_NUMX(i);
-		nitrox_write_csr(ndev, offset, 0x8);
-	}
 	usleep_range(300, 400);
 }
 
-static int nitrox_load_fw(struct nitrox_device *ndev, const char *fw_name)
+static int nitrox_load_fw(struct nitrox_device *ndev)
 {
 	const struct firmware *fw;
+	const char *fw_name;
 	struct ucode *ucode;
-	int ret;
+	u64 *ucode_data;
+	u64 offset;
+	union ucd_core_eid_ucode_block_num core_2_eid_val;
+	union aqm_grp_execmsk_lo aqm_grp_execmask_lo;
+	union aqm_grp_execmsk_hi aqm_grp_execmask_hi;
+	u32 ucode_size;
+	int ret, i = 0;
 
+	fw_name = SE_FW;
 	dev_info(DEV(ndev), "Loading firmware \"%s\"\n", fw_name);
 
 	ret = request_firmware(&fw, fw_name, DEV(ndev));
@@ -140,13 +140,101 @@
 	}
 
 	ucode = (struct ucode *)fw->data;
-	/* copy the firmware version */
-	memcpy(ndev->hw.fw_name, ucode->version, (VERSION_LEN - 2));
-	ndev->hw.fw_name[VERSION_LEN - 1] = '\0';
 
-	write_to_ucd_unit(ndev, ucode);
+	ucode_size = be32_to_cpu(ucode->code_size) * 2;
+	if (!ucode_size || ucode_size > CNN55XX_MAX_UCODE_SIZE) {
+		dev_err(DEV(ndev), "Invalid ucode size: %u for firmware %s\n",
+			ucode_size, fw_name);
+		release_firmware(fw);
+		return -EINVAL;
+	}
+	ucode_data = ucode->code;
+
+	/* copy the firmware version */
+	memcpy(&ndev->hw.fw_name[0][0], ucode->version, (VERSION_LEN - 2));
+	ndev->hw.fw_name[0][VERSION_LEN - 1] = '\0';
+
+	/* Load SE Firmware on UCD Block 0 */
+	write_to_ucd_unit(ndev, ucode_size, ucode_data, 0);
+
 	release_firmware(fw);
 
+	/* put all SE cores in DEFAULT_SE_GROUP */
+	offset = POM_GRP_EXECMASKX(DEFAULT_SE_GROUP);
+	nitrox_write_csr(ndev, offset, (~0ULL));
+
+	/* write block number and firmware length
+	 * bit:<2:0> block number
+	 * bit:3 is set SE uses 32KB microcode
+	 * bit:3 is clear SE uses 64KB microcode
+	 */
+	core_2_eid_val.value = 0ULL;
+	core_2_eid_val.ucode_blk = 0;
+	if (ucode_size <= CNN55XX_UCD_BLOCK_SIZE)
+		core_2_eid_val.ucode_len = 1;
+	else
+		core_2_eid_val.ucode_len = 0;
+
+	for (i = 0; i < ndev->hw.se_cores; i++) {
+		offset = UCD_SE_EID_UCODE_BLOCK_NUMX(i);
+		nitrox_write_csr(ndev, offset, core_2_eid_val.value);
+	}
+
+	fw_name = AE_FW;
+	dev_info(DEV(ndev), "Loading firmware \"%s\"\n", fw_name);
+
+	ret = request_firmware(&fw, fw_name, DEV(ndev));
+	if (ret < 0) {
+		dev_err(DEV(ndev), "failed to get firmware %s\n", fw_name);
+		return ret;
+	}
+
+	ucode = (struct ucode *)fw->data;
+
+	ucode_size = be32_to_cpu(ucode->code_size) * 2;
+	if (!ucode_size || ucode_size > CNN55XX_MAX_UCODE_SIZE) {
+		dev_err(DEV(ndev), "Invalid ucode size: %u for firmware %s\n",
+			ucode_size, fw_name);
+		release_firmware(fw);
+		return -EINVAL;
+	}
+	ucode_data = ucode->code;
+
+	/* copy the firmware version */
+	memcpy(&ndev->hw.fw_name[1][0], ucode->version, (VERSION_LEN - 2));
+	ndev->hw.fw_name[1][VERSION_LEN - 1] = '\0';
+
+	/* Load AE Firmware on UCD Block 2 */
+	write_to_ucd_unit(ndev, ucode_size, ucode_data, 2);
+
+	release_firmware(fw);
+
+	/* put all AE cores in DEFAULT_AE_GROUP */
+	offset = AQM_GRP_EXECMSK_LOX(DEFAULT_AE_GROUP);
+	aqm_grp_execmask_lo.exec_0_to_39 = 0xFFFFFFFFFFULL;
+	nitrox_write_csr(ndev, offset, aqm_grp_execmask_lo.value);
+	offset = AQM_GRP_EXECMSK_HIX(DEFAULT_AE_GROUP);
+	aqm_grp_execmask_hi.exec_40_to_79 = 0xFFFFFFFFFFULL;
+	nitrox_write_csr(ndev, offset, aqm_grp_execmask_hi.value);
+
+	/* write block number and firmware length
+	 * bit:<2:0> block number
+	 * bit:3 is set AE uses 32KB microcode
+	 * bit:3 is clear AE uses 64KB microcode
+	 */
+	core_2_eid_val.value = 0ULL;
+	core_2_eid_val.ucode_blk = 0;
+	if (ucode_size <= CNN55XX_UCD_BLOCK_SIZE)
+		core_2_eid_val.ucode_len = 1;
+	else
+		core_2_eid_val.ucode_len = 0;
+
+	for (i = 0; i < ndev->hw.ae_cores; i++) {
+		offset = UCD_AE_EID_UCODE_BLOCK_NUMX(i);
+		nitrox_write_csr(ndev, offset, core_2_eid_val.value);
+	}
+
 	return 0;
 }
 
@@ -299,7 +387,9 @@
 	/* get cores information */
 	nitrox_get_hwinfo(ndev);
 
-	nitrox_config_nps_unit(ndev);
+	nitrox_config_nps_core_unit(ndev);
+	nitrox_config_aqm_unit(ndev);
+	nitrox_config_nps_pkt_unit(ndev);
 	nitrox_config_pom_unit(ndev);
 	nitrox_config_efl_unit(ndev);
 	/* configure IO units */
@@ -309,8 +399,8 @@
 	nitrox_config_lbc_unit(ndev);
 	nitrox_config_rand_unit(ndev);
 
-	/* load firmware on SE cores */
-	err = nitrox_load_fw(ndev, SE_FW);
+	/* load firmware on cores */
+	err = nitrox_load_fw(ndev);
 	if (err)
 		return err;
 
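The UCD block-number comments above describe a simple bit encoding; as a rough standalone illustration (not part of the patch, which builds the value through union ucd_core_eid_ucode_block_num), the per-core register value could be composed like this:

	/* Illustrative only: bits <2:0> select the UCD block, bit 3 is set
	 * when the microcode image fits in a single 32KB UCD block.
	 * CNN55XX_UCD_BLOCK_SIZE is the constant defined earlier in this hunk.
	 */
	static u64 ucd_block_num_encode(u32 ucode_size, int block_num)
	{
		u64 val = block_num & 0x7;

		if (ucode_size <= CNN55XX_UCD_BLOCK_SIZE)
			val |= BIT(3);

		return val;
	}
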
diff --git a/drivers/crypto/cavium/nitrox/nitrox_req.h b/drivers/crypto/cavium/nitrox/nitrox_req.h
index efdbd0f..f69ba02 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_req.h
+++ b/drivers/crypto/cavium/nitrox/nitrox_req.h
@@ -400,6 +400,36 @@
 };
 
 /**
+ * struct aqmq_command_s - The 32 byte command for AE processing.
+ * @opcode: Request opcode
+ * @param1: Request control parameter 1
+ * @param2: Request control parameter 2
+ * @dlen: Input length
+ * @dptr: Input pointer points to buffer in remote host
+ * @rptr: Result pointer points to buffer in remote host
+ * @grp: AQM Group (0..7)
+ * @cptr: Context pointer
+ */
+struct aqmq_command_s {
+	__be16 opcode;
+	__be16 param1;
+	__be16 param2;
+	__be16 dlen;
+	__be64 dptr;
+	__be64 rptr;
+	union {
+		__be64 word3;
+#if defined(__BIG_ENDIAN_BITFIELD)
+		u64 grp : 3;
+		u64 cptr : 61;
+#else
+		u64 cptr : 61;
+		u64 grp : 3;
+#endif
+	};
+};
+
+/**
  * struct ctx_hdr - Book keeping data about the crypto context
  * @pool: Pool used to allocate crypto context
 * @dma: Base DMA address of the crypto context
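The kerneldoc above fixes the AE command at 32 bytes, which is also what nitrox_alloc_aqm_queues() stores in cmdq->instr_size. A minimal compile-time check (an illustrative sketch, not in the patch; relies on BUILD_BUG_ON from <linux/build_bug.h>) would catch layout regressions:

	/* Illustrative only: the AQM command issued to the AE cores must stay
	 * exactly 32 bytes wide.
	 */
	static inline void aqmq_command_layout_check(void)
	{
		BUILD_BUG_ON(sizeof(struct aqmq_command_s) != 32);
	}
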
diff --git a/drivers/crypto/cavium/nitrox/nitrox_skcipher.c b/drivers/crypto/cavium/nitrox/nitrox_skcipher.c
index 7e4a5e6..3cdce1f 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_skcipher.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_skcipher.c
@@ -7,7 +7,7 @@
 #include <crypto/aes.h>
 #include <crypto/skcipher.h>
 #include <crypto/ctr.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/xts.h>
 
 #include "nitrox_dev.h"
@@ -257,7 +257,7 @@
 static int nitrox_3des_setkey(struct crypto_skcipher *cipher,
 			      const u8 *key, unsigned int keylen)
 {
-	return unlikely(des3_verify_key(cipher, key)) ?:
+	return verify_skcipher_des3_key(cipher, key) ?:
 	       nitrox_skcipher_setkey(cipher, 0, key, keylen);
 }
 
diff --git a/drivers/crypto/cavium/nitrox/nitrox_sriov.c b/drivers/crypto/cavium/nitrox/nitrox_sriov.c
index bf439d8..43287f8 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_sriov.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_sriov.c
@@ -109,6 +109,9 @@
 		return err;
 	}
 
+	/* configure the AQM queues */
+	nitrox_config_aqm_rings(ndev);
+
 	/* configure the packet queues */
 	nitrox_config_pkt_input_rings(ndev);
 	nitrox_config_pkt_solicit_ports(ndev);
diff --git a/drivers/crypto/cavium/zip/zip_main.c b/drivers/crypto/cavium/zip/zip_main.c
index a8447a3..194624b 100644
--- a/drivers/crypto/cavium/zip/zip_main.c
+++ b/drivers/crypto/cavium/zip/zip_main.c
@@ -593,6 +593,7 @@
 	.owner = THIS_MODULE,
 	.open  = zip_stats_open,
 	.read  = seq_read,
+	.release = single_release,
 };
 
 static int zip_clear_open(struct inode *inode, struct file *file)
@@ -604,6 +605,7 @@
 	.owner = THIS_MODULE,
 	.open  = zip_clear_open,
 	.read  = seq_read,
+	.release = single_release,
 };
 
 static int zip_regs_open(struct inode *inode, struct file *file)
@@ -615,6 +617,7 @@
 	.owner = THIS_MODULE,
 	.open  = zip_regs_open,
 	.read  = seq_read,
+	.release = single_release,
 };
 
 /* Root directory for thunderx_zip debugfs entry */
diff --git a/drivers/crypto/ccp/Kconfig b/drivers/crypto/ccp/Kconfig
index 48f3edc..8fec733 100644
--- a/drivers/crypto/ccp/Kconfig
+++ b/drivers/crypto/ccp/Kconfig
@@ -30,6 +30,7 @@
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_AUTHENC
 	select CRYPTO_RSA
+	select CRYPTO_LIB_AES
 	help
 	  Support for using the cryptographic API with the AMD Cryptographic
 	  Coprocessor. This module supports offload of SHA and AES algorithms.
@@ -45,3 +46,11 @@
 	 management commands in Secure Encrypted Virtualization (SEV) mode,
 	 along with software-based Trusted Execution Environment (TEE) to
 	 enable third-party trusted applications.
+
+config CRYPTO_DEV_CCP_DEBUGFS
+	bool "Enable CCP Internals in DebugFS"
+	default n
+	depends on CRYPTO_DEV_SP_CCP
+	help
+	  Expose CCP device information such as operation statistics, feature
+	  information, and descriptor queue contents.
diff --git a/drivers/crypto/ccp/Makefile b/drivers/crypto/ccp/Makefile
index 51d1c0c..6b86f1e 100644
--- a/drivers/crypto/ccp/Makefile
+++ b/drivers/crypto/ccp/Makefile
@@ -5,8 +5,8 @@
 	    ccp-ops.o \
 	    ccp-dev-v3.o \
 	    ccp-dev-v5.o \
-	    ccp-dmaengine.o \
-	    ccp-debugfs.o
+	    ccp-dmaengine.o
+ccp-$(CONFIG_CRYPTO_DEV_CCP_DEBUGFS) += ccp-debugfs.o
 ccp-$(CONFIG_PCI) += sp-pci.o
 ccp-$(CONFIG_CRYPTO_DEV_SP_PSP) += psp-dev.o
 
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
index bb7219d..32f19f4 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-cmac.c
@@ -261,6 +261,7 @@
 		ccp_crypto_ahash_alg(crypto_ahash_tfm(tfm));
 	u64 k0_hi, k0_lo, k1_hi, k1_lo, k2_hi, k2_lo;
 	u64 rb_hi = 0x00, rb_lo = 0x87;
+	struct crypto_aes_ctx aes;
 	__be64 *gk;
 	int ret;
 
@@ -284,14 +285,14 @@
 	ctx->u.aes.key_len = 0;
 
 	/* Set the key for the AES cipher used to generate the keys */
-	ret = crypto_cipher_setkey(ctx->u.aes.tfm_cipher, key, key_len);
+	ret = aes_expandkey(&aes, key, key_len);
 	if (ret)
 		return ret;
 
 	/* Encrypt a block of zeroes - use key area in context */
 	memset(ctx->u.aes.key, 0, sizeof(ctx->u.aes.key));
-	crypto_cipher_encrypt_one(ctx->u.aes.tfm_cipher, ctx->u.aes.key,
-				  ctx->u.aes.key);
+	aes_encrypt(&aes, ctx->u.aes.key, ctx->u.aes.key);
+	memzero_explicit(&aes, sizeof(aes));
 
 	/* Generate K1 and K2 */
 	k0_hi = be64_to_cpu(*((__be64 *)ctx->u.aes.key));
@@ -336,32 +337,15 @@
 {
 	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct crypto_ahash *ahash = __crypto_ahash_cast(tfm);
-	struct crypto_cipher *cipher_tfm;
 
 	ctx->complete = ccp_aes_cmac_complete;
 	ctx->u.aes.key_len = 0;
 
 	crypto_ahash_set_reqsize(ahash, sizeof(struct ccp_aes_cmac_req_ctx));
 
-	cipher_tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_NEED_FALLBACK);
-	if (IS_ERR(cipher_tfm)) {
-		pr_warn("could not load aes cipher driver\n");
-		return PTR_ERR(cipher_tfm);
-	}
-	ctx->u.aes.tfm_cipher = cipher_tfm;
-
 	return 0;
 }
 
-static void ccp_aes_cmac_cra_exit(struct crypto_tfm *tfm)
-{
-	struct ccp_ctx *ctx = crypto_tfm_ctx(tfm);
-
-	if (ctx->u.aes.tfm_cipher)
-		crypto_free_cipher(ctx->u.aes.tfm_cipher);
-	ctx->u.aes.tfm_cipher = NULL;
-}
-
 int ccp_register_aes_cmac_algs(struct list_head *head)
 {
 	struct ccp_crypto_ahash_alg *ccp_alg;
@@ -401,7 +385,6 @@
 	base->cra_ctxsize = sizeof(struct ccp_ctx);
 	base->cra_priority = CCP_CRA_PRIORITY;
 	base->cra_init = ccp_aes_cmac_cra_init;
-	base->cra_exit = ccp_aes_cmac_cra_exit;
 	base->cra_module = THIS_MODULE;
 
 	ret = crypto_register_ahash(alg);
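The setkey hunk above drops the per-tfm "aes" fallback cipher in favour of the new AES library calls. A minimal standalone sketch of that pattern (function name and signature are illustrative, not from the patch):

	#include <crypto/aes.h>
	#include <linux/string.h>

	/* Expand the key on the stack, encrypt one block of zeroes (the value
	 * the CMAC subkey derivation starts from), then wipe the round keys.
	 */
	static int ccp_cmac_zero_block(const u8 *key, unsigned int key_len,
				       u8 out[AES_BLOCK_SIZE])
	{
		struct crypto_aes_ctx aes;
		int ret;

		ret = aes_expandkey(&aes, key, key_len);
		if (ret)
			return ret;

		memset(out, 0, AES_BLOCK_SIZE);
		aes_encrypt(&aes, out, out);
		memzero_explicit(&aes, sizeof(aes));

		return 0;
	}
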
diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
index 783ba75..8e4a531 100644
--- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c
+++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c
@@ -116,9 +116,6 @@
 	if (!ctx->u.aes.key_len)
 		return -EINVAL;
 
-	if (req->nbytes & (AES_BLOCK_SIZE - 1))
-		return -EINVAL;
-
 	if (!req->info)
 		return -EINVAL;
 
diff --git a/drivers/crypto/ccp/ccp-crypto-des3.c b/drivers/crypto/ccp/ccp-crypto-des3.c
index 5f05f83..d2c49b2 100644
--- a/drivers/crypto/ccp/ccp-crypto-des3.c
+++ b/drivers/crypto/ccp/ccp-crypto-des3.c
@@ -14,7 +14,7 @@
 #include <linux/crypto.h>
 #include <crypto/algapi.h>
 #include <crypto/scatterwalk.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 
 #include "ccp-crypto.h"
 
@@ -39,11 +39,10 @@
 	struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ablkcipher_tfm(tfm));
 	struct ccp_crypto_ablkcipher_alg *alg =
 		ccp_crypto_ablkcipher_alg(crypto_ablkcipher_tfm(tfm));
-	u32 *flags = &tfm->base.crt_flags;
 	int err;
 
-	err = __des3_verify_key(flags, key);
-	if (unlikely(err))
+	err = verify_ablkcipher_des3_key(tfm, key);
+	if (err)
 		return err;
 
 	/* It's not clear that there is any support for a keysize of 112.
diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c
index 8180964..8ee4cb4 100644
--- a/drivers/crypto/ccp/ccp-crypto-main.c
+++ b/drivers/crypto/ccp/ccp-crypto-main.c
@@ -405,8 +405,10 @@
 	int ret;
 
 	ret = ccp_present();
-	if (ret)
+	if (ret) {
+		pr_err("Cannot load: there are no available CCPs\n");
 		return ret;
+	}
 
 	spin_lock_init(&req_queue_lock);
 	INIT_LIST_HEAD(&req_queue.cmds);
diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h
index 622b34c..9015b5d 100644
--- a/drivers/crypto/ccp/ccp-crypto.h
+++ b/drivers/crypto/ccp/ccp-crypto.h
@@ -12,7 +12,6 @@
 
 #include <linux/list.h>
 #include <linux/wait.h>
-#include <linux/pci.h>
 #include <linux/ccp.h>
 #include <crypto/algapi.h>
 #include <crypto/aes.h>
@@ -24,6 +23,10 @@
 #include <crypto/akcipher.h>
 #include <crypto/internal/rsa.h>
 
+/* We want the module name in front of our messages */
+#undef pr_fmt
+#define	pr_fmt(fmt)	KBUILD_MODNAME ": " fmt
+
 #define	CCP_LOG_LEVEL	KERN_INFO
 
 #define CCP_CRA_PRIORITY	300
@@ -87,9 +90,6 @@
 	/* Fallback cipher for XTS with unsupported unit sizes */
 	struct crypto_sync_skcipher *tfm_skcipher;
 
-	/* Cipher used to generate CMAC K1/K2 keys */
-	struct crypto_cipher *tfm_cipher;
-
 	enum ccp_engine engine;
 	enum ccp_aes_type type;
 	enum ccp_aes_mode mode;
diff --git a/drivers/crypto/ccp/ccp-dev-v3.c b/drivers/crypto/ccp/ccp-dev-v3.c
index 2b7d47e..0186b3d 100644
--- a/drivers/crypto/ccp/ccp-dev-v3.c
+++ b/drivers/crypto/ccp/ccp-dev-v3.c
@@ -10,7 +10,6 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/pci.h>
 #include <linux/kthread.h>
 #include <linux/interrupt.h>
 #include <linux/ccp.h>
@@ -379,7 +378,7 @@
 	/* Find available queues */
 	ccp->qim = 0;
 	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
-	for (i = 0; i < MAX_HW_QUEUES; i++) {
+	for (i = 0; (i < MAX_HW_QUEUES) && (ccp->cmd_q_count < ccp->max_q_count); i++) {
 		if (!(qmr & (1 << i)))
 			continue;
 
diff --git a/drivers/crypto/ccp/ccp-dev-v5.c b/drivers/crypto/ccp/ccp-dev-v5.c
index 217e41b..57eb53b 100644
--- a/drivers/crypto/ccp/ccp-dev-v5.c
+++ b/drivers/crypto/ccp/ccp-dev-v5.c
@@ -2,16 +2,13 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  *
- * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
+ * Copyright (C) 2016,2019 Advanced Micro Devices, Inc.
  *
  * Author: Gary R Hook <gary.hook@amd.com>
  */
 
-#include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/pci.h>
 #include <linux/kthread.h>
-#include <linux/debugfs.h>
 #include <linux/dma-mapping.h>
 #include <linux/interrupt.h>
 #include <linux/compiler.h>
@@ -792,8 +789,7 @@
 
 	/* Find available queues */
 	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
-	for (i = 0; i < MAX_HW_QUEUES; i++) {
-
+	for (i = 0; (i < MAX_HW_QUEUES) && (ccp->cmd_q_count < ccp->max_q_count); i++) {
 		if (!(qmr & (1 << i)))
 			continue;
 
@@ -806,6 +802,7 @@
 		if (!dma_pool) {
 			dev_err(dev, "unable to allocate dma pool\n");
 			ret = -ENOMEM;
+			goto e_pool;
 		}
 
 		cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
@@ -819,9 +816,9 @@
 		/* Page alignment satisfies our needs for N <= 128 */
 		BUILD_BUG_ON(COMMANDS_PER_QUEUE > 128);
 		cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
-		cmd_q->qbase = dma_alloc_coherent(dev, cmd_q->qsize,
-						  &cmd_q->qbase_dma,
-						  GFP_KERNEL);
+		cmd_q->qbase = dmam_alloc_coherent(dev, cmd_q->qsize,
+						   &cmd_q->qbase_dma,
+						   GFP_KERNEL);
 		if (!cmd_q->qbase) {
 			dev_err(dev, "unable to allocate command queue\n");
 			ret = -ENOMEM;
@@ -970,8 +967,10 @@
 	if (ret)
 		goto e_hwrng;
 
+#ifdef CONFIG_CRYPTO_DEV_CCP_DEBUGFS
 	/* Set up debugfs entries */
 	ccp5_debugfs_setup(ccp);
+#endif
 
 	return 0;
 
@@ -995,7 +994,6 @@
 
 static void ccp5_destroy(struct ccp_device *ccp)
 {
-	struct device *dev = ccp->dev;
 	struct ccp_cmd_queue *cmd_q;
 	struct ccp_cmd *cmd;
 	unsigned int i;
@@ -1009,11 +1007,13 @@
 	/* Remove this device from the list of available units first */
 	ccp_del_device(ccp);
 
+#ifdef CONFIG_CRYPTO_DEV_CCP_DEBUGFS
 	/* We're in the process of tearing down the entire driver;
 	 * when all the devices are gone clean up debugfs
 	 */
 	if (ccp_present())
 		ccp5_debugfs_destroy();
+#endif
 
 	/* Disable and clear interrupts */
 	ccp5_disable_queue_interrupts(ccp);
@@ -1036,12 +1036,6 @@
 
 	sp_free_ccp_irq(ccp->sp, ccp);
 
-	for (i = 0; i < ccp->cmd_q_count; i++) {
-		cmd_q = &ccp->cmd_q[i];
-		dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase,
-				  cmd_q->qbase_dma);
-	}
-
 	/* Flush the cmd and backlog queue */
 	while (!list_empty(&ccp->cmd)) {
 		/* Invoke the callback directly with an error code */
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index edefa6691..73acf0f 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -2,12 +2,13 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  *
- * Copyright (C) 2013,2017 Advanced Micro Devices, Inc.
+ * Copyright (C) 2013,2019 Advanced Micro Devices, Inc.
  *
  * Author: Tom Lendacky <thomas.lendacky@amd.com>
  * Author: Gary R Hook <gary.hook@amd.com>
  */
 
+#include <linux/module.h>
 #include <linux/kernel.h>
 #include <linux/kthread.h>
 #include <linux/sched.h>
@@ -19,6 +20,7 @@
 #include <linux/delay.h>
 #include <linux/hw_random.h>
 #include <linux/cpu.h>
+#include <linux/atomic.h>
 #ifdef CONFIG_X86
 #include <asm/cpu_device_id.h>
 #endif
@@ -26,6 +28,19 @@
 
 #include "ccp-dev.h"
 
+#define MAX_CCPS 32
+
+/* Limit CCP use to a specified number of queues per device */
+static unsigned int nqueues = 0;
+module_param(nqueues, uint, 0444);
+MODULE_PARM_DESC(nqueues, "Number of queues per CCP (minimum 1; default: all available)");
+
+/* Limit the maximum number of configured CCPs */
+static atomic_t dev_count = ATOMIC_INIT(0);
+static unsigned int max_devs = MAX_CCPS;
+module_param(max_devs, uint, 0444);
+MODULE_PARM_DESC(max_devs, "Maximum number of CCPs to enable (default: all; 0 disables all CCPs)");
+
 struct ccp_tasklet_data {
 	struct completion completion;
 	struct ccp_cmd *cmd;
@@ -594,12 +609,24 @@
 	struct ccp_device *ccp;
 	int ret;
 
+	/*
+	 * Check how many we have so far, and stop after reaching
+	 * that number
+	 */
+	if (atomic_inc_return(&dev_count) > max_devs)
+		return 0; /* don't fail the load */
+
 	ret = -ENOMEM;
 	ccp = ccp_alloc_struct(sp);
 	if (!ccp)
 		goto e_err;
 	sp->ccp_data = ccp;
 
+	if (!nqueues || (nqueues > MAX_HW_QUEUES))
+		ccp->max_q_count = MAX_HW_QUEUES;
+	else
+		ccp->max_q_count = nqueues;
+
 	ccp->vdata = (struct ccp_vdata *)sp->dev_vdata->ccp_vdata;
 	if (!ccp->vdata || !ccp->vdata->version) {
 		ret = -ENODEV;
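The two module parameters added above bound CCP resource usage at load time; as an illustrative invocation (not from the patch), "modprobe ccp nqueues=4 max_devs=2" would cap each device at four command queues and bring up only the first two CCPs, while the documented defaults keep every queue and device enabled.
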
diff --git a/drivers/crypto/ccp/ccp-dev.h b/drivers/crypto/ccp/ccp-dev.h
index 5e62492..3f68262 100644
--- a/drivers/crypto/ccp/ccp-dev.h
+++ b/drivers/crypto/ccp/ccp-dev.h
@@ -12,11 +12,11 @@
 #define __CCP_DEV_H__
 
 #include <linux/device.h>
-#include <linux/pci.h>
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
 #include <linux/wait.h>
+#include <linux/dma-direction.h>
 #include <linux/dmapool.h>
 #include <linux/hw_random.h>
 #include <linux/bitops.h>
@@ -379,6 +379,7 @@
 	 */
 	struct ccp_cmd_queue cmd_q[MAX_HW_QUEUES];
 	unsigned int cmd_q_count;
+	unsigned int max_q_count;
 
 	/* Support for the CCP True RNG
 	 */
diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c
index 7f22a45..a54f936 100644
--- a/drivers/crypto/ccp/ccp-dmaengine.c
+++ b/drivers/crypto/ccp/ccp-dmaengine.c
@@ -2,13 +2,14 @@
 /*
  * AMD Cryptographic Coprocessor (CCP) driver
  *
- * Copyright (C) 2016,2017 Advanced Micro Devices, Inc.
+ * Copyright (C) 2016,2019 Advanced Micro Devices, Inc.
  *
  * Author: Gary R Hook <gary.hook@amd.com>
  */
 
 #include <linux/module.h>
 #include <linux/kernel.h>
+#include <linux/dma-mapping.h>
 #include <linux/dmaengine.h>
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
@@ -35,6 +36,10 @@
 module_param(dma_chan_attr, uint, 0444);
 MODULE_PARM_DESC(dma_chan_attr, "Set DMA channel visibility: 0 (default) = device defaults, 1 = make private, 2 = make public");
 
+static unsigned int dmaengine = 1;
+module_param(dmaengine, uint, 0444);
+MODULE_PARM_DESC(dmaengine, "Register services with the DMA subsystem (any non-zero value, default: 1)");
+
 static unsigned int ccp_get_dma_chan_attr(struct ccp_device *ccp)
 {
 	switch (dma_chan_attr) {
@@ -637,6 +642,9 @@
 	unsigned int i;
 	int ret;
 
+	if (!dmaengine)
+		return 0;
+
 	ccp->ccp_dma_chan = devm_kcalloc(ccp->dev, ccp->cmd_q_count,
 					 sizeof(*(ccp->ccp_dma_chan)),
 					 GFP_KERNEL);
@@ -740,6 +748,9 @@
 {
 	struct dma_device *dma_dev = &ccp->dma_dev;
 
+	if (!dmaengine)
+		return;
+
 	dma_async_device_unregister(dma_dev);
 
 	kmem_cache_destroy(ccp->dma_desc_cache);
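Together with the queue and device limits above, the new "dmaengine" parameter gives a load-time opt-out for DMA offload: loading with dmaengine=0 (illustrative usage, not from the patch) makes ccp_dmaengine_register() and the unregister path guarded above return without touching the dmaengine subsystem.
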
diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c
index 9bc3c62..c8da8eb 100644
--- a/drivers/crypto/ccp/ccp-ops.c
+++ b/drivers/crypto/ccp/ccp-ops.c
@@ -10,7 +10,6 @@
 
 #include <linux/module.h>
 #include <linux/kernel.h>
-#include <linux/pci.h>
 #include <linux/interrupt.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/des.h>
@@ -150,14 +149,13 @@
 	if (len <= CCP_DMAPOOL_MAX_SIZE) {
 		wa->dma_pool = cmd_q->dma_pool;
 
-		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
+		wa->address = dma_pool_zalloc(wa->dma_pool, GFP_KERNEL,
 					     &wa->dma.address);
 		if (!wa->address)
 			return -ENOMEM;
 
 		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
 
-		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
 	} else {
 		wa->address = kzalloc(len, GFP_KERNEL);
 		if (!wa->address)
@@ -455,8 +453,8 @@
 	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, true);
 }
 
-static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
-				struct ccp_cmd *cmd)
+static noinline_for_stack int
+ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 {
 	struct ccp_aes_engine *aes = &cmd->u.aes;
 	struct ccp_dm_workarea key, ctx;
@@ -611,8 +609,8 @@
 	return ret;
 }
 
-static int ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q,
-			       struct ccp_cmd *cmd)
+static noinline_for_stack int
+ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 {
 	struct ccp_aes_engine *aes = &cmd->u.aes;
 	struct ccp_dm_workarea key, ctx, final_wa, tag;
@@ -894,7 +892,8 @@
 	return ret;
 }
 
-static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+static noinline_for_stack int
+ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 {
 	struct ccp_aes_engine *aes = &cmd->u.aes;
 	struct ccp_dm_workarea key, ctx;
@@ -904,12 +903,6 @@
 	bool in_place = false;
 	int ret;
 
-	if (aes->mode == CCP_AES_MODE_CMAC)
-		return ccp_run_aes_cmac_cmd(cmd_q, cmd);
-
-	if (aes->mode == CCP_AES_MODE_GCM)
-		return ccp_run_aes_gcm_cmd(cmd_q, cmd);
-
 	if (!((aes->key_len == AES_KEYSIZE_128) ||
 	      (aes->key_len == AES_KEYSIZE_192) ||
 	      (aes->key_len == AES_KEYSIZE_256)))
@@ -1076,8 +1069,8 @@
 	return ret;
 }
 
-static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
-			       struct ccp_cmd *cmd)
+static noinline_for_stack int
+ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 {
 	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
 	struct ccp_dm_workarea key, ctx;
@@ -1276,7 +1269,8 @@
 	return ret;
 }
 
-static int ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+static noinline_for_stack int
+ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 {
 	struct ccp_des3_engine *des3 = &cmd->u.des3;
 
@@ -1472,7 +1466,8 @@
 	return ret;
 }
 
-static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+static noinline_for_stack int
+ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 {
 	struct ccp_sha_engine *sha = &cmd->u.sha;
 	struct ccp_dm_workarea ctx;
@@ -1816,7 +1811,8 @@
 	return ret;
 }
 
-static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+static noinline_for_stack int
+ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 {
 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
 	struct ccp_dm_workarea exp, src, dst;
@@ -1947,8 +1943,8 @@
 	return ret;
 }
 
-static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
-				struct ccp_cmd *cmd)
+static noinline_for_stack int
+ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 {
 	struct ccp_passthru_engine *pt = &cmd->u.passthru;
 	struct ccp_dm_workarea mask;
@@ -2079,7 +2075,8 @@
 	return ret;
 }
 
-static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
+static noinline_for_stack int
+ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
 				      struct ccp_cmd *cmd)
 {
 	struct ccp_passthru_nomap_engine *pt = &cmd->u.passthru_nomap;
@@ -2420,7 +2417,8 @@
 	return ret;
 }
 
-static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
+static noinline_for_stack int
+ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
 {
 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
 
@@ -2457,7 +2455,17 @@
 
 	switch (cmd->engine) {
 	case CCP_ENGINE_AES:
-		ret = ccp_run_aes_cmd(cmd_q, cmd);
+		switch (cmd->u.aes.mode) {
+		case CCP_AES_MODE_CMAC:
+			ret = ccp_run_aes_cmac_cmd(cmd_q, cmd);
+			break;
+		case CCP_AES_MODE_GCM:
+			ret = ccp_run_aes_gcm_cmd(cmd_q, cmd);
+			break;
+		default:
+			ret = ccp_run_aes_cmd(cmd_q, cmd);
+			break;
+		}
 		break;
 	case CCP_ENGINE_XTS_AES_128:
 		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
diff --git a/drivers/crypto/ccp/psp-dev.h b/drivers/crypto/ccp/psp-dev.h
index c5e06c9..82a084f 100644
--- a/drivers/crypto/ccp/psp-dev.h
+++ b/drivers/crypto/ccp/psp-dev.h
@@ -11,7 +11,6 @@
 #define __PSP_DEV_H__
 
 #include <linux/device.h>
-#include <linux/pci.h>
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
diff --git a/drivers/crypto/ccp/sp-dev.h b/drivers/crypto/ccp/sp-dev.h
index 8abe9ea..53c1256 100644
--- a/drivers/crypto/ccp/sp-dev.h
+++ b/drivers/crypto/ccp/sp-dev.h
@@ -13,7 +13,6 @@
 #define __SP_DEV_H__
 
 #include <linux/device.h>
-#include <linux/pci.h>
 #include <linux/spinlock.h>
 #include <linux/mutex.h>
 #include <linux/list.h>
diff --git a/drivers/crypto/ccp/sp-platform.c b/drivers/crypto/ccp/sp-platform.c
index 1b45236..831aac1 100644
--- a/drivers/crypto/ccp/sp-platform.c
+++ b/drivers/crypto/ccp/sp-platform.c
@@ -125,7 +125,6 @@
 	struct sp_platform *sp_platform;
 	struct device *dev = &pdev->dev;
 	enum dev_dma_attr attr;
-	struct resource *ior;
 	int ret;
 
 	ret = -ENOMEM;
@@ -146,8 +145,7 @@
 		goto e_err;
 	}
 
-	ior = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	sp->io_map = devm_ioremap_resource(dev, ior);
+	sp->io_map = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(sp->io_map)) {
 		ret = PTR_ERR(sp->io_map);
 		goto e_err;
diff --git a/drivers/crypto/ccree/Makefile b/drivers/crypto/ccree/Makefile
index 145e50b..5cfda50 100644
--- a/drivers/crypto/ccree/Makefile
+++ b/drivers/crypto/ccree/Makefile
@@ -2,7 +2,7 @@
 # Copyright (C) 2012-2019 ARM Limited (or its affiliates).
 
 obj-$(CONFIG_CRYPTO_DEV_CCREE) := ccree.o
-ccree-y := cc_driver.o cc_buffer_mgr.o cc_request_mgr.o cc_cipher.o cc_hash.o cc_aead.o cc_ivgen.o cc_sram_mgr.o
+ccree-y := cc_driver.o cc_buffer_mgr.o cc_request_mgr.o cc_cipher.o cc_hash.o cc_aead.o cc_sram_mgr.o
 ccree-$(CONFIG_CRYPTO_FIPS) += cc_fips.o
 ccree-$(CONFIG_DEBUG_FS) += cc_debugfs.o
 ccree-$(CONFIG_PM) += cc_pm.o
diff --git a/drivers/crypto/ccree/cc_aead.c b/drivers/crypto/ccree/cc_aead.c
index 7aa4cbe..d3e8faa 100644
--- a/drivers/crypto/ccree/cc_aead.c
+++ b/drivers/crypto/ccree/cc_aead.c
@@ -6,7 +6,7 @@
 #include <crypto/algapi.h>
 #include <crypto/internal/aead.h>
 #include <crypto/authenc.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <linux/rtnetlink.h>
 #include "cc_driver.h"
 #include "cc_buffer_mgr.h"
@@ -236,31 +236,17 @@
 			/* In case of payload authentication failure, MUST NOT
 			 * reveal the decrypted message --> zero its memory.
 			 */
-			cc_zero_sgl(areq->dst, areq_ctx->cryptlen);
+			sg_zero_buffer(areq->dst, sg_nents(areq->dst),
+				       areq->cryptlen, 0);
 			err = -EBADMSG;
 		}
-	} else { /*ENCRYPT*/
-		if (areq_ctx->is_icv_fragmented) {
-			u32 skip = areq->cryptlen + areq_ctx->dst_offset;
+	/*ENCRYPT*/
+	} else if (areq_ctx->is_icv_fragmented) {
+		u32 skip = areq->cryptlen + areq_ctx->dst_offset;
 
-			cc_copy_sg_portion(dev, areq_ctx->mac_buf,
-					   areq_ctx->dst_sgl, skip,
-					   (skip + ctx->authsize),
-					   CC_SG_FROM_BUF);
-		}
-
-		/* If an IV was generated, copy it back to the user provided
-		 * buffer.
-		 */
-		if (areq_ctx->backup_giv) {
-			if (ctx->cipher_mode == DRV_CIPHER_CTR)
-				memcpy(areq_ctx->backup_giv, areq_ctx->ctr_iv +
-				       CTR_RFC3686_NONCE_SIZE,
-				       CTR_RFC3686_IV_SIZE);
-			else if (ctx->cipher_mode == DRV_CIPHER_CCM)
-				memcpy(areq_ctx->backup_giv, areq_ctx->ctr_iv +
-				       CCM_BLOCK_IV_OFFSET, CCM_BLOCK_IV_SIZE);
-		}
+		cc_copy_sg_portion(dev, areq_ctx->mac_buf, areq_ctx->dst_sgl,
+				   skip, (skip + ctx->authsize),
+				   CC_SG_FROM_BUF);
 	}
 done:
 	aead_request_complete(areq, err);
@@ -663,33 +649,17 @@
 			       unsigned int keylen)
 {
 	struct crypto_authenc_keys keys;
-	u32 flags;
 	int err;
 
 	err = crypto_authenc_extractkeys(&keys, key, keylen);
 	if (unlikely(err))
-		goto badkey;
+		return err;
 
-	err = -EINVAL;
-	if (keys.enckeylen != DES3_EDE_KEY_SIZE)
-		goto badkey;
+	err = verify_aead_des3_key(aead, keys.enckey, keys.enckeylen) ?:
+	      cc_aead_setkey(aead, key, keylen);
 
-	flags = crypto_aead_get_flags(aead);
-	err = __des3_verify_key(&flags, keys.enckey);
-	if (unlikely(err)) {
-		crypto_aead_set_flags(aead, flags);
-		goto out;
-	}
-
-	err = cc_aead_setkey(aead, key, keylen);
-
-out:
 	memzero_explicit(&keys, sizeof(keys));
 	return err;
-
-badkey:
-	crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN);
-	goto out;
 }
 
 static int cc_rfc4309_ccm_setkey(struct crypto_aead *tfm, const u8 *key,
@@ -1975,9 +1945,8 @@
 		 */
 		memcpy(areq_ctx->ctr_iv, ctx->ctr_nonce,
 		       CTR_RFC3686_NONCE_SIZE);
-		if (!areq_ctx->backup_giv) /*User none-generated IV*/
-			memcpy(areq_ctx->ctr_iv + CTR_RFC3686_NONCE_SIZE,
-			       req->iv, CTR_RFC3686_IV_SIZE);
+		memcpy(areq_ctx->ctr_iv + CTR_RFC3686_NONCE_SIZE, req->iv,
+		       CTR_RFC3686_IV_SIZE);
 		/* Initialize counter portion of counter block */
 		*(__be32 *)(areq_ctx->ctr_iv + CTR_RFC3686_NONCE_SIZE +
 			    CTR_RFC3686_IV_SIZE) = cpu_to_be32(1);
@@ -2023,40 +1992,6 @@
 		goto exit;
 	}
 
-	/* do we need to generate IV? */
-	if (areq_ctx->backup_giv) {
-		/* set the DMA mapped IV address*/
-		if (ctx->cipher_mode == DRV_CIPHER_CTR) {
-			cc_req.ivgen_dma_addr[0] =
-				areq_ctx->gen_ctx.iv_dma_addr +
-				CTR_RFC3686_NONCE_SIZE;
-			cc_req.ivgen_dma_addr_len = 1;
-		} else if (ctx->cipher_mode == DRV_CIPHER_CCM) {
-			/* In ccm, the IV needs to exist both inside B0 and
-			 * inside the counter.It is also copied to iv_dma_addr
-			 * for other reasons (like returning it to the user).
-			 * So, using 3 (identical) IV outputs.
-			 */
-			cc_req.ivgen_dma_addr[0] =
-				areq_ctx->gen_ctx.iv_dma_addr +
-				CCM_BLOCK_IV_OFFSET;
-			cc_req.ivgen_dma_addr[1] =
-				sg_dma_address(&areq_ctx->ccm_adata_sg) +
-				CCM_B0_OFFSET + CCM_BLOCK_IV_OFFSET;
-			cc_req.ivgen_dma_addr[2] =
-				sg_dma_address(&areq_ctx->ccm_adata_sg) +
-				CCM_CTR_COUNT_0_OFFSET + CCM_BLOCK_IV_OFFSET;
-			cc_req.ivgen_dma_addr_len = 3;
-		} else {
-			cc_req.ivgen_dma_addr[0] =
-				areq_ctx->gen_ctx.iv_dma_addr;
-			cc_req.ivgen_dma_addr_len = 1;
-		}
-
-		/* set the IV size (8/16 B long)*/
-		cc_req.ivgen_size = crypto_aead_ivsize(tfm);
-	}
-
 	/* STAT_PHASE_2: Create sequence */
 
 	/* Load MLLI tables to SRAM if necessary */
@@ -2107,7 +2042,6 @@
 	/* No generated IV required */
 	areq_ctx->backup_iv = req->iv;
 	areq_ctx->assoclen = req->assoclen;
-	areq_ctx->backup_giv = NULL;
 	areq_ctx->is_gcm4543 = false;
 
 	areq_ctx->plaintext_authenticate_only = false;
@@ -2139,7 +2073,6 @@
 	/* No generated IV required */
 	areq_ctx->backup_iv = req->iv;
 	areq_ctx->assoclen = req->assoclen;
-	areq_ctx->backup_giv = NULL;
 	areq_ctx->is_gcm4543 = true;
 
 	cc_proc_rfc4309_ccm(req);
@@ -2161,7 +2094,6 @@
 	/* No generated IV required */
 	areq_ctx->backup_iv = req->iv;
 	areq_ctx->assoclen = req->assoclen;
-	areq_ctx->backup_giv = NULL;
 	areq_ctx->is_gcm4543 = false;
 
 	areq_ctx->plaintext_authenticate_only = false;
@@ -2191,7 +2123,6 @@
 	/* No generated IV required */
 	areq_ctx->backup_iv = req->iv;
 	areq_ctx->assoclen = req->assoclen;
-	areq_ctx->backup_giv = NULL;
 
 	areq_ctx->is_gcm4543 = true;
 	cc_proc_rfc4309_ccm(req);
@@ -2311,8 +2242,6 @@
 	/* No generated IV required */
 	areq_ctx->backup_iv = req->iv;
 	areq_ctx->assoclen = req->assoclen;
-	areq_ctx->backup_giv = NULL;
-
 	areq_ctx->plaintext_authenticate_only = false;
 
 	cc_proc_rfc4_gcm(req);
@@ -2328,9 +2257,16 @@
 static int cc_rfc4543_gcm_encrypt(struct aead_request *req)
 {
 	/* Very similar to cc_aead_encrypt() above. */
-
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
 	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
-	int rc;
+	int rc = -EINVAL;
+
+	if (!valid_assoclen(req)) {
+		dev_err(dev, "invalid Assoclen:%u\n", req->assoclen);
+		goto out;
+	}
 
 	memset(areq_ctx, 0, sizeof(*areq_ctx));
 
@@ -2340,7 +2276,6 @@
 	/* No generated IV required */
 	areq_ctx->backup_iv = req->iv;
 	areq_ctx->assoclen = req->assoclen;
-	areq_ctx->backup_giv = NULL;
 
 	cc_proc_rfc4_gcm(req);
 	areq_ctx->is_gcm4543 = true;
@@ -2348,7 +2283,7 @@
 	rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_ENCRYPT);
 	if (rc != -EINPROGRESS && rc != -EBUSY)
 		req->iv = areq_ctx->backup_iv;
-
+out:
 	return rc;
 }
 
@@ -2372,8 +2307,6 @@
 	/* No generated IV required */
 	areq_ctx->backup_iv = req->iv;
 	areq_ctx->assoclen = req->assoclen;
-	areq_ctx->backup_giv = NULL;
-
 	areq_ctx->plaintext_authenticate_only = false;
 
 	cc_proc_rfc4_gcm(req);
@@ -2389,9 +2322,16 @@
 static int cc_rfc4543_gcm_decrypt(struct aead_request *req)
 {
 	/* Very similar to cc_aead_decrypt() above. */
-
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct cc_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct device *dev = drvdata_to_dev(ctx->drvdata);
 	struct aead_req_ctx *areq_ctx = aead_request_ctx(req);
-	int rc;
+	int rc = -EINVAL;
+
+	if (!valid_assoclen(req)) {
+		dev_err(dev, "invalid Assoclen:%u\n", req->assoclen);
+		goto out;
+	}
 
 	memset(areq_ctx, 0, sizeof(*areq_ctx));
 
@@ -2401,7 +2341,6 @@
 	/* No generated IV required */
 	areq_ctx->backup_iv = req->iv;
 	areq_ctx->assoclen = req->assoclen;
-	areq_ctx->backup_giv = NULL;
 
 	cc_proc_rfc4_gcm(req);
 	areq_ctx->is_gcm4543 = true;
@@ -2409,7 +2348,7 @@
 	rc = cc_proc_aead(req, DRV_CRYPTO_DIRECTION_DECRYPT);
 	if (rc != -EINPROGRESS && rc != -EBUSY)
 		req->iv = areq_ctx->backup_iv;
-
+out:
 	return rc;
 }
 
diff --git a/drivers/crypto/ccree/cc_aead.h b/drivers/crypto/ccree/cc_aead.h
index e51724b..f12169b5 100644
--- a/drivers/crypto/ccree/cc_aead.h
+++ b/drivers/crypto/ccree/cc_aead.h
@@ -65,8 +65,7 @@
 	unsigned int hw_iv_size ____cacheline_aligned;
 	/* used to prevent cache coherence problem */
 	u8 backup_mac[MAX_MAC_SIZE];
-	u8 *backup_iv; /*store iv for generated IV flow*/
-	u8 *backup_giv; /*store iv for rfc3686(ctr) flow*/
+	u8 *backup_iv; /* store orig iv */
 	u32 assoclen; /* internal assoclen */
 	dma_addr_t mac_buf_dma_addr; /* internal ICV DMA buffer */
 	/* buffer for internal ccm configurations */
diff --git a/drivers/crypto/ccree/cc_buffer_mgr.c b/drivers/crypto/ccree/cc_buffer_mgr.c
index c81ad33..a72586e 100644
--- a/drivers/crypto/ccree/cc_buffer_mgr.c
+++ b/drivers/crypto/ccree/cc_buffer_mgr.c
@@ -100,27 +100,6 @@
 }
 
 /**
- * cc_zero_sgl() - Zero scatter scatter list data.
- *
- * @sgl:
- */
-void cc_zero_sgl(struct scatterlist *sgl, u32 data_len)
-{
-	struct scatterlist *current_sg = sgl;
-	int sg_index = 0;
-
-	while (sg_index <= data_len) {
-		if (!current_sg) {
-			/* reached the end of the sgl --> just return back */
-			return;
-		}
-		memset(sg_virt(current_sg), 0, current_sg->length);
-		sg_index += current_sg->length;
-		current_sg = sg_next(current_sg);
-	}
-}
-
-/**
  * cc_copy_sg_portion() - Copy scatter list data,
  * from to_skip to end, to dest and vice versa
  *
diff --git a/drivers/crypto/ccree/cc_buffer_mgr.h b/drivers/crypto/ccree/cc_buffer_mgr.h
index a726016..af43487 100644
--- a/drivers/crypto/ccree/cc_buffer_mgr.h
+++ b/drivers/crypto/ccree/cc_buffer_mgr.h
@@ -66,6 +66,4 @@
 void cc_copy_sg_portion(struct device *dev, u8 *dest, struct scatterlist *sg,
 			u32 to_skip, u32 end, enum cc_sg_cpy_direct direct);
 
-void cc_zero_sgl(struct scatterlist *sgl, u32 data_len);
-
 #endif /*__BUFFER_MGR_H__*/
diff --git a/drivers/crypto/ccree/cc_cipher.c b/drivers/crypto/ccree/cc_cipher.c
index 5b58226..254b4879 100644
--- a/drivers/crypto/ccree/cc_cipher.c
+++ b/drivers/crypto/ccree/cc_cipher.c
@@ -5,7 +5,7 @@
 #include <linux/module.h>
 #include <crypto/algapi.h>
 #include <crypto/internal/skcipher.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/xts.h>
 #include <crypto/sm4.h>
 #include <crypto/scatterwalk.h>
@@ -116,10 +116,6 @@
 	case S_DIN_to_AES:
 		switch (ctx_p->cipher_mode) {
 		case DRV_CIPHER_XTS:
-			if (size >= AES_BLOCK_SIZE &&
-			    IS_ALIGNED(size, AES_BLOCK_SIZE))
-				return 0;
-			break;
 		case DRV_CIPHER_CBC_CTS:
 			if (size >= AES_BLOCK_SIZE)
 				return 0;
@@ -411,16 +407,9 @@
 	 * HW does the expansion on its own.
 	 */
 	if (ctx_p->flow_mode == S_DIN_to_DES) {
-		u32 tmp[DES3_EDE_EXPKEY_WORDS];
-		if (keylen == DES3_EDE_KEY_SIZE &&
-		    __des3_ede_setkey(tmp, &tfm->crt_flags, key,
-				      DES3_EDE_KEY_SIZE)) {
-			dev_dbg(dev, "weak 3DES key");
-			return -EINVAL;
-		} else if (!des_ekey(tmp, key) &&
-			   (crypto_tfm_get_flags(tfm) &
-			    CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
-			tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
+		if ((keylen == DES3_EDE_KEY_SIZE &&
+		     verify_skcipher_des3_key(sktfm, key)) ||
+		    verify_skcipher_des_key(sktfm, key)) {
 			dev_dbg(dev, "weak DES key");
 			return -EINVAL;
 		}
@@ -945,7 +934,7 @@
 	{
 		.name = "xts(paes)",
 		.driver_name = "xts-paes-ccree",
-		.blocksize = AES_BLOCK_SIZE,
+		.blocksize = 1,
 		.template_skcipher = {
 			.setkey = cc_cipher_sethkey,
 			.encrypt = cc_cipher_encrypt,
@@ -963,7 +952,7 @@
 	{
 		.name = "xts512(paes)",
 		.driver_name = "xts-paes-du512-ccree",
-		.blocksize = AES_BLOCK_SIZE,
+		.blocksize = 1,
 		.template_skcipher = {
 			.setkey = cc_cipher_sethkey,
 			.encrypt = cc_cipher_encrypt,
@@ -982,7 +971,7 @@
 	{
 		.name = "xts4096(paes)",
 		.driver_name = "xts-paes-du4096-ccree",
-		.blocksize = AES_BLOCK_SIZE,
+		.blocksize = 1,
 		.template_skcipher = {
 			.setkey = cc_cipher_sethkey,
 			.encrypt = cc_cipher_encrypt,
@@ -1203,7 +1192,7 @@
 	{
 		.name = "xts(aes)",
 		.driver_name = "xts-aes-ccree",
-		.blocksize = AES_BLOCK_SIZE,
+		.blocksize = 1,
 		.template_skcipher = {
 			.setkey = cc_cipher_setkey,
 			.encrypt = cc_cipher_encrypt,
@@ -1220,7 +1209,7 @@
 	{
 		.name = "xts512(aes)",
 		.driver_name = "xts-aes-du512-ccree",
-		.blocksize = AES_BLOCK_SIZE,
+		.blocksize = 1,
 		.template_skcipher = {
 			.setkey = cc_cipher_setkey,
 			.encrypt = cc_cipher_encrypt,
@@ -1238,7 +1227,7 @@
 	{
 		.name = "xts4096(aes)",
 		.driver_name = "xts-aes-du4096-ccree",
-		.blocksize = AES_BLOCK_SIZE,
+		.blocksize = 1,
 		.template_skcipher = {
 			.setkey = cc_cipher_setkey,
 			.encrypt = cc_cipher_encrypt,
diff --git a/drivers/crypto/ccree/cc_driver.c b/drivers/crypto/ccree/cc_driver.c
index 980aa04..8b8eee5 100644
--- a/drivers/crypto/ccree/cc_driver.c
+++ b/drivers/crypto/ccree/cc_driver.c
@@ -22,7 +22,6 @@
 #include "cc_cipher.h"
 #include "cc_aead.h"
 #include "cc_hash.h"
-#include "cc_ivgen.h"
 #include "cc_sram_mgr.h"
 #include "cc_pm.h"
 #include "cc_fips.h"
@@ -339,10 +338,8 @@
 
 	/* Then IRQ */
 	new_drvdata->irq = platform_get_irq(plat_dev, 0);
-	if (new_drvdata->irq < 0) {
-		dev_err(dev, "Failed getting IRQ resource\n");
+	if (new_drvdata->irq < 0)
 		return new_drvdata->irq;
-	}
 
 	init_completion(&new_drvdata->hw_queue_avail);
 
@@ -421,7 +418,7 @@
 			}
 			break;
 		default:
-			dev_err(dev, "Unsupported engines configration.\n");
+			dev_err(dev, "Unsupported engines configuration.\n");
 			rc = -EINVAL;
 			goto post_clk_err;
 		}
@@ -503,17 +500,11 @@
 		goto post_buf_mgr_err;
 	}
 
-	rc = cc_ivgen_init(new_drvdata);
-	if (rc) {
-		dev_err(dev, "cc_ivgen_init failed\n");
-		goto post_buf_mgr_err;
-	}
-
 	/* Allocate crypto algs */
 	rc = cc_cipher_alloc(new_drvdata);
 	if (rc) {
 		dev_err(dev, "cc_cipher_alloc failed\n");
-		goto post_ivgen_err;
+		goto post_buf_mgr_err;
 	}
 
 	/* hash must be allocated before aead since hash exports APIs */
@@ -544,8 +535,6 @@
 	cc_hash_free(new_drvdata);
 post_cipher_err:
 	cc_cipher_free(new_drvdata);
-post_ivgen_err:
-	cc_ivgen_fini(new_drvdata);
 post_buf_mgr_err:
 	 cc_buffer_mgr_fini(new_drvdata);
 post_req_mgr_err:
@@ -577,7 +566,6 @@
 	cc_aead_free(drvdata);
 	cc_hash_free(drvdata);
 	cc_cipher_free(drvdata);
-	cc_ivgen_fini(drvdata);
 	cc_pm_fini(drvdata);
 	cc_buffer_mgr_fini(drvdata);
 	cc_req_mgr_fini(drvdata);
diff --git a/drivers/crypto/ccree/cc_driver.h b/drivers/crypto/ccree/cc_driver.h
index 7cd9938..ab31d4a 100644
--- a/drivers/crypto/ccree/cc_driver.h
+++ b/drivers/crypto/ccree/cc_driver.h
@@ -126,15 +126,6 @@
 struct cc_crypto_req {
 	void (*user_cb)(struct device *dev, void *req, int err);
 	void *user_arg;
-	dma_addr_t ivgen_dma_addr[CC_MAX_IVGEN_DMA_ADDRESSES];
-	/* For the first 'ivgen_dma_addr_len' addresses of this array,
-	 * generated IV would be placed in it by send_request().
-	 * Same generated IV for all addresses!
-	 */
-	/* Amount of 'ivgen_dma_addr' elements to be filled. */
-	unsigned int ivgen_dma_addr_len;
-	/* The generated IV size required, 8/16 B allowed. */
-	unsigned int ivgen_size;
 	struct completion seq_compl; /* request completion */
 	struct cc_cpp_req cpp;
 };
@@ -158,7 +149,6 @@
 	void *aead_handle;
 	void *request_mgr_handle;
 	void *fips_handle;
-	void *ivgen_handle;
 	void *sram_mgr_handle;
 	void *debugfs;
 	struct clk *clk;
diff --git a/drivers/crypto/ccree/cc_fips.c b/drivers/crypto/ccree/cc_fips.c
index 5ad3ffb..4c8bce3 100644
--- a/drivers/crypto/ccree/cc_fips.c
+++ b/drivers/crypto/ccree/cc_fips.c
@@ -3,6 +3,7 @@
 
 #include <linux/kernel.h>
 #include <linux/fips.h>
+#include <linux/notifier.h>
 
 #include "cc_driver.h"
 #include "cc_fips.h"
@@ -11,6 +12,8 @@
 
 struct cc_fips_handle {
 	struct tasklet_struct tasklet;
+	struct notifier_block nb;
+	struct cc_drvdata *drvdata;
 };
 
 /* The function called once at driver entry point to check
@@ -21,7 +24,13 @@
 	u32 reg;
 
 	reg = cc_ioread(drvdata, CC_REG(GPR_HOST));
-	return (reg == (CC_FIPS_SYNC_TEE_STATUS | CC_FIPS_SYNC_MODULE_OK));
+	/* Did the TEE report status? */
+	if (reg & CC_FIPS_SYNC_TEE_STATUS)
+		/* Yes. Is it OK? */
+		return (reg & CC_FIPS_SYNC_MODULE_OK);
+
+	/* No. It's either not in use or will be reported later */
+	return true;
 }
 
 /*
@@ -40,6 +49,21 @@
 	cc_iowrite(drvdata, CC_REG(HOST_GPR0), val);
 }
 
+/* Push REE side FIPS test failure to TEE side */
+static int cc_ree_fips_failure(struct notifier_block *nb, unsigned long unused1,
+			       void *unused2)
+{
+	struct cc_fips_handle *fips_h =
+				container_of(nb, struct cc_fips_handle, nb);
+	struct cc_drvdata *drvdata = fips_h->drvdata;
+	struct device *dev = drvdata_to_dev(drvdata);
+
+	cc_set_ree_fips_status(drvdata, false);
+	dev_info(dev, "Notifying TEE of FIPS test failure...\n");
+
+	return NOTIFY_OK;
+}
+
 void cc_fips_fini(struct cc_drvdata *drvdata)
 {
 	struct cc_fips_handle *fips_h = drvdata->fips_handle;
@@ -47,6 +71,8 @@
 	if (drvdata->hw_rev < CC_HW_REV_712 || !fips_h)
 		return;
 
+	atomic_notifier_chain_unregister(&fips_fail_notif_chain, &fips_h->nb);
+
 	/* Kill tasklet */
 	tasklet_kill(&fips_h->tasklet);
 	drvdata->fips_handle = NULL;
@@ -118,6 +144,9 @@
 
 	dev_dbg(dev, "Initializing fips tasklet\n");
 	tasklet_init(&fips_h->tasklet, fips_dsr, (unsigned long)p_drvdata);
+	fips_h->drvdata = p_drvdata;
+	fips_h->nb.notifier_call = cc_ree_fips_failure;
+	atomic_notifier_chain_register(&fips_fail_notif_chain, &fips_h->nb);
 
 	cc_tee_handle_fips_error(p_drvdata);
 
diff --git a/drivers/crypto/ccree/cc_hash.c b/drivers/crypto/ccree/cc_hash.c
index a6abe4e..bc71bdf 100644
--- a/drivers/crypto/ccree/cc_hash.c
+++ b/drivers/crypto/ccree/cc_hash.c
@@ -25,27 +25,27 @@
 	struct list_head hash_list;
 };
 
-static const u32 digest_len_init[] = {
+static const u32 cc_digest_len_init[] = {
 	0x00000040, 0x00000000, 0x00000000, 0x00000000 };
-static const u32 md5_init[] = {
+static const u32 cc_md5_init[] = {
 	SHA1_H3, SHA1_H2, SHA1_H1, SHA1_H0 };
-static const u32 sha1_init[] = {
+static const u32 cc_sha1_init[] = {
 	SHA1_H4, SHA1_H3, SHA1_H2, SHA1_H1, SHA1_H0 };
-static const u32 sha224_init[] = {
+static const u32 cc_sha224_init[] = {
 	SHA224_H7, SHA224_H6, SHA224_H5, SHA224_H4,
 	SHA224_H3, SHA224_H2, SHA224_H1, SHA224_H0 };
-static const u32 sha256_init[] = {
+static const u32 cc_sha256_init[] = {
 	SHA256_H7, SHA256_H6, SHA256_H5, SHA256_H4,
 	SHA256_H3, SHA256_H2, SHA256_H1, SHA256_H0 };
-static const u32 digest_len_sha512_init[] = {
+static const u32 cc_digest_len_sha512_init[] = {
 	0x00000080, 0x00000000, 0x00000000, 0x00000000 };
-static u64 sha384_init[] = {
+static u64 cc_sha384_init[] = {
 	SHA384_H7, SHA384_H6, SHA384_H5, SHA384_H4,
 	SHA384_H3, SHA384_H2, SHA384_H1, SHA384_H0 };
-static u64 sha512_init[] = {
+static u64 cc_sha512_init[] = {
 	SHA512_H7, SHA512_H6, SHA512_H5, SHA512_H4,
 	SHA512_H3, SHA512_H2, SHA512_H1, SHA512_H0 };
-static const u32 sm3_init[] = {
+static const u32 cc_sm3_init[] = {
 	SM3_IVH, SM3_IVG, SM3_IVF, SM3_IVE,
 	SM3_IVD, SM3_IVC, SM3_IVB, SM3_IVA };
 
@@ -144,10 +144,11 @@
 			if (ctx->hash_mode == DRV_HASH_SHA512 ||
 			    ctx->hash_mode == DRV_HASH_SHA384)
 				memcpy(state->digest_bytes_len,
-				       digest_len_sha512_init,
+				       cc_digest_len_sha512_init,
 				       ctx->hash_len);
 			else
-				memcpy(state->digest_bytes_len, digest_len_init,
+				memcpy(state->digest_bytes_len,
+				       cc_digest_len_init,
 				       ctx->hash_len);
 		}
 
@@ -1873,26 +1874,26 @@
 	int rc = 0;
 
 	/* Copy-to-sram digest-len */
-	cc_set_sram_desc(digest_len_init, sram_buff_ofs,
-			 ARRAY_SIZE(digest_len_init), larval_seq,
+	cc_set_sram_desc(cc_digest_len_init, sram_buff_ofs,
+			 ARRAY_SIZE(cc_digest_len_init), larval_seq,
 			 &larval_seq_len);
 	rc = send_request_init(drvdata, larval_seq, larval_seq_len);
 	if (rc)
 		goto init_digest_const_err;
 
-	sram_buff_ofs += sizeof(digest_len_init);
+	sram_buff_ofs += sizeof(cc_digest_len_init);
 	larval_seq_len = 0;
 
 	if (large_sha_supported) {
 		/* Copy-to-sram digest-len for sha384/512 */
-		cc_set_sram_desc(digest_len_sha512_init, sram_buff_ofs,
-				 ARRAY_SIZE(digest_len_sha512_init),
+		cc_set_sram_desc(cc_digest_len_sha512_init, sram_buff_ofs,
+				 ARRAY_SIZE(cc_digest_len_sha512_init),
 				 larval_seq, &larval_seq_len);
 		rc = send_request_init(drvdata, larval_seq, larval_seq_len);
 		if (rc)
 			goto init_digest_const_err;
 
-		sram_buff_ofs += sizeof(digest_len_sha512_init);
+		sram_buff_ofs += sizeof(cc_digest_len_sha512_init);
 		larval_seq_len = 0;
 	}
 
@@ -1900,64 +1901,64 @@
 	hash_handle->larval_digest_sram_addr = sram_buff_ofs;
 
 	/* Copy-to-sram initial SHA* digests */
-	cc_set_sram_desc(md5_init, sram_buff_ofs, ARRAY_SIZE(md5_init),
+	cc_set_sram_desc(cc_md5_init, sram_buff_ofs, ARRAY_SIZE(cc_md5_init),
 			 larval_seq, &larval_seq_len);
 	rc = send_request_init(drvdata, larval_seq, larval_seq_len);
 	if (rc)
 		goto init_digest_const_err;
-	sram_buff_ofs += sizeof(md5_init);
+	sram_buff_ofs += sizeof(cc_md5_init);
 	larval_seq_len = 0;
 
-	cc_set_sram_desc(sha1_init, sram_buff_ofs,
-			 ARRAY_SIZE(sha1_init), larval_seq,
+	cc_set_sram_desc(cc_sha1_init, sram_buff_ofs,
+			 ARRAY_SIZE(cc_sha1_init), larval_seq,
 			 &larval_seq_len);
 	rc = send_request_init(drvdata, larval_seq, larval_seq_len);
 	if (rc)
 		goto init_digest_const_err;
-	sram_buff_ofs += sizeof(sha1_init);
+	sram_buff_ofs += sizeof(cc_sha1_init);
 	larval_seq_len = 0;
 
-	cc_set_sram_desc(sha224_init, sram_buff_ofs,
-			 ARRAY_SIZE(sha224_init), larval_seq,
+	cc_set_sram_desc(cc_sha224_init, sram_buff_ofs,
+			 ARRAY_SIZE(cc_sha224_init), larval_seq,
 			 &larval_seq_len);
 	rc = send_request_init(drvdata, larval_seq, larval_seq_len);
 	if (rc)
 		goto init_digest_const_err;
-	sram_buff_ofs += sizeof(sha224_init);
+	sram_buff_ofs += sizeof(cc_sha224_init);
 	larval_seq_len = 0;
 
-	cc_set_sram_desc(sha256_init, sram_buff_ofs,
-			 ARRAY_SIZE(sha256_init), larval_seq,
+	cc_set_sram_desc(cc_sha256_init, sram_buff_ofs,
+			 ARRAY_SIZE(cc_sha256_init), larval_seq,
 			 &larval_seq_len);
 	rc = send_request_init(drvdata, larval_seq, larval_seq_len);
 	if (rc)
 		goto init_digest_const_err;
-	sram_buff_ofs += sizeof(sha256_init);
+	sram_buff_ofs += sizeof(cc_sha256_init);
 	larval_seq_len = 0;
 
 	if (sm3_supported) {
-		cc_set_sram_desc(sm3_init, sram_buff_ofs,
-				 ARRAY_SIZE(sm3_init), larval_seq,
+		cc_set_sram_desc(cc_sm3_init, sram_buff_ofs,
+				 ARRAY_SIZE(cc_sm3_init), larval_seq,
 				 &larval_seq_len);
 		rc = send_request_init(drvdata, larval_seq, larval_seq_len);
 		if (rc)
 			goto init_digest_const_err;
-		sram_buff_ofs += sizeof(sm3_init);
+		sram_buff_ofs += sizeof(cc_sm3_init);
 		larval_seq_len = 0;
 	}
 
 	if (large_sha_supported) {
-		cc_set_sram_desc((u32 *)sha384_init, sram_buff_ofs,
-				 (ARRAY_SIZE(sha384_init) * 2), larval_seq,
+		cc_set_sram_desc((u32 *)cc_sha384_init, sram_buff_ofs,
+				 (ARRAY_SIZE(cc_sha384_init) * 2), larval_seq,
 				 &larval_seq_len);
 		rc = send_request_init(drvdata, larval_seq, larval_seq_len);
 		if (rc)
 			goto init_digest_const_err;
-		sram_buff_ofs += sizeof(sha384_init);
+		sram_buff_ofs += sizeof(cc_sha384_init);
 		larval_seq_len = 0;
 
-		cc_set_sram_desc((u32 *)sha512_init, sram_buff_ofs,
-				 (ARRAY_SIZE(sha512_init) * 2), larval_seq,
+		cc_set_sram_desc((u32 *)cc_sha512_init, sram_buff_ofs,
+				 (ARRAY_SIZE(cc_sha512_init) * 2), larval_seq,
 				 &larval_seq_len);
 		rc = send_request_init(drvdata, larval_seq, larval_seq_len);
 		if (rc)
@@ -1986,8 +1987,8 @@
  */
 void __init cc_hash_global_init(void)
 {
-	cc_swap_dwords((u32 *)&sha384_init, (ARRAY_SIZE(sha384_init) * 2));
-	cc_swap_dwords((u32 *)&sha512_init, (ARRAY_SIZE(sha512_init) * 2));
+	cc_swap_dwords((u32 *)&cc_sha384_init, (ARRAY_SIZE(cc_sha384_init) * 2));
+	cc_swap_dwords((u32 *)&cc_sha512_init, (ARRAY_SIZE(cc_sha512_init) * 2));
 }
 
 int cc_hash_alloc(struct cc_drvdata *drvdata)
@@ -2006,18 +2007,18 @@
 	INIT_LIST_HEAD(&hash_handle->hash_list);
 	drvdata->hash_handle = hash_handle;
 
-	sram_size_to_alloc = sizeof(digest_len_init) +
-			sizeof(md5_init) +
-			sizeof(sha1_init) +
-			sizeof(sha224_init) +
-			sizeof(sha256_init);
+	sram_size_to_alloc = sizeof(cc_digest_len_init) +
+			sizeof(cc_md5_init) +
+			sizeof(cc_sha1_init) +
+			sizeof(cc_sha224_init) +
+			sizeof(cc_sha256_init);
 
 	if (drvdata->hw_rev >= CC_HW_REV_713)
-		sram_size_to_alloc += sizeof(sm3_init);
+		sram_size_to_alloc += sizeof(cc_sm3_init);
 
 	if (drvdata->hw_rev >= CC_HW_REV_712)
-		sram_size_to_alloc += sizeof(digest_len_sha512_init) +
-			sizeof(sha384_init) + sizeof(sha512_init);
+		sram_size_to_alloc += sizeof(cc_digest_len_sha512_init) +
+			sizeof(cc_sha384_init) + sizeof(cc_sha512_init);
 
 	sram_buff = cc_sram_alloc(drvdata, sram_size_to_alloc);
 	if (sram_buff == NULL_SRAM_ADDR) {
@@ -2258,22 +2259,22 @@
 {
 	switch (mode) {
 	case DRV_HASH_MD5:
-		return md5_init;
+		return cc_md5_init;
 	case DRV_HASH_SHA1:
-		return sha1_init;
+		return cc_sha1_init;
 	case DRV_HASH_SHA224:
-		return sha224_init;
+		return cc_sha224_init;
 	case DRV_HASH_SHA256:
-		return sha256_init;
+		return cc_sha256_init;
 	case DRV_HASH_SHA384:
-		return sha384_init;
+		return cc_sha384_init;
 	case DRV_HASH_SHA512:
-		return sha512_init;
+		return cc_sha512_init;
 	case DRV_HASH_SM3:
-		return sm3_init;
+		return cc_sm3_init;
 	default:
 		dev_err(dev, "Invalid hash mode (%d)\n", mode);
-		return md5_init;
+		return cc_md5_init;
 	}
 }
 
@@ -2301,40 +2302,40 @@
 		return (hash_handle->larval_digest_sram_addr);
 	case DRV_HASH_SHA1:
 		return (hash_handle->larval_digest_sram_addr +
-			sizeof(md5_init));
+			sizeof(cc_md5_init));
 	case DRV_HASH_SHA224:
 		return (hash_handle->larval_digest_sram_addr +
-			sizeof(md5_init) +
-			sizeof(sha1_init));
+			sizeof(cc_md5_init) +
+			sizeof(cc_sha1_init));
 	case DRV_HASH_SHA256:
 		return (hash_handle->larval_digest_sram_addr +
-			sizeof(md5_init) +
-			sizeof(sha1_init) +
-			sizeof(sha224_init));
+			sizeof(cc_md5_init) +
+			sizeof(cc_sha1_init) +
+			sizeof(cc_sha224_init));
 	case DRV_HASH_SM3:
 		return (hash_handle->larval_digest_sram_addr +
-			sizeof(md5_init) +
-			sizeof(sha1_init) +
-			sizeof(sha224_init) +
-			sizeof(sha256_init));
+			sizeof(cc_md5_init) +
+			sizeof(cc_sha1_init) +
+			sizeof(cc_sha224_init) +
+			sizeof(cc_sha256_init));
 	case DRV_HASH_SHA384:
 		addr = (hash_handle->larval_digest_sram_addr +
-			sizeof(md5_init) +
-			sizeof(sha1_init) +
-			sizeof(sha224_init) +
-			sizeof(sha256_init));
+			sizeof(cc_md5_init) +
+			sizeof(cc_sha1_init) +
+			sizeof(cc_sha224_init) +
+			sizeof(cc_sha256_init));
 		if (sm3_supported)
-			addr += sizeof(sm3_init);
+			addr += sizeof(cc_sm3_init);
 		return addr;
 	case DRV_HASH_SHA512:
 		addr = (hash_handle->larval_digest_sram_addr +
-			sizeof(md5_init) +
-			sizeof(sha1_init) +
-			sizeof(sha224_init) +
-			sizeof(sha256_init) +
-			sizeof(sha384_init));
+			sizeof(cc_md5_init) +
+			sizeof(cc_sha1_init) +
+			sizeof(cc_sha224_init) +
+			sizeof(cc_sha256_init) +
+			sizeof(cc_sha384_init));
 		if (sm3_supported)
-			addr += sizeof(sm3_init);
+			addr += sizeof(cc_sm3_init);
 		return addr;
 	default:
 		dev_err(dev, "Invalid hash mode (%d)\n", mode);
@@ -2360,7 +2361,7 @@
 #if (CC_DEV_SHA_MAX > 256)
 	case DRV_HASH_SHA384:
 	case DRV_HASH_SHA512:
-		return  digest_len_addr + sizeof(digest_len_init);
+		return  digest_len_addr + sizeof(cc_digest_len_init);
 #endif
 	default:
 		return digest_len_addr; /*to avoid kernel crash*/
diff --git a/drivers/crypto/ccree/cc_ivgen.c b/drivers/crypto/ccree/cc_ivgen.c
deleted file mode 100644
index 99dc693..0000000
--- a/drivers/crypto/ccree/cc_ivgen.c
+++ /dev/null
@@ -1,276 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (C) 2012-2019 ARM Limited (or its affiliates). */
-
-#include <crypto/ctr.h>
-#include "cc_driver.h"
-#include "cc_ivgen.h"
-#include "cc_request_mgr.h"
-#include "cc_sram_mgr.h"
-#include "cc_buffer_mgr.h"
-
-/* The max. size of pool *MUST* be <= SRAM total size */
-#define CC_IVPOOL_SIZE 1024
-/* The first 32B fraction of pool are dedicated to the
- * next encryption "key" & "IV" for pool regeneration
- */
-#define CC_IVPOOL_META_SIZE (CC_AES_IV_SIZE + AES_KEYSIZE_128)
-#define CC_IVPOOL_GEN_SEQ_LEN	4
-
-/**
- * struct cc_ivgen_ctx -IV pool generation context
- * @pool:          the start address of the iv-pool resides in internal RAM
- * @ctr_key_dma:   address of pool's encryption key material in internal RAM
- * @ctr_iv_dma:    address of pool's counter iv in internal RAM
- * @next_iv_ofs:   the offset to the next available IV in pool
- * @pool_meta:     virt. address of the initial enc. key/IV
- * @pool_meta_dma: phys. address of the initial enc. key/IV
- */
-struct cc_ivgen_ctx {
-	cc_sram_addr_t pool;
-	cc_sram_addr_t ctr_key;
-	cc_sram_addr_t ctr_iv;
-	u32 next_iv_ofs;
-	u8 *pool_meta;
-	dma_addr_t pool_meta_dma;
-};
-
-/*!
- * Generates CC_IVPOOL_SIZE of random bytes by
- * encrypting 0's using AES128-CTR.
- *
- * \param ivgen iv-pool context
- * \param iv_seq IN/OUT array to the descriptors sequence
- * \param iv_seq_len IN/OUT pointer to the sequence length
- */
-static int cc_gen_iv_pool(struct cc_ivgen_ctx *ivgen_ctx,
-			  struct cc_hw_desc iv_seq[], unsigned int *iv_seq_len)
-{
-	unsigned int idx = *iv_seq_len;
-
-	if ((*iv_seq_len + CC_IVPOOL_GEN_SEQ_LEN) > CC_IVPOOL_SEQ_LEN) {
-		/* The sequence will be longer than allowed */
-		return -EINVAL;
-	}
-	/* Setup key */
-	hw_desc_init(&iv_seq[idx]);
-	set_din_sram(&iv_seq[idx], ivgen_ctx->ctr_key, AES_KEYSIZE_128);
-	set_setup_mode(&iv_seq[idx], SETUP_LOAD_KEY0);
-	set_cipher_config0(&iv_seq[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
-	set_flow_mode(&iv_seq[idx], S_DIN_to_AES);
-	set_key_size_aes(&iv_seq[idx], CC_AES_128_BIT_KEY_SIZE);
-	set_cipher_mode(&iv_seq[idx], DRV_CIPHER_CTR);
-	idx++;
-
-	/* Setup cipher state */
-	hw_desc_init(&iv_seq[idx]);
-	set_din_sram(&iv_seq[idx], ivgen_ctx->ctr_iv, CC_AES_IV_SIZE);
-	set_cipher_config0(&iv_seq[idx], DESC_DIRECTION_ENCRYPT_ENCRYPT);
-	set_flow_mode(&iv_seq[idx], S_DIN_to_AES);
-	set_setup_mode(&iv_seq[idx], SETUP_LOAD_STATE1);
-	set_key_size_aes(&iv_seq[idx], CC_AES_128_BIT_KEY_SIZE);
-	set_cipher_mode(&iv_seq[idx], DRV_CIPHER_CTR);
-	idx++;
-
-	/* Perform dummy encrypt to skip first block */
-	hw_desc_init(&iv_seq[idx]);
-	set_din_const(&iv_seq[idx], 0, CC_AES_IV_SIZE);
-	set_dout_sram(&iv_seq[idx], ivgen_ctx->pool, CC_AES_IV_SIZE);
-	set_flow_mode(&iv_seq[idx], DIN_AES_DOUT);
-	idx++;
-
-	/* Generate IV pool */
-	hw_desc_init(&iv_seq[idx]);
-	set_din_const(&iv_seq[idx], 0, CC_IVPOOL_SIZE);
-	set_dout_sram(&iv_seq[idx], ivgen_ctx->pool, CC_IVPOOL_SIZE);
-	set_flow_mode(&iv_seq[idx], DIN_AES_DOUT);
-	idx++;
-
-	*iv_seq_len = idx; /* Update sequence length */
-
-	/* queue ordering assures pool readiness */
-	ivgen_ctx->next_iv_ofs = CC_IVPOOL_META_SIZE;
-
-	return 0;
-}
-
-/*!
- * Generates the initial pool in SRAM.
- * This function should be invoked when resuming driver.
- *
- * \param drvdata
- *
- * \return int Zero for success, negative value otherwise.
- */
-int cc_init_iv_sram(struct cc_drvdata *drvdata)
-{
-	struct cc_ivgen_ctx *ivgen_ctx = drvdata->ivgen_handle;
-	struct cc_hw_desc iv_seq[CC_IVPOOL_SEQ_LEN];
-	unsigned int iv_seq_len = 0;
-	int rc;
-
-	/* Generate initial enc. key/iv */
-	get_random_bytes(ivgen_ctx->pool_meta, CC_IVPOOL_META_SIZE);
-
-	/* The first 32B reserved for the enc. Key/IV */
-	ivgen_ctx->ctr_key = ivgen_ctx->pool;
-	ivgen_ctx->ctr_iv = ivgen_ctx->pool + AES_KEYSIZE_128;
-
-	/* Copy initial enc. key and IV to SRAM at a single descriptor */
-	hw_desc_init(&iv_seq[iv_seq_len]);
-	set_din_type(&iv_seq[iv_seq_len], DMA_DLLI, ivgen_ctx->pool_meta_dma,
-		     CC_IVPOOL_META_SIZE, NS_BIT);
-	set_dout_sram(&iv_seq[iv_seq_len], ivgen_ctx->pool,
-		      CC_IVPOOL_META_SIZE);
-	set_flow_mode(&iv_seq[iv_seq_len], BYPASS);
-	iv_seq_len++;
-
-	/* Generate initial pool */
-	rc = cc_gen_iv_pool(ivgen_ctx, iv_seq, &iv_seq_len);
-	if (rc)
-		return rc;
-
-	/* Fire-and-forget */
-	return send_request_init(drvdata, iv_seq, iv_seq_len);
-}
-
-/*!
- * Free iv-pool and ivgen context.
- *
- * \param drvdata
- */
-void cc_ivgen_fini(struct cc_drvdata *drvdata)
-{
-	struct cc_ivgen_ctx *ivgen_ctx = drvdata->ivgen_handle;
-	struct device *device = &drvdata->plat_dev->dev;
-
-	if (!ivgen_ctx)
-		return;
-
-	if (ivgen_ctx->pool_meta) {
-		memset(ivgen_ctx->pool_meta, 0, CC_IVPOOL_META_SIZE);
-		dma_free_coherent(device, CC_IVPOOL_META_SIZE,
-				  ivgen_ctx->pool_meta,
-				  ivgen_ctx->pool_meta_dma);
-	}
-
-	ivgen_ctx->pool = NULL_SRAM_ADDR;
-}
-
-/*!
- * Allocates iv-pool and maps resources.
- * This function generates the first IV pool.
- *
- * \param drvdata Driver's private context
- *
- * \return int Zero for success, negative value otherwise.
- */
-int cc_ivgen_init(struct cc_drvdata *drvdata)
-{
-	struct cc_ivgen_ctx *ivgen_ctx;
-	struct device *device = &drvdata->plat_dev->dev;
-	int rc;
-
-	/* Allocate "this" context */
-	ivgen_ctx = devm_kzalloc(device, sizeof(*ivgen_ctx), GFP_KERNEL);
-	if (!ivgen_ctx)
-		return -ENOMEM;
-
-	drvdata->ivgen_handle = ivgen_ctx;
-
-	/* Allocate pool's header for initial enc. key/IV */
-	ivgen_ctx->pool_meta = dma_alloc_coherent(device, CC_IVPOOL_META_SIZE,
-						  &ivgen_ctx->pool_meta_dma,
-						  GFP_KERNEL);
-	if (!ivgen_ctx->pool_meta) {
-		dev_err(device, "Not enough memory to allocate DMA of pool_meta (%u B)\n",
-			CC_IVPOOL_META_SIZE);
-		rc = -ENOMEM;
-		goto out;
-	}
-	/* Allocate IV pool in SRAM */
-	ivgen_ctx->pool = cc_sram_alloc(drvdata, CC_IVPOOL_SIZE);
-	if (ivgen_ctx->pool == NULL_SRAM_ADDR) {
-		dev_err(device, "SRAM pool exhausted\n");
-		rc = -ENOMEM;
-		goto out;
-	}
-
-	return cc_init_iv_sram(drvdata);
-
-out:
-	cc_ivgen_fini(drvdata);
-	return rc;
-}
-
-/*!
- * Acquires 16 Bytes IV from the iv-pool
- *
- * \param drvdata Driver private context
- * \param iv_out_dma Array of physical IV out addresses
- * \param iv_out_dma_len Length of iv_out_dma array (additional elements
- *                       of iv_out_dma array are ignore)
- * \param iv_out_size May be 8 or 16 bytes long
- * \param iv_seq IN/OUT array to the descriptors sequence
- * \param iv_seq_len IN/OUT pointer to the sequence length
- *
- * \return int Zero for success, negative value otherwise.
- */
-int cc_get_iv(struct cc_drvdata *drvdata, dma_addr_t iv_out_dma[],
-	      unsigned int iv_out_dma_len, unsigned int iv_out_size,
-	      struct cc_hw_desc iv_seq[], unsigned int *iv_seq_len)
-{
-	struct cc_ivgen_ctx *ivgen_ctx = drvdata->ivgen_handle;
-	unsigned int idx = *iv_seq_len;
-	struct device *dev = drvdata_to_dev(drvdata);
-	unsigned int t;
-
-	if (iv_out_size != CC_AES_IV_SIZE &&
-	    iv_out_size != CTR_RFC3686_IV_SIZE) {
-		return -EINVAL;
-	}
-	if ((iv_out_dma_len + 1) > CC_IVPOOL_SEQ_LEN) {
-		/* The sequence will be longer than allowed */
-		return -EINVAL;
-	}
-
-	/* check that number of generated IV is limited to max dma address
-	 * iv buffer size
-	 */
-	if (iv_out_dma_len > CC_MAX_IVGEN_DMA_ADDRESSES) {
-		/* The sequence will be longer than allowed */
-		return -EINVAL;
-	}
-
-	for (t = 0; t < iv_out_dma_len; t++) {
-		/* Acquire IV from pool */
-		hw_desc_init(&iv_seq[idx]);
-		set_din_sram(&iv_seq[idx], (ivgen_ctx->pool +
-					    ivgen_ctx->next_iv_ofs),
-			     iv_out_size);
-		set_dout_dlli(&iv_seq[idx], iv_out_dma[t], iv_out_size,
-			      NS_BIT, 0);
-		set_flow_mode(&iv_seq[idx], BYPASS);
-		idx++;
-	}
-
-	/* Bypass operation is proceeded by crypto sequence, hence must
-	 *  assure bypass-write-transaction by a memory barrier
-	 */
-	hw_desc_init(&iv_seq[idx]);
-	set_din_no_dma(&iv_seq[idx], 0, 0xfffff0);
-	set_dout_no_dma(&iv_seq[idx], 0, 0, 1);
-	idx++;
-
-	*iv_seq_len = idx; /* update seq length */
-
-	/* Update iv index */
-	ivgen_ctx->next_iv_ofs += iv_out_size;
-
-	if ((CC_IVPOOL_SIZE - ivgen_ctx->next_iv_ofs) < CC_AES_IV_SIZE) {
-		dev_dbg(dev, "Pool exhausted, regenerating iv-pool\n");
-		/* pool is drained -regenerate it! */
-		return cc_gen_iv_pool(ivgen_ctx, iv_seq, iv_seq_len);
-	}
-
-	return 0;
-}
diff --git a/drivers/crypto/ccree/cc_ivgen.h b/drivers/crypto/ccree/cc_ivgen.h
deleted file mode 100644
index a9f5e8b..0000000
--- a/drivers/crypto/ccree/cc_ivgen.h
+++ /dev/null
@@ -1,55 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (C) 2012-2019 ARM Limited (or its affiliates). */
-
-#ifndef __CC_IVGEN_H__
-#define __CC_IVGEN_H__
-
-#include "cc_hw_queue_defs.h"
-
-#define CC_IVPOOL_SEQ_LEN 8
-
-/*!
- * Allocates iv-pool and maps resources.
- * This function generates the first IV pool.
- *
- * \param drvdata Driver's private context
- *
- * \return int Zero for success, negative value otherwise.
- */
-int cc_ivgen_init(struct cc_drvdata *drvdata);
-
-/*!
- * Free iv-pool and ivgen context.
- *
- * \param drvdata
- */
-void cc_ivgen_fini(struct cc_drvdata *drvdata);
-
-/*!
- * Generates the initial pool in SRAM.
- * This function should be invoked when resuming DX driver.
- *
- * \param drvdata
- *
- * \return int Zero for success, negative value otherwise.
- */
-int cc_init_iv_sram(struct cc_drvdata *drvdata);
-
-/*!
- * Acquires 16 Bytes IV from the iv-pool
- *
- * \param drvdata Driver private context
- * \param iv_out_dma Array of physical IV out addresses
- * \param iv_out_dma_len Length of iv_out_dma array (additional elements of
- *                       iv_out_dma array are ignore)
- * \param iv_out_size May be 8 or 16 bytes long
- * \param iv_seq IN/OUT array to the descriptors sequence
- * \param iv_seq_len IN/OUT pointer to the sequence length
- *
- * \return int Zero for success, negative value otherwise.
- */
-int cc_get_iv(struct cc_drvdata *drvdata, dma_addr_t iv_out_dma[],
-	      unsigned int iv_out_dma_len, unsigned int iv_out_size,
-	      struct cc_hw_desc iv_seq[], unsigned int *iv_seq_len);
-
-#endif /*__CC_IVGEN_H__*/
diff --git a/drivers/crypto/ccree/cc_pm.c b/drivers/crypto/ccree/cc_pm.c
index 899a52f..dbc508f 100644
--- a/drivers/crypto/ccree/cc_pm.c
+++ b/drivers/crypto/ccree/cc_pm.c
@@ -8,7 +8,6 @@
 #include "cc_buffer_mgr.h"
 #include "cc_request_mgr.h"
 #include "cc_sram_mgr.h"
-#include "cc_ivgen.h"
 #include "cc_hash.h"
 #include "cc_pm.h"
 #include "cc_fips.h"
@@ -73,7 +72,6 @@
 	/* must be after the queue resuming as it uses the HW queue*/
 	cc_init_hash_sram(drvdata);
 
-	cc_init_iv_sram(drvdata);
 	return 0;
 }
 
diff --git a/drivers/crypto/ccree/cc_request_mgr.c b/drivers/crypto/ccree/cc_request_mgr.c
index 0bc6ccb..a947d5a 100644
--- a/drivers/crypto/ccree/cc_request_mgr.c
+++ b/drivers/crypto/ccree/cc_request_mgr.c
@@ -6,7 +6,6 @@
 #include "cc_driver.h"
 #include "cc_buffer_mgr.h"
 #include "cc_request_mgr.h"
-#include "cc_ivgen.h"
 #include "cc_pm.h"
 
 #define CC_MAX_POLL_ITER	10
@@ -281,36 +280,12 @@
 static int cc_do_send_request(struct cc_drvdata *drvdata,
 			      struct cc_crypto_req *cc_req,
 			      struct cc_hw_desc *desc, unsigned int len,
-				bool add_comp, bool ivgen)
+				bool add_comp)
 {
 	struct cc_req_mgr_handle *req_mgr_h = drvdata->request_mgr_handle;
 	unsigned int used_sw_slots;
-	unsigned int iv_seq_len = 0;
 	unsigned int total_seq_len = len; /*initial sequence length*/
-	struct cc_hw_desc iv_seq[CC_IVPOOL_SEQ_LEN];
 	struct device *dev = drvdata_to_dev(drvdata);
-	int rc;
-
-	if (ivgen) {
-		dev_dbg(dev, "Acquire IV from pool into %d DMA addresses %pad, %pad, %pad, IV-size=%u\n",
-			cc_req->ivgen_dma_addr_len,
-			&cc_req->ivgen_dma_addr[0],
-			&cc_req->ivgen_dma_addr[1],
-			&cc_req->ivgen_dma_addr[2],
-			cc_req->ivgen_size);
-
-		/* Acquire IV from pool */
-		rc = cc_get_iv(drvdata, cc_req->ivgen_dma_addr,
-			       cc_req->ivgen_dma_addr_len,
-			       cc_req->ivgen_size, iv_seq, &iv_seq_len);
-
-		if (rc) {
-			dev_err(dev, "Failed to generate IV (rc=%d)\n", rc);
-			return rc;
-		}
-
-		total_seq_len += iv_seq_len;
-	}
 
 	used_sw_slots = ((req_mgr_h->req_queue_head -
 			  req_mgr_h->req_queue_tail) &
@@ -334,8 +309,6 @@
 	wmb();
 
 	/* STAT_PHASE_4: Push sequence */
-	if (ivgen)
-		enqueue_seq(drvdata, iv_seq, iv_seq_len);
 
 	enqueue_seq(drvdata, desc, len);
 
@@ -380,8 +353,6 @@
 	struct cc_bl_item *bli;
 	struct cc_crypto_req *creq;
 	void *req;
-	bool ivgen;
-	unsigned int total_len;
 	struct device *dev = drvdata_to_dev(drvdata);
 	int rc;
 
@@ -406,12 +377,9 @@
 			bli->notif = true;
 		}
 
-		ivgen = !!creq->ivgen_dma_addr_len;
-		total_len = bli->len + (ivgen ? CC_IVPOOL_SEQ_LEN : 0);
-
 		spin_lock(&mgr->hw_lock);
 
-		rc = cc_queues_status(drvdata, mgr, total_len);
+		rc = cc_queues_status(drvdata, mgr, bli->len);
 		if (rc) {
 			/*
 			 * There is still not room in the FIFO for
@@ -423,7 +391,7 @@
 		}
 
 		rc = cc_do_send_request(drvdata, &bli->creq, bli->desc,
-					bli->len, false, ivgen);
+					bli->len, false);
 
 		spin_unlock(&mgr->hw_lock);
 
@@ -447,8 +415,6 @@
 {
 	int rc;
 	struct cc_req_mgr_handle *mgr = drvdata->request_mgr_handle;
-	bool ivgen = !!cc_req->ivgen_dma_addr_len;
-	unsigned int total_len = len + (ivgen ? CC_IVPOOL_SEQ_LEN : 0);
 	struct device *dev = drvdata_to_dev(drvdata);
 	bool backlog_ok = req->flags & CRYPTO_TFM_REQ_MAY_BACKLOG;
 	gfp_t flags = cc_gfp_flags(req);
@@ -461,7 +427,7 @@
 	}
 
 	spin_lock_bh(&mgr->hw_lock);
-	rc = cc_queues_status(drvdata, mgr, total_len);
+	rc = cc_queues_status(drvdata, mgr, len);
 
 #ifdef CC_DEBUG_FORCE_BACKLOG
 	if (backlog_ok)
@@ -486,8 +452,7 @@
 	}
 
 	if (!rc)
-		rc = cc_do_send_request(drvdata, cc_req, desc, len, false,
-					ivgen);
+		rc = cc_do_send_request(drvdata, cc_req, desc, len, false);
 
 	spin_unlock_bh(&mgr->hw_lock);
 	return rc;
@@ -527,7 +492,7 @@
 		reinit_completion(&drvdata->hw_queue_avail);
 	}
 
-	rc = cc_do_send_request(drvdata, cc_req, desc, len, true, false);
+	rc = cc_do_send_request(drvdata, cc_req, desc, len, true);
 	spin_unlock_bh(&mgr->hw_lock);
 
 	if (rc != -EINPROGRESS) {
diff --git a/drivers/crypto/chelsio/Kconfig b/drivers/crypto/chelsio/Kconfig
index 4b9b37a..2501505 100644
--- a/drivers/crypto/chelsio/Kconfig
+++ b/drivers/crypto/chelsio/Kconfig
@@ -2,6 +2,7 @@
 config CRYPTO_DEV_CHELSIO
 	tristate "Chelsio Crypto Co-processor Driver"
 	depends on CHELSIO_T4
+	select CRYPTO_LIB_AES
 	select CRYPTO_SHA1
 	select CRYPTO_SHA256
 	select CRYPTO_SHA512
diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 177f572..38ee38b 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -1023,22 +1023,21 @@
 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
 	struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm));
 	struct chcr_blkcipher_req_ctx *reqctx = ablkcipher_request_ctx(req);
-	struct crypto_cipher *cipher;
+	struct crypto_aes_ctx aes;
 	int ret, i;
 	u8 *key;
 	unsigned int keylen;
 	int round = reqctx->last_req_len / AES_BLOCK_SIZE;
 	int round8 = round / 8;
 
-	cipher = ablkctx->aes_generic;
 	memcpy(iv, reqctx->iv, AES_BLOCK_SIZE);
 
 	keylen = ablkctx->enckey_len / 2;
 	key = ablkctx->key + keylen;
-	ret = crypto_cipher_setkey(cipher, key, keylen);
+	ret = aes_expandkey(&aes, key, keylen);
 	if (ret)
-		goto out;
-	crypto_cipher_encrypt_one(cipher, iv, iv);
+		return ret;
+	aes_encrypt(&aes, iv, iv);
 	for (i = 0; i < round8; i++)
 		gf128mul_x8_ble((le128 *)iv, (le128 *)iv);
 
@@ -1046,9 +1045,10 @@
 		gf128mul_x_ble((le128 *)iv, (le128 *)iv);
 
 	if (!isfinal)
-		crypto_cipher_decrypt_one(cipher, iv, iv);
-out:
-	return ret;
+		aes_decrypt(&aes, iv, iv);
+
+	memzero_explicit(&aes, sizeof(aes));
+	return 0;
 }
 
 static int chcr_update_cipher_iv(struct ablkcipher_request *req,
@@ -1411,16 +1411,6 @@
 		return PTR_ERR(ablkctx->sw_cipher);
 	}
 
-	if (get_cryptoalg_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_XTS) {
-		/* To update tweak*/
-		ablkctx->aes_generic = crypto_alloc_cipher("aes-generic", 0, 0);
-		if (IS_ERR(ablkctx->aes_generic)) {
-			pr_err("failed to allocate aes cipher for tweak\n");
-			return PTR_ERR(ablkctx->aes_generic);
-		}
-	} else
-		ablkctx->aes_generic = NULL;
-
 	tfm->crt_ablkcipher.reqsize =  sizeof(struct chcr_blkcipher_req_ctx);
 	return chcr_device_init(crypto_tfm_ctx(tfm));
 }
@@ -1451,8 +1441,6 @@
 	struct ablk_ctx *ablkctx = ABLK_CTX(ctx);
 
 	crypto_free_sync_skcipher(ablkctx->sw_cipher);
-	if (ablkctx->aes_generic)
-		crypto_free_cipher(ablkctx->aes_generic);
 }
 
 static int get_alg_config(struct algo_param *params,
@@ -3364,9 +3352,9 @@
 {
 	struct chcr_aead_ctx *aeadctx = AEAD_CTX(a_ctx(aead));
 	struct chcr_gcm_ctx *gctx = GCM_CTX(aeadctx);
-	struct crypto_cipher *cipher;
 	unsigned int ck_size;
 	int ret = 0, key_ctx_size = 0;
+	struct crypto_aes_ctx aes;
 
 	aeadctx->enckey_len = 0;
 	crypto_aead_clear_flags(aeadctx->sw_cipher, CRYPTO_TFM_REQ_MASK);
@@ -3409,23 +3397,15 @@
 	/* Calculate the H = CIPH(K, 0 repeated 16 times).
 	 * It will go in key context
 	 */
-	cipher = crypto_alloc_cipher("aes-generic", 0, 0);
-	if (IS_ERR(cipher)) {
-		aeadctx->enckey_len = 0;
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	ret = crypto_cipher_setkey(cipher, key, keylen);
+	ret = aes_expandkey(&aes, key, keylen);
 	if (ret) {
 		aeadctx->enckey_len = 0;
-		goto out1;
+		goto out;
 	}
 	memset(gctx->ghash_h, 0, AEAD_H_SIZE);
-	crypto_cipher_encrypt_one(cipher, gctx->ghash_h, gctx->ghash_h);
+	aes_encrypt(&aes, gctx->ghash_h, gctx->ghash_h);
+	memzero_explicit(&aes, sizeof(aes));
 
-out1:
-	crypto_free_cipher(cipher);
 out:
 	return ret;
 }
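The chcr hunks above drop the throwaway "aes-generic" crypto_cipher and derive the GCM hash subkey with the new single-block AES library helpers instead. A minimal sketch of that pattern, assuming only <crypto/aes.h> and <linux/string.h> (illustration, not driver code):

#include <crypto/aes.h>
#include <linux/string.h>

/* Sketch only: compute H = CIPH(K, 0 repeated 16 times) via the AES library. */
static int example_derive_ghash_h(const u8 *key, unsigned int keylen,
				  u8 ghash_h[AES_BLOCK_SIZE])
{
	struct crypto_aes_ctx aes;	/* plain struct on the stack, no tfm */
	int ret;

	ret = aes_expandkey(&aes, key, keylen);
	if (ret)
		return ret;		/* invalid key length */

	memset(ghash_h, 0, AES_BLOCK_SIZE);
	aes_encrypt(&aes, ghash_h, ghash_h);	/* encrypt the all-zero block */
	memzero_explicit(&aes, sizeof(aes));	/* scrub the round keys */
	return 0;
}

The same expandkey/encrypt/memzero_explicit sequence is what the chcr_ipsec and chtls hunks below switch to as well.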
diff --git a/drivers/crypto/chelsio/chcr_algo.h b/drivers/crypto/chelsio/chcr_algo.h
index ee20dd8..d1e6b51 100644
--- a/drivers/crypto/chelsio/chcr_algo.h
+++ b/drivers/crypto/chelsio/chcr_algo.h
@@ -333,26 +333,26 @@
 };
 
 
-static const u32 sha1_init[SHA1_DIGEST_SIZE / 4] = {
+static const u32 chcr_sha1_init[SHA1_DIGEST_SIZE / 4] = {
 		SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4,
 };
 
-static const u32 sha224_init[SHA256_DIGEST_SIZE / 4] = {
+static const u32 chcr_sha224_init[SHA256_DIGEST_SIZE / 4] = {
 		SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
 		SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
 };
 
-static const u32 sha256_init[SHA256_DIGEST_SIZE / 4] = {
+static const u32 chcr_sha256_init[SHA256_DIGEST_SIZE / 4] = {
 		SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
 		SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
 };
 
-static const u64 sha384_init[SHA512_DIGEST_SIZE / 8] = {
+static const u64 chcr_sha384_init[SHA512_DIGEST_SIZE / 8] = {
 		SHA384_H0, SHA384_H1, SHA384_H2, SHA384_H3,
 		SHA384_H4, SHA384_H5, SHA384_H6, SHA384_H7,
 };
 
-static const u64 sha512_init[SHA512_DIGEST_SIZE / 8] = {
+static const u64 chcr_sha512_init[SHA512_DIGEST_SIZE / 8] = {
 		SHA512_H0, SHA512_H1, SHA512_H2, SHA512_H3,
 		SHA512_H4, SHA512_H5, SHA512_H6, SHA512_H7,
 };
@@ -362,21 +362,21 @@
 	u8 i;
 	__be32 *dkey = (__be32 *)key;
 	u64 *ldkey = (u64 *)key;
-	__be64 *sha384 = (__be64 *)sha384_init;
-	__be64 *sha512 = (__be64 *)sha512_init;
+	__be64 *sha384 = (__be64 *)chcr_sha384_init;
+	__be64 *sha512 = (__be64 *)chcr_sha512_init;
 
 	switch (digestsize) {
 	case SHA1_DIGEST_SIZE:
 		for (i = 0; i < SHA1_INIT_STATE; i++)
-			dkey[i] = cpu_to_be32(sha1_init[i]);
+			dkey[i] = cpu_to_be32(chcr_sha1_init[i]);
 		break;
 	case SHA224_DIGEST_SIZE:
 		for (i = 0; i < SHA224_INIT_STATE; i++)
-			dkey[i] = cpu_to_be32(sha224_init[i]);
+			dkey[i] = cpu_to_be32(chcr_sha224_init[i]);
 		break;
 	case SHA256_DIGEST_SIZE:
 		for (i = 0; i < SHA256_INIT_STATE; i++)
-			dkey[i] = cpu_to_be32(sha256_init[i]);
+			dkey[i] = cpu_to_be32(chcr_sha256_init[i]);
 		break;
 	case SHA384_DIGEST_SIZE:
 		for (i = 0; i < SHA384_INIT_STATE; i++)
diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
index 655606f..993c97e 100644
--- a/drivers/crypto/chelsio/chcr_crypto.h
+++ b/drivers/crypto/chelsio/chcr_crypto.h
@@ -172,7 +172,6 @@
 
 struct ablk_ctx {
 	struct crypto_sync_skcipher *sw_cipher;
-	struct crypto_cipher *aes_generic;
 	__be32 key_ctx_hdr;
 	unsigned int enckey_len;
 	unsigned char ciph_mode;
diff --git a/drivers/crypto/chelsio/chcr_ipsec.c b/drivers/crypto/chelsio/chcr_ipsec.c
index f429aae..2435568 100644
--- a/drivers/crypto/chelsio/chcr_ipsec.c
+++ b/drivers/crypto/chelsio/chcr_ipsec.c
@@ -132,11 +132,11 @@
 static inline int chcr_ipsec_setkey(struct xfrm_state *x,
 				    struct ipsec_sa_entry *sa_entry)
 {
-	struct crypto_cipher *cipher;
 	int keylen = (x->aead->alg_key_len + 7) / 8;
 	unsigned char *key = x->aead->alg_key;
 	int ck_size, key_ctx_size = 0;
 	unsigned char ghash_h[AEAD_H_SIZE];
+	struct crypto_aes_ctx aes;
 	int ret = 0;
 
 	if (keylen > 3) {
@@ -170,26 +170,19 @@
 	/* Calculate the H = CIPH(K, 0 repeated 16 times).
 	 * It will go in key context
 	 */
-	cipher = crypto_alloc_cipher("aes-generic", 0, 0);
-	if (IS_ERR(cipher)) {
-		sa_entry->enckey_len = 0;
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	ret = crypto_cipher_setkey(cipher, key, keylen);
+	ret = aes_expandkey(&aes, key, keylen);
 	if (ret) {
 		sa_entry->enckey_len = 0;
-		goto out1;
+		goto out;
 	}
 	memset(ghash_h, 0, AEAD_H_SIZE);
-	crypto_cipher_encrypt_one(cipher, ghash_h, ghash_h);
+	aes_encrypt(&aes, ghash_h, ghash_h);
+	memzero_explicit(&aes, sizeof(aes));
+
 	memcpy(sa_entry->key + (DIV_ROUND_UP(sa_entry->enckey_len, 16) *
 	       16), ghash_h, AEAD_H_SIZE);
 	sa_entry->kctx_len = ((DIV_ROUND_UP(sa_entry->enckey_len, 16)) << 4) +
 			      AEAD_H_SIZE;
-out1:
-	crypto_free_cipher(cipher);
 out:
 	return ret;
 }
diff --git a/drivers/crypto/chelsio/chtls/chtls_hw.c b/drivers/crypto/chelsio/chtls/chtls_hw.c
index f2424f4..2a34035 100644
--- a/drivers/crypto/chelsio/chtls/chtls_hw.c
+++ b/drivers/crypto/chelsio/chtls/chtls_hw.c
@@ -213,8 +213,8 @@
 	unsigned char key[AES_KEYSIZE_128];
 	struct tls12_crypto_info_aes_gcm_128 *gcm_ctx;
 	unsigned char ghash_h[AEAD_H_SIZE];
-	struct crypto_cipher *cipher;
 	int ck_size, key_ctx_size;
+	struct crypto_aes_ctx aes;
 	int ret;
 
 	gcm_ctx = (struct tls12_crypto_info_aes_gcm_128 *)
@@ -234,18 +234,13 @@
 	/* Calculate the H = CIPH(K, 0 repeated 16 times).
 	 * It will go in key context
 	 */
-	cipher = crypto_alloc_cipher("aes", 0, 0);
-	if (IS_ERR(cipher)) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	ret = crypto_cipher_setkey(cipher, key, keylen);
+	ret = aes_expandkey(&aes, key, keylen);
 	if (ret)
-		goto out1;
+		return ret;
 
 	memset(ghash_h, 0, AEAD_H_SIZE);
-	crypto_cipher_encrypt_one(cipher, ghash_h, ghash_h);
+	aes_encrypt(&aes, ghash_h, ghash_h);
+	memzero_explicit(&aes, sizeof(aes));
 	csk->tlshws.keylen = key_ctx_size;
 
 	/* Copy the Key context */
@@ -269,10 +264,7 @@
 	/* erase key info from driver */
 	memset(gcm_ctx->key, 0, keylen);
 
-out1:
-	crypto_free_cipher(cipher);
-out:
-	return ret;
+	return 0;
 }
 
 static void chtls_set_scmd(struct chtls_sock *csk)
diff --git a/drivers/crypto/exynos-rng.c b/drivers/crypto/exynos-rng.c
index 2cfabb9..cbd8ca6 100644
--- a/drivers/crypto/exynos-rng.c
+++ b/drivers/crypto/exynos-rng.c
@@ -268,7 +268,6 @@
 static int exynos_rng_probe(struct platform_device *pdev)
 {
 	struct exynos_rng_dev *rng;
-	struct resource *res;
 	int ret;
 
 	if (exynos_rng_dev)
@@ -289,8 +288,7 @@
 		return PTR_ERR(rng->clk);
 	}
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	rng->mem = devm_ioremap_resource(&pdev->dev, res);
+	rng->mem = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(rng->mem))
 		return PTR_ERR(rng->mem);
 
diff --git a/drivers/crypto/hifn_795x.c b/drivers/crypto/hifn_795x.c
index 5c3f02e..a18e62d 100644
--- a/drivers/crypto/hifn_795x.c
+++ b/drivers/crypto/hifn_795x.c
@@ -21,7 +21,7 @@
 #include <linux/ktime.h>
 
 #include <crypto/algapi.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 
 static char hifn_pll_ref[sizeof("extNNN")] = "ext";
 module_param_string(hifn_pll_ref, hifn_pll_ref, sizeof(hifn_pll_ref), 0444);
@@ -1939,25 +1939,13 @@
 static int hifn_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 		unsigned int len)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
-	struct hifn_context *ctx = crypto_tfm_ctx(tfm);
+	struct hifn_context *ctx = crypto_ablkcipher_ctx(cipher);
 	struct hifn_device *dev = ctx->dev;
+	int err;
 
-	if (len > HIFN_MAX_CRYPT_KEY_LENGTH) {
-		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -1;
-	}
-
-	if (len == HIFN_DES_KEY_LENGTH) {
-		u32 tmp[DES_EXPKEY_WORDS];
-		int ret = des_ekey(tmp, key);
-
-		if (unlikely(ret == 0) &&
-		    (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
-			tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
-			return -EINVAL;
-		}
-	}
+	err = verify_ablkcipher_des_key(cipher, key);
+	if (err)
+		return err;
 
 	dev->flags &= ~HIFN_FLAG_OLD_KEY;
 
@@ -1972,15 +1960,11 @@
 {
 	struct hifn_context *ctx = crypto_ablkcipher_ctx(cipher);
 	struct hifn_device *dev = ctx->dev;
-	u32 flags;
 	int err;
 
-	flags = crypto_ablkcipher_get_flags(cipher);
-	err = __des3_verify_key(&flags, key);
-	if (unlikely(err)) {
-		crypto_ablkcipher_set_flags(cipher, flags);
+	err = verify_ablkcipher_des3_key(cipher, key);
+	if (err)
 		return err;
-	}
 
 	dev->flags &= ~HIFN_FLAG_OLD_KEY;
 
diff --git a/drivers/crypto/hisilicon/Kconfig b/drivers/crypto/hisilicon/Kconfig
index 8ca9c50..ebaf91e 100644
--- a/drivers/crypto/hisilicon/Kconfig
+++ b/drivers/crypto/hisilicon/Kconfig
@@ -4,6 +4,7 @@
 	tristate "Support for Hisilicon SEC crypto block cipher accelerator"
 	select CRYPTO_BLKCIPHER
 	select CRYPTO_ALGAPI
+	select CRYPTO_LIB_DES
 	select SG_SPLIT
 	depends on ARM64 || COMPILE_TEST
 	depends on HAS_IOMEM
@@ -12,3 +13,27 @@
 
 	  To compile this as a module, choose M here: the module
 	  will be called hisi_sec.
+
+config CRYPTO_DEV_HISI_QM
+	tristate
+	depends on ARM64 && PCI && PCI_MSI
+	help
+	  HiSilicon accelerator engines use a common queue management
+	  interface. Specific engine drivers may use this module.
+
+config CRYPTO_HISI_SGL
+	tristate
+	depends on ARM64
+	help
+	  HiSilicon accelerator engines use a common hardware scatterlist
+	  interface for the data format. Specific engine drivers may use
+	  this module.
+
+config CRYPTO_DEV_HISI_ZIP
+	tristate "Support for HiSilicon ZIP accelerator"
+	depends on ARM64 && PCI && PCI_MSI
+	select CRYPTO_DEV_HISI_QM
+	select CRYPTO_HISI_SGL
+	select SG_SPLIT
+	help
+	  Support for the HiSilicon ZIP accelerator driver.
diff --git a/drivers/crypto/hisilicon/Makefile b/drivers/crypto/hisilicon/Makefile
index 463f46a..45a2797 100644
--- a/drivers/crypto/hisilicon/Makefile
+++ b/drivers/crypto/hisilicon/Makefile
@@ -1,2 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_CRYPTO_DEV_HISI_SEC) += sec/
+obj-$(CONFIG_CRYPTO_DEV_HISI_QM) += qm.o
+obj-$(CONFIG_CRYPTO_HISI_SGL) += sgl.o
+obj-$(CONFIG_CRYPTO_DEV_HISI_ZIP) += zip/
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
new file mode 100644
index 0000000..f975c39
--- /dev/null
+++ b/drivers/crypto/hisilicon/qm.c
@@ -0,0 +1,1913 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 HiSilicon Limited. */
+#include <asm/page.h>
+#include <linux/bitmap.h>
+#include <linux/debugfs.h>
+#include <linux/dma-mapping.h>
+#include <linux/io.h>
+#include <linux/irqreturn.h>
+#include <linux/log2.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include "qm.h"
+
+/* eq/aeq irq enable */
+#define QM_VF_AEQ_INT_SOURCE		0x0
+#define QM_VF_AEQ_INT_MASK		0x4
+#define QM_VF_EQ_INT_SOURCE		0x8
+#define QM_VF_EQ_INT_MASK		0xc
+#define QM_IRQ_NUM_V1			1
+#define QM_IRQ_NUM_PF_V2		4
+#define QM_IRQ_NUM_VF_V2		2
+
+#define QM_EQ_EVENT_IRQ_VECTOR		0
+#define QM_AEQ_EVENT_IRQ_VECTOR		1
+#define QM_ABNORMAL_EVENT_IRQ_VECTOR	3
+
+/* mailbox */
+#define QM_MB_CMD_SQC			0x0
+#define QM_MB_CMD_CQC			0x1
+#define QM_MB_CMD_EQC			0x2
+#define QM_MB_CMD_AEQC			0x3
+#define QM_MB_CMD_SQC_BT		0x4
+#define QM_MB_CMD_CQC_BT		0x5
+#define QM_MB_CMD_SQC_VFT_V2		0x6
+
+#define QM_MB_CMD_SEND_BASE		0x300
+#define QM_MB_EVENT_SHIFT		8
+#define QM_MB_BUSY_SHIFT		13
+#define QM_MB_OP_SHIFT			14
+#define QM_MB_CMD_DATA_ADDR_L		0x304
+#define QM_MB_CMD_DATA_ADDR_H		0x308
+
+/* sqc shift */
+#define QM_SQ_HOP_NUM_SHIFT		0
+#define QM_SQ_PAGE_SIZE_SHIFT		4
+#define QM_SQ_BUF_SIZE_SHIFT		8
+#define QM_SQ_SQE_SIZE_SHIFT		12
+#define QM_SQ_PRIORITY_SHIFT		0
+#define QM_SQ_ORDERS_SHIFT		4
+#define QM_SQ_TYPE_SHIFT		8
+
+#define QM_SQ_TYPE_MASK			GENMASK(3, 0)
+
+/* cqc shift */
+#define QM_CQ_HOP_NUM_SHIFT		0
+#define QM_CQ_PAGE_SIZE_SHIFT		4
+#define QM_CQ_BUF_SIZE_SHIFT		8
+#define QM_CQ_CQE_SIZE_SHIFT		12
+#define QM_CQ_PHASE_SHIFT		0
+#define QM_CQ_FLAG_SHIFT		1
+
+#define QM_CQE_PHASE(cqe)		((cqe)->w7 & 0x1)
+#define QM_QC_CQE_SIZE			4
+
+/* eqc shift */
+#define QM_EQE_AEQE_SIZE		(2UL << 12)
+#define QM_EQC_PHASE_SHIFT		16
+
+#define QM_EQE_PHASE(eqe)		(((eqe)->dw0 >> 16) & 0x1)
+#define QM_EQE_CQN_MASK			GENMASK(15, 0)
+
+#define QM_AEQE_PHASE(aeqe)		(((aeqe)->dw0 >> 16) & 0x1)
+#define QM_AEQE_TYPE_SHIFT		17
+
+#define QM_DOORBELL_CMD_SQ		0
+#define QM_DOORBELL_CMD_CQ		1
+#define QM_DOORBELL_CMD_EQ		2
+#define QM_DOORBELL_CMD_AEQ		3
+
+#define QM_DOORBELL_BASE_V1		0x340
+#define QM_DB_CMD_SHIFT_V1		16
+#define QM_DB_INDEX_SHIFT_V1		32
+#define QM_DB_PRIORITY_SHIFT_V1		48
+#define QM_DOORBELL_SQ_CQ_BASE_V2	0x1000
+#define QM_DOORBELL_EQ_AEQ_BASE_V2	0x2000
+#define QM_DB_CMD_SHIFT_V2		12
+#define QM_DB_RAND_SHIFT_V2		16
+#define QM_DB_INDEX_SHIFT_V2		32
+#define QM_DB_PRIORITY_SHIFT_V2		48
+
+#define QM_MEM_START_INIT		0x100040
+#define QM_MEM_INIT_DONE		0x100044
+#define QM_VFT_CFG_RDY			0x10006c
+#define QM_VFT_CFG_OP_WR		0x100058
+#define QM_VFT_CFG_TYPE			0x10005c
+#define QM_SQC_VFT			0x0
+#define QM_CQC_VFT			0x1
+#define QM_VFT_CFG			0x100060
+#define QM_VFT_CFG_OP_ENABLE		0x100054
+
+#define QM_VFT_CFG_DATA_L		0x100064
+#define QM_VFT_CFG_DATA_H		0x100068
+#define QM_SQC_VFT_BUF_SIZE		(7ULL << 8)
+#define QM_SQC_VFT_SQC_SIZE		(5ULL << 12)
+#define QM_SQC_VFT_INDEX_NUMBER		(1ULL << 16)
+#define QM_SQC_VFT_START_SQN_SHIFT	28
+#define QM_SQC_VFT_VALID		(1ULL << 44)
+#define QM_SQC_VFT_SQN_SHIFT		45
+#define QM_CQC_VFT_BUF_SIZE		(7ULL << 8)
+#define QM_CQC_VFT_SQC_SIZE		(5ULL << 12)
+#define QM_CQC_VFT_INDEX_NUMBER		(1ULL << 16)
+#define QM_CQC_VFT_VALID		(1ULL << 28)
+
+#define QM_SQC_VFT_BASE_SHIFT_V2	28
+#define QM_SQC_VFT_BASE_MASK_V2		GENMASK(5, 0)
+#define QM_SQC_VFT_NUM_SHIFT_V2		45
+#define QM_SQC_VFT_NUM_MASK_v2		GENMASK(9, 0)
+
+#define QM_DFX_CNT_CLR_CE		0x100118
+
+#define QM_ABNORMAL_INT_SOURCE		0x100000
+#define QM_ABNORMAL_INT_MASK		0x100004
+#define QM_ABNORMAL_INT_MASK_VALUE	0x1fff
+#define QM_ABNORMAL_INT_STATUS		0x100008
+#define QM_ABNORMAL_INF00		0x100010
+#define QM_FIFO_OVERFLOW_TYPE		0xc0
+#define QM_FIFO_OVERFLOW_TYPE_SHIFT	6
+#define QM_FIFO_OVERFLOW_VF		0x3f
+#define QM_ABNORMAL_INF01		0x100014
+#define QM_DB_TIMEOUT_TYPE		0xc0
+#define QM_DB_TIMEOUT_TYPE_SHIFT	6
+#define QM_DB_TIMEOUT_VF		0x3f
+#define QM_RAS_CE_ENABLE		0x1000ec
+#define QM_RAS_FE_ENABLE		0x1000f0
+#define QM_RAS_NFE_ENABLE		0x1000f4
+#define QM_RAS_CE_THRESHOLD		0x1000f8
+#define QM_RAS_CE_TIMES_PER_IRQ		1
+#define QM_RAS_MSI_INT_SEL		0x1040f4
+
+#define QM_CACHE_WB_START		0x204
+#define QM_CACHE_WB_DONE		0x208
+
+#define PCI_BAR_2			2
+#define QM_SQE_DATA_ALIGN_MASK		GENMASK(6, 0)
+#define QMC_ALIGN(sz)			ALIGN(sz, 32)
+
+#define QM_DBG_TMP_BUF_LEN		22
+
+#define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \
+	(((hop_num) << QM_CQ_HOP_NUM_SHIFT)	| \
+	((pg_sz) << QM_CQ_PAGE_SIZE_SHIFT)	| \
+	((buf_sz) << QM_CQ_BUF_SIZE_SHIFT)	| \
+	((cqe_sz) << QM_CQ_CQE_SIZE_SHIFT))
+
+#define QM_MK_CQC_DW3_V2(cqe_sz) \
+	((QM_Q_DEPTH - 1) | ((cqe_sz) << QM_CQ_CQE_SIZE_SHIFT))
+
+#define QM_MK_SQC_W13(priority, orders, alg_type) \
+	(((priority) << QM_SQ_PRIORITY_SHIFT)	| \
+	((orders) << QM_SQ_ORDERS_SHIFT)	| \
+	(((alg_type) & QM_SQ_TYPE_MASK) << QM_SQ_TYPE_SHIFT))
+
+#define QM_MK_SQC_DW3_V1(hop_num, pg_sz, buf_sz, sqe_sz) \
+	(((hop_num) << QM_SQ_HOP_NUM_SHIFT)	| \
+	((pg_sz) << QM_SQ_PAGE_SIZE_SHIFT)	| \
+	((buf_sz) << QM_SQ_BUF_SIZE_SHIFT)	| \
+	((u32)ilog2(sqe_sz) << QM_SQ_SQE_SIZE_SHIFT))
+
+#define QM_MK_SQC_DW3_V2(sqe_sz) \
+	((QM_Q_DEPTH - 1) | ((u32)ilog2(sqe_sz) << QM_SQ_SQE_SIZE_SHIFT))
+
+#define INIT_QC_COMMON(qc, base, pasid) do {	\
+	(qc)->head = 0;				\
+	(qc)->tail = 0;				\
+	(qc)->base_l = lower_32_bits(base);	\
+	(qc)->base_h = upper_32_bits(base);	\
+	(qc)->dw3 = 0;				\
+	(qc)->w8 = 0;				\
+	(qc)->rsvd0 = 0;			\
+	(qc)->pasid = pasid;			\
+	(qc)->w11 = 0;				\
+	(qc)->rsvd1 = 0;			\
+} while (0)
+
+enum vft_type {
+	SQC_VFT = 0,
+	CQC_VFT,
+};
+
+struct qm_cqe {
+	__le32 rsvd0;
+	__le16 cmd_id;
+	__le16 rsvd1;
+	__le16 sq_head;
+	__le16 sq_num;
+	__le16 rsvd2;
+	__le16 w7;
+};
+
+struct qm_eqe {
+	__le32 dw0;
+};
+
+struct qm_aeqe {
+	__le32 dw0;
+};
+
+struct qm_sqc {
+	__le16 head;
+	__le16 tail;
+	__le32 base_l;
+	__le32 base_h;
+	__le32 dw3;
+	__le16 w8;
+	__le16 rsvd0;
+	__le16 pasid;
+	__le16 w11;
+	__le16 cq_num;
+	__le16 w13;
+	__le32 rsvd1;
+};
+
+struct qm_cqc {
+	__le16 head;
+	__le16 tail;
+	__le32 base_l;
+	__le32 base_h;
+	__le32 dw3;
+	__le16 w8;
+	__le16 rsvd0;
+	__le16 pasid;
+	__le16 w11;
+	__le32 dw6;
+	__le32 rsvd1;
+};
+
+struct qm_eqc {
+	__le16 head;
+	__le16 tail;
+	__le32 base_l;
+	__le32 base_h;
+	__le32 dw3;
+	__le32 rsvd[2];
+	__le32 dw6;
+};
+
+struct qm_aeqc {
+	__le16 head;
+	__le16 tail;
+	__le32 base_l;
+	__le32 base_h;
+	__le32 dw3;
+	__le32 rsvd[2];
+	__le32 dw6;
+};
+
+struct qm_mailbox {
+	__le16 w0;
+	__le16 queue_num;
+	__le32 base_l;
+	__le32 base_h;
+	__le32 rsvd;
+};
+
+struct qm_doorbell {
+	__le16 queue_num;
+	__le16 cmd;
+	__le16 index;
+	__le16 priority;
+};
+
+struct hisi_qm_hw_ops {
+	int (*get_vft)(struct hisi_qm *qm, u32 *base, u32 *number);
+	void (*qm_db)(struct hisi_qm *qm, u16 qn,
+		      u8 cmd, u16 index, u8 priority);
+	u32 (*get_irq_num)(struct hisi_qm *qm);
+	int (*debug_init)(struct hisi_qm *qm);
+	void (*hw_error_init)(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe,
+			      u32 msi);
+	pci_ers_result_t (*hw_error_handle)(struct hisi_qm *qm);
+};
+
+static const char * const qm_debug_file_name[] = {
+	[CURRENT_Q]    = "current_q",
+	[CLEAR_ENABLE] = "clear_enable",
+};
+
+struct hisi_qm_hw_error {
+	u32 int_msk;
+	const char *msg;
+};
+
+static const struct hisi_qm_hw_error qm_hw_error[] = {
+	{ .int_msk = BIT(0), .msg = "qm_axi_rresp" },
+	{ .int_msk = BIT(1), .msg = "qm_axi_bresp" },
+	{ .int_msk = BIT(2), .msg = "qm_ecc_mbit" },
+	{ .int_msk = BIT(3), .msg = "qm_ecc_1bit" },
+	{ .int_msk = BIT(4), .msg = "qm_acc_get_task_timeout" },
+	{ .int_msk = BIT(5), .msg = "qm_acc_do_task_timeout" },
+	{ .int_msk = BIT(6), .msg = "qm_acc_wb_not_ready_timeout" },
+	{ .int_msk = BIT(7), .msg = "qm_sq_cq_vf_invalid" },
+	{ .int_msk = BIT(8), .msg = "qm_cq_vf_invalid" },
+	{ .int_msk = BIT(9), .msg = "qm_sq_vf_invalid" },
+	{ .int_msk = BIT(10), .msg = "qm_db_timeout" },
+	{ .int_msk = BIT(11), .msg = "qm_of_fifo_of" },
+	{ .int_msk = BIT(12), .msg = "qm_db_random_invalid" },
+	{ /* sentinel */ }
+};
+
+static const char * const qm_db_timeout[] = {
+	"sq", "cq", "eq", "aeq",
+};
+
+static const char * const qm_fifo_overflow[] = {
+	"cq", "eq", "aeq",
+};
+
+/* return 0 if the mailbox is ready, -ETIMEDOUT on hardware timeout */
+static int qm_wait_mb_ready(struct hisi_qm *qm)
+{
+	u32 val;
+
+	return readl_relaxed_poll_timeout(qm->io_base + QM_MB_CMD_SEND_BASE,
+					  val, !((val >> QM_MB_BUSY_SHIFT) &
+					  0x1), 10, 1000);
+}
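qm_wait_mb_ready() relies on readl_relaxed_poll_timeout() from <linux/iopoll.h>: the register is re-read roughly every 10 us until the busy bit (QM_MB_BUSY_SHIFT) clears or 1000 us elapse. A long-hand sketch of what that busy-wait amounts to (illustration only; the driver keeps the iopoll helper):

/* Hypothetical expansion of the poll above, same busy-bit condition. */
static int example_wait_mb_not_busy(void __iomem *mb_base)
{
	ktime_t timeout = ktime_add_us(ktime_get(), 1000);
	u32 val;

	for (;;) {
		val = readl_relaxed(mb_base);
		if (!((val >> QM_MB_BUSY_SHIFT) & 0x1))
			return 0;
		if (ktime_after(ktime_get(), timeout))
			return -ETIMEDOUT;
		usleep_range(10, 20);
	}
}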
+
+/* 128 bits should be written to the hardware at one time to trigger a mailbox */
+static void qm_mb_write(struct hisi_qm *qm, const void *src)
+{
+	void __iomem *fun_base = qm->io_base + QM_MB_CMD_SEND_BASE;
+	unsigned long tmp0 = 0, tmp1 = 0;
+
+	asm volatile("ldp %0, %1, %3\n"
+		     "stp %0, %1, %2\n"
+		     "dsb sy\n"
+		     : "=&r" (tmp0),
+		       "=&r" (tmp1),
+		       "+Q" (*((char *)fun_base))
+		     : "Q" (*((char *)src))
+		     : "memory");
+}
+
+static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
+		 bool op)
+{
+	struct qm_mailbox mailbox;
+	int ret = 0;
+
+	dev_dbg(&qm->pdev->dev, "QM mailbox request to q%u: %u-%llx\n",
+		queue, cmd, (unsigned long long)dma_addr);
+
+	mailbox.w0 = cmd |
+		     (op ? 0x1 << QM_MB_OP_SHIFT : 0) |
+		     (0x1 << QM_MB_BUSY_SHIFT);
+	mailbox.queue_num = queue;
+	mailbox.base_l = lower_32_bits(dma_addr);
+	mailbox.base_h = upper_32_bits(dma_addr);
+	mailbox.rsvd = 0;
+
+	mutex_lock(&qm->mailbox_lock);
+
+	if (unlikely(qm_wait_mb_ready(qm))) {
+		ret = -EBUSY;
+		dev_err(&qm->pdev->dev, "QM mailbox is busy, cannot start!\n");
+		goto busy_unlock;
+	}
+
+	qm_mb_write(qm, &mailbox);
+
+	if (unlikely(qm_wait_mb_ready(qm))) {
+		ret = -EBUSY;
+		dev_err(&qm->pdev->dev, "QM mailbox operation timeout!\n");
+		goto busy_unlock;
+	}
+
+busy_unlock:
+	mutex_unlock(&qm->mailbox_lock);
+
+	return ret;
+}
+
+static void qm_db_v1(struct hisi_qm *qm, u16 qn, u8 cmd, u16 index, u8 priority)
+{
+	u64 doorbell;
+
+	doorbell = qn | ((u64)cmd << QM_DB_CMD_SHIFT_V1) |
+		   ((u64)index << QM_DB_INDEX_SHIFT_V1)  |
+		   ((u64)priority << QM_DB_PRIORITY_SHIFT_V1);
+
+	writeq(doorbell, qm->io_base + QM_DOORBELL_BASE_V1);
+}
+
+static void qm_db_v2(struct hisi_qm *qm, u16 qn, u8 cmd, u16 index, u8 priority)
+{
+	u64 doorbell;
+	u64 dbase;
+	u16 randata = 0;
+
+	if (cmd == QM_DOORBELL_CMD_SQ || cmd == QM_DOORBELL_CMD_CQ)
+		dbase = QM_DOORBELL_SQ_CQ_BASE_V2;
+	else
+		dbase = QM_DOORBELL_EQ_AEQ_BASE_V2;
+
+	doorbell = qn | ((u64)cmd << QM_DB_CMD_SHIFT_V2) |
+		   ((u64)randata << QM_DB_RAND_SHIFT_V2) |
+		   ((u64)index << QM_DB_INDEX_SHIFT_V2)	 |
+		   ((u64)priority << QM_DB_PRIORITY_SHIFT_V2);
+
+	writeq(doorbell, qm->io_base + dbase);
+}
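For reference, the V2 doorbell encoding worked through with arbitrary example values (plain arithmetic on the shift constants defined earlier in this file):

/*
 * Example: ring the completion-queue doorbell for queue 3, index 5,
 * priority 0 on a V2 engine (randata stays 0):
 *
 *   doorbell = 3                                  qn
 *            | (u64)QM_DOORBELL_CMD_CQ << 12      -> 0x1000
 *            | (u64)0 << 16                       randata
 *            | (u64)5 << 32                       -> 0x500000000
 *            | (u64)0 << 48                       priority
 *            = 0x0000000500001003
 *
 * written with writeq() at io_base + QM_DOORBELL_SQ_CQ_BASE_V2 (0x1000).
 */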
+
+static void qm_db(struct hisi_qm *qm, u16 qn, u8 cmd, u16 index, u8 priority)
+{
+	dev_dbg(&qm->pdev->dev, "QM doorbell request: qn=%u, cmd=%u, index=%u\n",
+		qn, cmd, index);
+
+	qm->ops->qm_db(qm, qn, cmd, index, priority);
+}
+
+static int qm_dev_mem_reset(struct hisi_qm *qm)
+{
+	u32 val;
+
+	writel(0x1, qm->io_base + QM_MEM_START_INIT);
+	return readl_relaxed_poll_timeout(qm->io_base + QM_MEM_INIT_DONE, val,
+					  val & BIT(0), 10, 1000);
+}
+
+static u32 qm_get_irq_num_v1(struct hisi_qm *qm)
+{
+	return QM_IRQ_NUM_V1;
+}
+
+static u32 qm_get_irq_num_v2(struct hisi_qm *qm)
+{
+	if (qm->fun_type == QM_HW_PF)
+		return QM_IRQ_NUM_PF_V2;
+	else
+		return QM_IRQ_NUM_VF_V2;
+}
+
+static struct hisi_qp *qm_to_hisi_qp(struct hisi_qm *qm, struct qm_eqe *eqe)
+{
+	u16 cqn = eqe->dw0 & QM_EQE_CQN_MASK;
+
+	return qm->qp_array[cqn];
+}
+
+static void qm_cq_head_update(struct hisi_qp *qp)
+{
+	if (qp->qp_status.cq_head == QM_Q_DEPTH - 1) {
+		qp->qp_status.cqc_phase = !qp->qp_status.cqc_phase;
+		qp->qp_status.cq_head = 0;
+	} else {
+		qp->qp_status.cq_head++;
+	}
+}
+
+static void qm_poll_qp(struct hisi_qp *qp, struct hisi_qm *qm)
+{
+	struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
+
+	if (qp->req_cb) {
+		while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
+			dma_rmb();
+			qp->req_cb(qp, qp->sqe + qm->sqe_size * cqe->sq_head);
+			qm_cq_head_update(qp);
+			cqe = qp->cqe + qp->qp_status.cq_head;
+			qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ,
+			      qp->qp_status.cq_head, 0);
+			atomic_dec(&qp->qp_status.used);
+		}
+
+		/* set c_flag */
+		qm_db(qm, qp->qp_id, QM_DOORBELL_CMD_CQ,
+		      qp->qp_status.cq_head, 1);
+	}
+}
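qm_poll_qp() and qm_cq_head_update() implement a phase-bit completion ring: hardware flips the bit it writes into each CQE every time it wraps the queue, and the driver flips its expected cqc_phase whenever cq_head wraps, so QM_CQE_PHASE(cqe) == cqc_phase means the entry is fresh. A self-contained sketch of that convention with made-up names (not the driver's structures):

/* Hypothetical phase-bit ring consumer; mirrors the cqc_phase logic above. */
struct demo_cqe {
	unsigned short id;
	unsigned short valid;		/* low bit is the phase bit */
};

static unsigned int demo_drain(struct demo_cqe *ring, unsigned int depth,
			       unsigned int *head, unsigned int *phase)
{
	unsigned int handled = 0;

	while ((ring[*head].valid & 0x1) == *phase) {
		/* ...consume ring[*head] here... */
		handled++;
		if (++(*head) == depth) {
			*head = 0;
			*phase = !*phase;	/* expect the flipped bit next lap */
		}
	}
	return handled;
}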
+
+static void qm_qp_work_func(struct work_struct *work)
+{
+	struct hisi_qp *qp;
+
+	qp = container_of(work, struct hisi_qp, work);
+	qm_poll_qp(qp, qp->qm);
+}
+
+static irqreturn_t qm_irq_handler(int irq, void *data)
+{
+	struct hisi_qm *qm = data;
+	struct qm_eqe *eqe = qm->eqe + qm->status.eq_head;
+	struct hisi_qp *qp;
+	int eqe_num = 0;
+
+	while (QM_EQE_PHASE(eqe) == qm->status.eqc_phase) {
+		eqe_num++;
+		qp = qm_to_hisi_qp(qm, eqe);
+		if (qp)
+			queue_work(qp->wq, &qp->work);
+
+		if (qm->status.eq_head == QM_Q_DEPTH - 1) {
+			qm->status.eqc_phase = !qm->status.eqc_phase;
+			eqe = qm->eqe;
+			qm->status.eq_head = 0;
+		} else {
+			eqe++;
+			qm->status.eq_head++;
+		}
+
+		if (eqe_num == QM_Q_DEPTH / 2 - 1) {
+			eqe_num = 0;
+			qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
+		}
+	}
+
+	qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t qm_irq(int irq, void *data)
+{
+	struct hisi_qm *qm = data;
+
+	if (readl(qm->io_base + QM_VF_EQ_INT_SOURCE))
+		return qm_irq_handler(irq, data);
+
+	dev_err(&qm->pdev->dev, "invalid int source\n");
+	qm_db(qm, 0, QM_DOORBELL_CMD_EQ, qm->status.eq_head, 0);
+
+	return IRQ_NONE;
+}
+
+static irqreturn_t qm_aeq_irq(int irq, void *data)
+{
+	struct hisi_qm *qm = data;
+	struct qm_aeqe *aeqe = qm->aeqe + qm->status.aeq_head;
+	u32 type;
+
+	if (!readl(qm->io_base + QM_VF_AEQ_INT_SOURCE))
+		return IRQ_NONE;
+
+	while (QM_AEQE_PHASE(aeqe) == qm->status.aeqc_phase) {
+		type = aeqe->dw0 >> QM_AEQE_TYPE_SHIFT;
+		if (type < ARRAY_SIZE(qm_fifo_overflow))
+			dev_err(&qm->pdev->dev, "%s overflow\n",
+				qm_fifo_overflow[type]);
+		else
+			dev_err(&qm->pdev->dev, "unknown error type %d\n",
+				type);
+
+		if (qm->status.aeq_head == QM_Q_DEPTH - 1) {
+			qm->status.aeqc_phase = !qm->status.aeqc_phase;
+			aeqe = qm->aeqe;
+			qm->status.aeq_head = 0;
+		} else {
+			aeqe++;
+			qm->status.aeq_head++;
+		}
+
+		qm_db(qm, 0, QM_DOORBELL_CMD_AEQ, qm->status.aeq_head, 0);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t qm_abnormal_irq(int irq, void *data)
+{
+	const struct hisi_qm_hw_error *err = qm_hw_error;
+	struct hisi_qm *qm = data;
+	struct device *dev = &qm->pdev->dev;
+	u32 error_status, tmp;
+
+	/* read err sts */
+	tmp = readl(qm->io_base + QM_ABNORMAL_INT_STATUS);
+	error_status = qm->msi_mask & tmp;
+
+	while (err->msg) {
+		if (err->int_msk & error_status)
+			dev_err(dev, "%s [error status=0x%x] found\n",
+				err->msg, err->int_msk);
+
+		err++;
+	}
+
+	/* clear err sts */
+	writel(error_status, qm->io_base + QM_ABNORMAL_INT_SOURCE);
+
+	return IRQ_HANDLED;
+}
+
+static int qm_irq_register(struct hisi_qm *qm)
+{
+	struct pci_dev *pdev = qm->pdev;
+	int ret;
+
+	ret = request_irq(pci_irq_vector(pdev, QM_EQ_EVENT_IRQ_VECTOR),
+			  qm_irq, IRQF_SHARED, qm->dev_name, qm);
+	if (ret)
+		return ret;
+
+	if (qm->ver == QM_HW_V2) {
+		ret = request_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR),
+				  qm_aeq_irq, IRQF_SHARED, qm->dev_name, qm);
+		if (ret)
+			goto err_aeq_irq;
+
+		if (qm->fun_type == QM_HW_PF) {
+			ret = request_irq(pci_irq_vector(pdev,
+					  QM_ABNORMAL_EVENT_IRQ_VECTOR),
+					  qm_abnormal_irq, IRQF_SHARED,
+					  qm->dev_name, qm);
+			if (ret)
+				goto err_abnormal_irq;
+		}
+	}
+
+	return 0;
+
+err_abnormal_irq:
+	free_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR), qm);
+err_aeq_irq:
+	free_irq(pci_irq_vector(pdev, QM_EQ_EVENT_IRQ_VECTOR), qm);
+	return ret;
+}
+
+static void qm_irq_unregister(struct hisi_qm *qm)
+{
+	struct pci_dev *pdev = qm->pdev;
+
+	free_irq(pci_irq_vector(pdev, QM_EQ_EVENT_IRQ_VECTOR), qm);
+
+	if (qm->ver == QM_HW_V2) {
+		free_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR), qm);
+
+		if (qm->fun_type == QM_HW_PF)
+			free_irq(pci_irq_vector(pdev,
+				 QM_ABNORMAL_EVENT_IRQ_VECTOR), qm);
+	}
+}
+
+static void qm_init_qp_status(struct hisi_qp *qp)
+{
+	struct hisi_qp_status *qp_status = &qp->qp_status;
+
+	qp_status->sq_tail = 0;
+	qp_status->cq_head = 0;
+	qp_status->cqc_phase = 1;
+	qp_status->flags = 0;
+}
+
+static void qm_vft_data_cfg(struct hisi_qm *qm, enum vft_type type, u32 base,
+			    u32 number)
+{
+	u64 tmp = 0;
+
+	if (number > 0) {
+		switch (type) {
+		case SQC_VFT:
+			switch (qm->ver) {
+			case QM_HW_V1:
+				tmp = QM_SQC_VFT_BUF_SIZE	|
+				      QM_SQC_VFT_SQC_SIZE	|
+				      QM_SQC_VFT_INDEX_NUMBER	|
+				      QM_SQC_VFT_VALID		|
+				      (u64)base << QM_SQC_VFT_START_SQN_SHIFT;
+				break;
+			case QM_HW_V2:
+				tmp = (u64)base << QM_SQC_VFT_START_SQN_SHIFT |
+				      QM_SQC_VFT_VALID |
+				      (u64)(number - 1) << QM_SQC_VFT_SQN_SHIFT;
+				break;
+			case QM_HW_UNKNOWN:
+				break;
+			}
+			break;
+		case CQC_VFT:
+			switch (qm->ver) {
+			case QM_HW_V1:
+				tmp = QM_CQC_VFT_BUF_SIZE	|
+				      QM_CQC_VFT_SQC_SIZE	|
+				      QM_CQC_VFT_INDEX_NUMBER	|
+				      QM_CQC_VFT_VALID;
+				break;
+			case QM_HW_V2:
+				tmp = QM_CQC_VFT_VALID;
+				break;
+			case QM_HW_UNKNOWN:
+				break;
+			}
+			break;
+		}
+	}
+
+	writel(lower_32_bits(tmp), qm->io_base + QM_VFT_CFG_DATA_L);
+	writel(upper_32_bits(tmp), qm->io_base + QM_VFT_CFG_DATA_H);
+}
+
+static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type,
+			     u32 fun_num, u32 base, u32 number)
+{
+	unsigned int val;
+	int ret;
+
+	ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val,
+					 val & BIT(0), 10, 1000);
+	if (ret)
+		return ret;
+
+	writel(0x0, qm->io_base + QM_VFT_CFG_OP_WR);
+	writel(type, qm->io_base + QM_VFT_CFG_TYPE);
+	writel(fun_num, qm->io_base + QM_VFT_CFG);
+
+	qm_vft_data_cfg(qm, type, base, number);
+
+	writel(0x0, qm->io_base + QM_VFT_CFG_RDY);
+	writel(0x1, qm->io_base + QM_VFT_CFG_OP_ENABLE);
+
+	return readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val,
+					  val & BIT(0), 10, 1000);
+}
+
+/* This configuration should be done after qm_dev_mem_reset() */
+static int qm_set_sqc_cqc_vft(struct hisi_qm *qm, u32 fun_num, u32 base,
+			      u32 number)
+{
+	int ret, i;
+
+	for (i = SQC_VFT; i <= CQC_VFT; i++) {
+		ret = qm_set_vft_common(qm, i, fun_num, base, number);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number)
+{
+	u64 sqc_vft;
+	int ret;
+
+	ret = qm_mb(qm, QM_MB_CMD_SQC_VFT_V2, 0, 0, 1);
+	if (ret)
+		return ret;
+
+	sqc_vft = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
+		  ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << 32);
+	*base = QM_SQC_VFT_BASE_MASK_V2 & (sqc_vft >> QM_SQC_VFT_BASE_SHIFT_V2);
+	*number = (QM_SQC_VFT_NUM_MASK_v2 &
+		   (sqc_vft >> QM_SQC_VFT_NUM_SHIFT_V2)) + 1;
+
+	return 0;
+}
+
+static struct hisi_qm *file_to_qm(struct debugfs_file *file)
+{
+	struct qm_debug *debug = file->debug;
+
+	return container_of(debug, struct hisi_qm, debug);
+}
+
+static u32 current_q_read(struct debugfs_file *file)
+{
+	struct hisi_qm *qm = file_to_qm(file);
+
+	return readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) >> QM_DFX_QN_SHIFT;
+}
+
+static int current_q_write(struct debugfs_file *file, u32 val)
+{
+	struct hisi_qm *qm = file_to_qm(file);
+	u32 tmp;
+
+	if (val >= qm->debug.curr_qm_qp_num)
+		return -EINVAL;
+
+	tmp = val << QM_DFX_QN_SHIFT |
+	      (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_FUN_MASK);
+	writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
+
+	tmp = val << QM_DFX_QN_SHIFT |
+	      (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_FUN_MASK);
+	writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
+
+	return 0;
+}
+
+static u32 clear_enable_read(struct debugfs_file *file)
+{
+	struct hisi_qm *qm = file_to_qm(file);
+
+	return readl(qm->io_base + QM_DFX_CNT_CLR_CE);
+}
+
+/* rd_clr_ctrl: 1 enables read-clear of the counters, 0 disables it */
+static int clear_enable_write(struct debugfs_file *file, u32 rd_clr_ctrl)
+{
+	struct hisi_qm *qm = file_to_qm(file);
+
+	if (rd_clr_ctrl > 1)
+		return -EINVAL;
+
+	writel(rd_clr_ctrl, qm->io_base + QM_DFX_CNT_CLR_CE);
+
+	return 0;
+}
+
+static ssize_t qm_debug_read(struct file *filp, char __user *buf,
+			     size_t count, loff_t *pos)
+{
+	struct debugfs_file *file = filp->private_data;
+	enum qm_debug_file index = file->index;
+	char tbuf[QM_DBG_TMP_BUF_LEN];
+	u32 val;
+	int ret;
+
+	mutex_lock(&file->lock);
+	switch (index) {
+	case CURRENT_Q:
+		val = current_q_read(file);
+		break;
+	case CLEAR_ENABLE:
+		val = clear_enable_read(file);
+		break;
+	default:
+		mutex_unlock(&file->lock);
+		return -EINVAL;
+	}
+	mutex_unlock(&file->lock);
+	ret = sprintf(tbuf, "%u\n", val);
+	return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+}
+
+static ssize_t qm_debug_write(struct file *filp, const char __user *buf,
+			      size_t count, loff_t *pos)
+{
+	struct debugfs_file *file = filp->private_data;
+	enum qm_debug_file index = file->index;
+	unsigned long val;
+	char tbuf[QM_DBG_TMP_BUF_LEN];
+	int len, ret;
+
+	if (*pos != 0)
+		return 0;
+
+	if (count >= QM_DBG_TMP_BUF_LEN)
+		return -ENOSPC;
+
+	len = simple_write_to_buffer(tbuf, QM_DBG_TMP_BUF_LEN - 1, pos, buf,
+				     count);
+	if (len < 0)
+		return len;
+
+	tbuf[len] = '\0';
+	if (kstrtoul(tbuf, 0, &val))
+		return -EFAULT;
+
+	mutex_lock(&file->lock);
+	switch (index) {
+	case CURRENT_Q:
+		ret = current_q_write(file, val);
+		if (ret)
+			goto err_input;
+		break;
+	case CLEAR_ENABLE:
+		ret = clear_enable_write(file, val);
+		if (ret)
+			goto err_input;
+		break;
+	default:
+		ret = -EINVAL;
+		goto err_input;
+	}
+	mutex_unlock(&file->lock);
+
+	return count;
+
+err_input:
+	mutex_unlock(&file->lock);
+	return ret;
+}
+
+static const struct file_operations qm_debug_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = qm_debug_read,
+	.write = qm_debug_write,
+};
+
+struct qm_dfx_registers {
+	char  *reg_name;
+	u64   reg_offset;
+};
+
+#define CNT_CYC_REGS_NUM		10
+static struct qm_dfx_registers qm_dfx_regs[] = {
+	/* XXX_CNT are read-clear registers */
+	{"QM_ECC_1BIT_CNT               ",  0x104000ull},
+	{"QM_ECC_MBIT_CNT               ",  0x104008ull},
+	{"QM_DFX_MB_CNT                 ",  0x104018ull},
+	{"QM_DFX_DB_CNT                 ",  0x104028ull},
+	{"QM_DFX_SQE_CNT                ",  0x104038ull},
+	{"QM_DFX_CQE_CNT                ",  0x104048ull},
+	{"QM_DFX_SEND_SQE_TO_ACC_CNT    ",  0x104050ull},
+	{"QM_DFX_WB_SQE_FROM_ACC_CNT    ",  0x104058ull},
+	{"QM_DFX_ACC_FINISH_CNT         ",  0x104060ull},
+	{"QM_DFX_CQE_ERR_CNT            ",  0x1040b4ull},
+	{"QM_DFX_FUNS_ACTIVE_ST         ",  0x200ull},
+	{"QM_ECC_1BIT_INF               ",  0x104004ull},
+	{"QM_ECC_MBIT_INF               ",  0x10400cull},
+	{"QM_DFX_ACC_RDY_VLD0           ",  0x1040a0ull},
+	{"QM_DFX_ACC_RDY_VLD1           ",  0x1040a4ull},
+	{"QM_DFX_AXI_RDY_VLD            ",  0x1040a8ull},
+	{"QM_DFX_FF_ST0                 ",  0x1040c8ull},
+	{"QM_DFX_FF_ST1                 ",  0x1040ccull},
+	{"QM_DFX_FF_ST2                 ",  0x1040d0ull},
+	{"QM_DFX_FF_ST3                 ",  0x1040d4ull},
+	{"QM_DFX_FF_ST4                 ",  0x1040d8ull},
+	{"QM_DFX_FF_ST5                 ",  0x1040dcull},
+	{"QM_DFX_FF_ST6                 ",  0x1040e0ull},
+	{"QM_IN_IDLE_ST                 ",  0x1040e4ull},
+	{ NULL, 0}
+};
+
+static struct qm_dfx_registers qm_vf_dfx_regs[] = {
+	{"QM_DFX_FUNS_ACTIVE_ST         ",  0x200ull},
+	{ NULL, 0}
+};
+
+static int qm_regs_show(struct seq_file *s, void *unused)
+{
+	struct hisi_qm *qm = s->private;
+	struct qm_dfx_registers *regs;
+	u32 val;
+
+	if (qm->fun_type == QM_HW_PF)
+		regs = qm_dfx_regs;
+	else
+		regs = qm_vf_dfx_regs;
+
+	while (regs->reg_name) {
+		val = readl(qm->io_base + regs->reg_offset);
+		seq_printf(s, "%s= 0x%08x\n", regs->reg_name, val);
+		regs++;
+	}
+
+	return 0;
+}
+
+static int qm_regs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, qm_regs_show, inode->i_private);
+}
+
+static const struct file_operations qm_regs_fops = {
+	.owner = THIS_MODULE,
+	.open = qm_regs_open,
+	.read = seq_read,
+	.release = single_release,
+};
+
+static int qm_create_debugfs_file(struct hisi_qm *qm, enum qm_debug_file index)
+{
+	struct dentry *qm_d = qm->debug.qm_d, *tmp;
+	struct debugfs_file *file = qm->debug.files + index;
+
+	tmp = debugfs_create_file(qm_debug_file_name[index], 0600, qm_d, file,
+				  &qm_debug_fops);
+	if (IS_ERR(tmp))
+		return -ENOENT;
+
+	file->index = index;
+	mutex_init(&file->lock);
+	file->debug = &qm->debug;
+
+	return 0;
+}
+
+static void qm_hw_error_init_v1(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe,
+				u32 msi)
+{
+	dev_info(&qm->pdev->dev,
+		 "QM v%d does not support hw error handle\n", qm->ver);
+
+	writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK);
+}
+
+static void qm_hw_error_init_v2(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe,
+				u32 msi)
+{
+	u32 irq_enable = ce | nfe | fe | msi;
+	u32 irq_unmask = ~irq_enable;
+
+	qm->error_mask = ce | nfe | fe;
+	qm->msi_mask = msi;
+
+	/* configure error type */
+	writel(ce, qm->io_base + QM_RAS_CE_ENABLE);
+	writel(QM_RAS_CE_TIMES_PER_IRQ, qm->io_base + QM_RAS_CE_THRESHOLD);
+	writel(nfe, qm->io_base + QM_RAS_NFE_ENABLE);
+	writel(fe, qm->io_base + QM_RAS_FE_ENABLE);
+
+	/* use RAS irq default, so only set QM_RAS_MSI_INT_SEL for MSI */
+	writel(msi, qm->io_base + QM_RAS_MSI_INT_SEL);
+
+	irq_unmask &= readl(qm->io_base + QM_ABNORMAL_INT_MASK);
+	writel(irq_unmask, qm->io_base + QM_ABNORMAL_INT_MASK);
+}
+
+static void qm_log_hw_error(struct hisi_qm *qm, u32 error_status)
+{
+	const struct hisi_qm_hw_error *err = qm_hw_error;
+	struct device *dev = &qm->pdev->dev;
+	u32 reg_val, type, vf_num;
+
+	while (err->msg) {
+		if (err->int_msk & error_status) {
+			dev_err(dev, "%s [error status=0x%x] found\n",
+				err->msg, err->int_msk);
+
+			if (error_status & QM_DB_TIMEOUT) {
+				reg_val = readl(qm->io_base +
+						QM_ABNORMAL_INF01);
+				type = (reg_val & QM_DB_TIMEOUT_TYPE) >>
+				       QM_DB_TIMEOUT_TYPE_SHIFT;
+				vf_num = reg_val & QM_DB_TIMEOUT_VF;
+				dev_err(dev, "qm %s doorbell timeout in function %u\n",
+					qm_db_timeout[type], vf_num);
+			}
+
+			if (error_status & QM_OF_FIFO_OF) {
+				reg_val = readl(qm->io_base +
+						QM_ABNORMAL_INF00);
+				type = (reg_val & QM_FIFO_OVERFLOW_TYPE) >>
+				       QM_FIFO_OVERFLOW_TYPE_SHIFT;
+				vf_num = reg_val & QM_FIFO_OVERFLOW_VF;
+
+				if (type < ARRAY_SIZE(qm_fifo_overflow))
+					dev_err(dev, "qm %s fifo overflow in function %u\n",
+						qm_fifo_overflow[type],
+						vf_num);
+				else
+					dev_err(dev, "unknown error type\n");
+			}
+		}
+		err++;
+	}
+}
+
+static pci_ers_result_t qm_hw_error_handle_v2(struct hisi_qm *qm)
+{
+	u32 error_status, tmp;
+
+	/* read err sts */
+	tmp = readl(qm->io_base + QM_ABNORMAL_INT_STATUS);
+	error_status = qm->error_mask & tmp;
+
+	if (error_status) {
+		qm_log_hw_error(qm, error_status);
+
+		/* clear err sts */
+		writel(error_status, qm->io_base + QM_ABNORMAL_INT_SOURCE);
+
+		return PCI_ERS_RESULT_NEED_RESET;
+	}
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+static const struct hisi_qm_hw_ops qm_hw_ops_v1 = {
+	.qm_db = qm_db_v1,
+	.get_irq_num = qm_get_irq_num_v1,
+	.hw_error_init = qm_hw_error_init_v1,
+};
+
+static const struct hisi_qm_hw_ops qm_hw_ops_v2 = {
+	.get_vft = qm_get_vft_v2,
+	.qm_db = qm_db_v2,
+	.get_irq_num = qm_get_irq_num_v2,
+	.hw_error_init = qm_hw_error_init_v2,
+	.hw_error_handle = qm_hw_error_handle_v2,
+};
+
+static void *qm_get_avail_sqe(struct hisi_qp *qp)
+{
+	struct hisi_qp_status *qp_status = &qp->qp_status;
+	u16 sq_tail = qp_status->sq_tail;
+
+	if (unlikely(atomic_read(&qp->qp_status.used) == QM_Q_DEPTH))
+		return NULL;
+
+	return qp->sqe + sq_tail * qp->qm->sqe_size;
+}
+
+/**
+ * hisi_qm_create_qp() - Create a queue pair from qm.
+ * @qm: The qm we create a qp from.
+ * @alg_type: Accelerator specific algorithm type in sqc.
+ *
+ * Return the created qp on success, -EBUSY if all qps in the qm are already
+ * allocated, or -ENOMEM if allocating qp memory fails.
+ */
+struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type)
+{
+	struct device *dev = &qm->pdev->dev;
+	struct hisi_qp *qp;
+	int qp_id, ret;
+
+	qp = kzalloc(sizeof(*qp), GFP_KERNEL);
+	if (!qp)
+		return ERR_PTR(-ENOMEM);
+
+	write_lock(&qm->qps_lock);
+
+	qp_id = find_first_zero_bit(qm->qp_bitmap, qm->qp_num);
+	if (qp_id >= qm->qp_num) {
+		write_unlock(&qm->qps_lock);
+		dev_info(&qm->pdev->dev, "QM all queues are busy!\n");
+		ret = -EBUSY;
+		goto err_free_qp;
+	}
+	set_bit(qp_id, qm->qp_bitmap);
+	qm->qp_array[qp_id] = qp;
+
+	write_unlock(&qm->qps_lock);
+
+	qp->qm = qm;
+
+	if (qm->use_dma_api) {
+		qp->qdma.size = qm->sqe_size * QM_Q_DEPTH +
+				sizeof(struct qm_cqe) * QM_Q_DEPTH;
+		qp->qdma.va = dma_alloc_coherent(dev, qp->qdma.size,
+						 &qp->qdma.dma, GFP_KERNEL);
+		if (!qp->qdma.va) {
+			ret = -ENOMEM;
+			goto err_clear_bit;
+		}
+
+		dev_dbg(dev, "allocate qp dma buf(va=%pK, dma=%pad, size=%zx)\n",
+			qp->qdma.va, &qp->qdma.dma, qp->qdma.size);
+	}
+
+	qp->qp_id = qp_id;
+	qp->alg_type = alg_type;
+	INIT_WORK(&qp->work, qm_qp_work_func);
+	qp->wq = alloc_workqueue("hisi_qm", WQ_UNBOUND | WQ_HIGHPRI |
+				 WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 0);
+	if (!qp->wq) {
+		ret = -ENOMEM;
+		goto err_free_qp_mem;
+	}
+
+	return qp;
+
+err_free_qp_mem:
+	if (qm->use_dma_api)
+		dma_free_coherent(dev, qp->qdma.size, qp->qdma.va,
+				  qp->qdma.dma);
+err_clear_bit:
+	write_lock(&qm->qps_lock);
+	qm->qp_array[qp_id] = NULL;
+	clear_bit(qp_id, qm->qp_bitmap);
+	write_unlock(&qm->qps_lock);
+err_free_qp:
+	kfree(qp);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_create_qp);
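+
+/*
+ * A minimal usage sketch (illustrative only): the return value is
+ * ERR_PTR-encoded, so callers check it with IS_ERR()/PTR_ERR() rather
+ * than for NULL:
+ *
+ *	struct hisi_qp *qp = hisi_qm_create_qp(qm, alg_type);
+ *
+ *	if (IS_ERR(qp))
+ *		return PTR_ERR(qp);
+ */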
+
+/**
+ * hisi_qm_release_qp() - Release a qp back to its qm.
+ * @qp: The qp we want to release.
+ *
+ * This function releases the resources of a qp.
+ */
+void hisi_qm_release_qp(struct hisi_qp *qp)
+{
+	struct hisi_qm *qm = qp->qm;
+	struct qm_dma *qdma = &qp->qdma;
+	struct device *dev = &qm->pdev->dev;
+
+	if (qm->use_dma_api && qdma->va)
+		dma_free_coherent(dev, qdma->size, qdma->va, qdma->dma);
+
+	write_lock(&qm->qps_lock);
+	qm->qp_array[qp->qp_id] = NULL;
+	clear_bit(qp->qp_id, qm->qp_bitmap);
+	write_unlock(&qm->qps_lock);
+
+	kfree(qp);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_release_qp);
+
+static int qm_qp_ctx_cfg(struct hisi_qp *qp, int qp_id, int pasid)
+{
+	struct hisi_qm *qm = qp->qm;
+	struct device *dev = &qm->pdev->dev;
+	enum qm_hw_ver ver = qm->ver;
+	struct qm_sqc *sqc;
+	struct qm_cqc *cqc;
+	dma_addr_t sqc_dma;
+	dma_addr_t cqc_dma;
+	int ret;
+
+	qm_init_qp_status(qp);
+
+	sqc = kzalloc(sizeof(struct qm_sqc), GFP_KERNEL);
+	if (!sqc)
+		return -ENOMEM;
+	sqc_dma = dma_map_single(dev, sqc, sizeof(struct qm_sqc),
+				 DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, sqc_dma)) {
+		kfree(sqc);
+		return -ENOMEM;
+	}
+
+	INIT_QC_COMMON(sqc, qp->sqe_dma, pasid);
+	if (ver == QM_HW_V1) {
+		sqc->dw3 = QM_MK_SQC_DW3_V1(0, 0, 0, qm->sqe_size);
+		sqc->w8 = QM_Q_DEPTH - 1;
+	} else if (ver == QM_HW_V2) {
+		sqc->dw3 = QM_MK_SQC_DW3_V2(qm->sqe_size);
+		sqc->w8 = 0; /* rand_qc */
+	}
+	sqc->cq_num = qp_id;
+	sqc->w13 = QM_MK_SQC_W13(0, 1, qp->alg_type);
+
+	ret = qm_mb(qm, QM_MB_CMD_SQC, sqc_dma, qp_id, 0);
+	dma_unmap_single(dev, sqc_dma, sizeof(struct qm_sqc), DMA_TO_DEVICE);
+	kfree(sqc);
+	if (ret)
+		return ret;
+
+	cqc = kzalloc(sizeof(struct qm_cqc), GFP_KERNEL);
+	if (!cqc)
+		return -ENOMEM;
+	cqc_dma = dma_map_single(dev, cqc, sizeof(struct qm_cqc),
+				 DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, cqc_dma)) {
+		kfree(cqc);
+		return -ENOMEM;
+	}
+
+	INIT_QC_COMMON(cqc, qp->cqe_dma, pasid);
+	if (ver == QM_HW_V1) {
+		cqc->dw3 = QM_MK_CQC_DW3_V1(0, 0, 0, 4);
+		cqc->w8 = QM_Q_DEPTH - 1;
+	} else if (ver == QM_HW_V2) {
+		cqc->dw3 = QM_MK_CQC_DW3_V2(4);
+		cqc->w8 = 0;
+	}
+	cqc->dw6 = 1 << QM_CQ_PHASE_SHIFT | 1 << QM_CQ_FLAG_SHIFT;
+
+	ret = qm_mb(qm, QM_MB_CMD_CQC, cqc_dma, qp_id, 0);
+	dma_unmap_single(dev, cqc_dma, sizeof(struct qm_cqc), DMA_TO_DEVICE);
+	kfree(cqc);
+
+	return ret;
+}
+
+/**
+ * hisi_qm_start_qp() - Start a qp into running.
+ * @qp: The qp we want to start to run.
+ * @arg: Accelerator specific argument.
+ *
+ * After this function, the qp can receive requests from the user. Return
+ * qp_id on success, or a negative error code on failure.
+ */
+int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg)
+{
+	struct hisi_qm *qm = qp->qm;
+	struct device *dev = &qm->pdev->dev;
+	enum qm_hw_ver ver = qm->ver;
+	int qp_id = qp->qp_id;
+	int pasid = arg;
+	size_t off = 0;
+	int ret;
+
+#define QP_INIT_BUF(qp, type, size) do { \
+	(qp)->type = ((qp)->qdma.va + (off)); \
+	(qp)->type##_dma = (qp)->qdma.dma + (off); \
+	off += (size); \
+} while (0)
+
+	if (!qp->qdma.dma) {
+		dev_err(dev, "cannot get qm dma buffer\n");
+		return -EINVAL;
+	}
+
+	/* the sq needs 128-byte alignment */
+	if (qp->qdma.dma & QM_SQE_DATA_ALIGN_MASK) {
+		dev_err(dev, "qm sq is not aligned to 128 byte\n");
+		return -EINVAL;
+	}
+
+	QP_INIT_BUF(qp, sqe, qm->sqe_size * QM_Q_DEPTH);
+	QP_INIT_BUF(qp, cqe, sizeof(struct qm_cqe) * QM_Q_DEPTH);
+
+	dev_dbg(dev, "init qp buffer(v%d):\n"
+		     " sqe	(%pK, %lx)\n"
+		     " cqe	(%pK, %lx)\n",
+		     ver, qp->sqe, (unsigned long)qp->sqe_dma,
+		     qp->cqe, (unsigned long)qp->cqe_dma);
+
+	ret = qm_qp_ctx_cfg(qp, qp_id, pasid);
+	if (ret)
+		return ret;
+
+	dev_dbg(dev, "queue %d started\n", qp_id);
+
+	return qp_id;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_start_qp);
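+
+/*
+ * A minimal sketch of the create/start sequence, assuming the qm is already
+ * started and no PASID is used (so arg is 0), mirroring what the zip driver
+ * below does:
+ *
+ *	qp = hisi_qm_create_qp(qm, alg_type);
+ *	if (IS_ERR(qp))
+ *		return PTR_ERR(qp);
+ *
+ *	ret = hisi_qm_start_qp(qp, 0);
+ *	if (ret < 0) {
+ *		hisi_qm_release_qp(qp);
+ *		return ret;
+ *	}
+ */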
+
+/**
+ * hisi_qm_stop_qp() - Stop a qp in qm.
+ * @qp: The qp we want to stop.
+ *
+ * This function is the reverse of hisi_qm_start_qp(). Return 0 on success.
+ */
+int hisi_qm_stop_qp(struct hisi_qp *qp)
+{
+	struct device *dev = &qp->qm->pdev->dev;
+	int i = 0;
+
+	/* the qp is already stopped */
+	if (test_bit(QP_STOP, &qp->qp_status.flags))
+		return 0;
+
+	while (atomic_read(&qp->qp_status.used)) {
+		i++;
+		msleep(20);
+		if (i == 10) {
+			dev_err(dev, "Cannot drain out data for stopping, Force to stop!\n");
+			return 0;
+		}
+	}
+
+	set_bit(QP_STOP, &qp->qp_status.flags);
+
+	dev_dbg(dev, "stop queue %u!", qp->qp_id);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_stop_qp);
+
+/**
+ * hisi_qp_send() - Queue up a task in the hardware queue.
+ * @qp: The qp in which to put the message.
+ * @msg: The message.
+ *
+ * This function will return -EBUSY if the qp is currently full, and -EAGAIN
+ * if the qp is stopped or its related qm is resetting.
+ */
+int hisi_qp_send(struct hisi_qp *qp, const void *msg)
+{
+	struct hisi_qp_status *qp_status = &qp->qp_status;
+	u16 sq_tail = qp_status->sq_tail;
+	u16 sq_tail_next = (sq_tail + 1) % QM_Q_DEPTH;
+	void *sqe = qm_get_avail_sqe(qp);
+
+	if (unlikely(test_bit(QP_STOP, &qp->qp_status.flags))) {
+		dev_info(&qp->qm->pdev->dev, "QP is stopped or resetting\n");
+		return -EAGAIN;
+	}
+
+	if (!sqe)
+		return -EBUSY;
+
+	memcpy(sqe, msg, qp->qm->sqe_size);
+
+	qm_db(qp->qm, qp->qp_id, QM_DOORBELL_CMD_SQ, sq_tail_next, 0);
+	atomic_inc(&qp->qp_status.used);
+	qp_status->sq_tail = sq_tail_next;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hisi_qp_send);
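+
+/*
+ * A minimal send sketch, assuming msg points at a buffer of qm->sqe_size
+ * bytes. Completion is asynchronous: the response is delivered through
+ * qp->req_cb, as the zip driver below does with hisi_zip_acomp_cb():
+ *
+ *	ret = hisi_qp_send(qp, sqe);
+ *	if (ret < 0)
+ *		return ret;
+ */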
+
+static void hisi_qm_cache_wb(struct hisi_qm *qm)
+{
+	unsigned int val;
+
+	if (qm->ver == QM_HW_V2) {
+		writel(0x1, qm->io_base + QM_CACHE_WB_START);
+		if (readl_relaxed_poll_timeout(qm->io_base + QM_CACHE_WB_DONE,
+					       val, val & BIT(0), 10, 1000))
+			dev_err(&qm->pdev->dev, "QM writeback sqc cache fail!\n");
+	}
+}
+
+/**
+ * hisi_qm_init() - Initialize the configuration of a qm.
+ * @qm: The qm to be initialized.
+ *
+ * This function initializes a qm; afterwards hisi_qm_start() can be called to
+ * put the qm to work.
+ */
+int hisi_qm_init(struct hisi_qm *qm)
+{
+	struct pci_dev *pdev = qm->pdev;
+	struct device *dev = &pdev->dev;
+	unsigned int num_vec;
+	int ret;
+
+	switch (qm->ver) {
+	case QM_HW_V1:
+		qm->ops = &qm_hw_ops_v1;
+		break;
+	case QM_HW_V2:
+		qm->ops = &qm_hw_ops_v2;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = pci_enable_device_mem(pdev);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to enable device mem!\n");
+		return ret;
+	}
+
+	ret = pci_request_mem_regions(pdev, qm->dev_name);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "Failed to request mem regions!\n");
+		goto err_disable_pcidev;
+	}
+
+	qm->io_base = ioremap(pci_resource_start(pdev, PCI_BAR_2),
+			      pci_resource_len(qm->pdev, PCI_BAR_2));
+	if (!qm->io_base) {
+		ret = -EIO;
+		goto err_release_mem_regions;
+	}
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	if (ret < 0)
+		goto err_iounmap;
+	pci_set_master(pdev);
+
+	if (!qm->ops->get_irq_num) {
+		ret = -EOPNOTSUPP;
+		goto err_iounmap;
+	}
+	num_vec = qm->ops->get_irq_num(qm);
+	ret = pci_alloc_irq_vectors(pdev, num_vec, num_vec, PCI_IRQ_MSI);
+	if (ret < 0) {
+		dev_err(dev, "Failed to enable MSI vectors!\n");
+		goto err_iounmap;
+	}
+
+	ret = qm_irq_register(qm);
+	if (ret)
+		goto err_free_irq_vectors;
+
+	mutex_init(&qm->mailbox_lock);
+	rwlock_init(&qm->qps_lock);
+
+	dev_dbg(dev, "init qm %s with %s\n", pdev->is_physfn ? "pf" : "vf",
+		qm->use_dma_api ? "dma api" : "iommu api");
+
+	return 0;
+
+err_free_irq_vectors:
+	pci_free_irq_vectors(pdev);
+err_iounmap:
+	iounmap(qm->io_base);
+err_release_mem_regions:
+	pci_release_mem_regions(pdev);
+err_disable_pcidev:
+	pci_disable_device(pdev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_init);
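+
+/*
+ * A minimal probe-time sketch, assuming the accelerator driver embeds the
+ * struct hisi_qm in its own device structure and fills the public fields
+ * first; the values shown are placeholders borrowed from the zip driver:
+ *
+ *	qm->pdev = pdev;
+ *	qm->ver = hisi_qm_get_hw_version(pdev);
+ *	qm->sqe_size = HZIP_SQE_SIZE;
+ *	qm->dev_name = hisi_zip_name;
+ *	qm->fun_type = QM_HW_PF;
+ *	qm->use_dma_api = true;
+ *
+ *	ret = hisi_qm_init(qm);
+ *	if (ret)
+ *		return ret;
+ */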
+
+/**
+ * hisi_qm_uninit() - Uninitialize a qm.
+ * @qm: The qm to be uninitialized.
+ *
+ * This function releases the device resources related to the qm.
+ */
+void hisi_qm_uninit(struct hisi_qm *qm)
+{
+	struct pci_dev *pdev = qm->pdev;
+	struct device *dev = &pdev->dev;
+
+	if (qm->use_dma_api && qm->qdma.va) {
+		hisi_qm_cache_wb(qm);
+		dma_free_coherent(dev, qm->qdma.size,
+				  qm->qdma.va, qm->qdma.dma);
+		memset(&qm->qdma, 0, sizeof(qm->qdma));
+	}
+
+	qm_irq_unregister(qm);
+	pci_free_irq_vectors(pdev);
+	iounmap(qm->io_base);
+	pci_release_mem_regions(pdev);
+	pci_disable_device(pdev);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_uninit);
+
+/**
+ * hisi_qm_get_vft() - Get vft from a qm.
+ * @qm: The qm we want to get its vft.
+ * @base: The base number of queue in vft.
+ * @number: The number of queues in vft.
+ *
+ * We can allocate multiple queues to a qm by configuring the virtual function
+ * table. This function reads back that configuration. It is normally called
+ * from the VF driver to get the queue information.
+ *
+ * qm hw v1 does not support this interface.
+ */
+int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number)
+{
+	if (!base || !number)
+		return -EINVAL;
+
+	if (!qm->ops->get_vft) {
+		dev_err(&qm->pdev->dev, "Don't support vft read!\n");
+		return -EINVAL;
+	}
+
+	return qm->ops->get_vft(qm, base, number);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_get_vft);
+
+/**
+ * hisi_qm_set_vft() - Set "virtual function table" for a qm.
+ * @qm: The qm in which to set vft, always a PF.
+ * @fun_num: Number of the operated function.
+ * @base: The base number of queue in vft.
+ * @number: The number of queues in vft. 0 means invalid vft.
+ *
+ * This function is always called in the PF driver; it is used to assign queues
+ * among the PF and VFs.
+ *
+ * Assign queues A~B to PF: hisi_qm_set_vft(qm, 0, A, B - A + 1)
+ * Assign queues A~B to VF: hisi_qm_set_vft(qm, 2, A, B - A + 1)
+ * (VF function number 0x2)
+ */
+int hisi_qm_set_vft(struct hisi_qm *qm, u32 fun_num, u32 base,
+		    u32 number)
+{
+	u32 max_q_num = qm->ctrl_qp_num;
+
+	if (base >= max_q_num || number > max_q_num ||
+	    (base + number) > max_q_num)
+		return -EINVAL;
+
+	return qm_set_sqc_cqc_vft(qm, fun_num, base, number);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_set_vft);
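+
+/*
+ * Worked example for the kernel-doc above (values illustrative only): to
+ * assign queues 64..127 to the VF with function number 2, the PF driver
+ * would call:
+ *
+ *	ret = hisi_qm_set_vft(qm, 2, 64, 64);
+ */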
+
+static void qm_init_eq_aeq_status(struct hisi_qm *qm)
+{
+	struct hisi_qm_status *status = &qm->status;
+
+	status->eq_head = 0;
+	status->aeq_head = 0;
+	status->eqc_phase = 1;
+	status->aeqc_phase = 1;
+}
+
+static int qm_eq_ctx_cfg(struct hisi_qm *qm)
+{
+	struct device *dev = &qm->pdev->dev;
+	struct qm_eqc *eqc;
+	struct qm_aeqc *aeqc;
+	dma_addr_t eqc_dma;
+	dma_addr_t aeqc_dma;
+	int ret;
+
+	qm_init_eq_aeq_status(qm);
+
+	eqc = kzalloc(sizeof(struct qm_eqc), GFP_KERNEL);
+	if (!eqc)
+		return -ENOMEM;
+	eqc_dma = dma_map_single(dev, eqc, sizeof(struct qm_eqc),
+				 DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, eqc_dma)) {
+		kfree(eqc);
+		return -ENOMEM;
+	}
+
+	eqc->base_l = lower_32_bits(qm->eqe_dma);
+	eqc->base_h = upper_32_bits(qm->eqe_dma);
+	if (qm->ver == QM_HW_V1)
+		eqc->dw3 = QM_EQE_AEQE_SIZE;
+	eqc->dw6 = (QM_Q_DEPTH - 1) | (1 << QM_EQC_PHASE_SHIFT);
+	ret = qm_mb(qm, QM_MB_CMD_EQC, eqc_dma, 0, 0);
+	dma_unmap_single(dev, eqc_dma, sizeof(struct qm_eqc), DMA_TO_DEVICE);
+	kfree(eqc);
+	if (ret)
+		return ret;
+
+	aeqc = kzalloc(sizeof(struct qm_aeqc), GFP_KERNEL);
+	if (!aeqc)
+		return -ENOMEM;
+	aeqc_dma = dma_map_single(dev, aeqc, sizeof(struct qm_aeqc),
+				  DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, aeqc_dma)) {
+		kfree(aeqc);
+		return -ENOMEM;
+	}
+
+	aeqc->base_l = lower_32_bits(qm->aeqe_dma);
+	aeqc->base_h = upper_32_bits(qm->aeqe_dma);
+	aeqc->dw6 = (QM_Q_DEPTH - 1) | (1 << QM_EQC_PHASE_SHIFT);
+
+	ret = qm_mb(qm, QM_MB_CMD_AEQC, aeqc_dma, 0, 0);
+	dma_unmap_single(dev, aeqc_dma, sizeof(struct qm_aeqc), DMA_TO_DEVICE);
+	kfree(aeqc);
+
+	return ret;
+}
+
+static int __hisi_qm_start(struct hisi_qm *qm)
+{
+	struct pci_dev *pdev = qm->pdev;
+	struct device *dev = &pdev->dev;
+	size_t off = 0;
+	int ret;
+
+#define QM_INIT_BUF(qm, type, num) do { \
+	(qm)->type = ((qm)->qdma.va + (off)); \
+	(qm)->type##_dma = (qm)->qdma.dma + (off); \
+	off += QMC_ALIGN(sizeof(struct qm_##type) * (num)); \
+} while (0)
+
+	WARN_ON(!qm->qdma.dma);
+
+	if (qm->qp_num == 0)
+		return -EINVAL;
+
+	if (qm->fun_type == QM_HW_PF) {
+		ret = qm_dev_mem_reset(qm);
+		if (ret)
+			return ret;
+
+		ret = hisi_qm_set_vft(qm, 0, qm->qp_base, qm->qp_num);
+		if (ret)
+			return ret;
+	}
+
+	QM_INIT_BUF(qm, eqe, QM_Q_DEPTH);
+	QM_INIT_BUF(qm, aeqe, QM_Q_DEPTH);
+	QM_INIT_BUF(qm, sqc, qm->qp_num);
+	QM_INIT_BUF(qm, cqc, qm->qp_num);
+
+	dev_dbg(dev, "init qm buffer:\n"
+		     " eqe	(%pK, %lx)\n"
+		     " aeqe	(%pK, %lx)\n"
+		     " sqc	(%pK, %lx)\n"
+		     " cqc	(%pK, %lx)\n",
+		     qm->eqe, (unsigned long)qm->eqe_dma,
+		     qm->aeqe, (unsigned long)qm->aeqe_dma,
+		     qm->sqc, (unsigned long)qm->sqc_dma,
+		     qm->cqc, (unsigned long)qm->cqc_dma);
+
+	ret = qm_eq_ctx_cfg(qm);
+	if (ret)
+		return ret;
+
+	ret = qm_mb(qm, QM_MB_CMD_SQC_BT, qm->sqc_dma, 0, 0);
+	if (ret)
+		return ret;
+
+	ret = qm_mb(qm, QM_MB_CMD_CQC_BT, qm->cqc_dma, 0, 0);
+	if (ret)
+		return ret;
+
+	writel(0x0, qm->io_base + QM_VF_EQ_INT_MASK);
+	writel(0x0, qm->io_base + QM_VF_AEQ_INT_MASK);
+
+	return 0;
+}
+
+/**
+ * hisi_qm_start() - Start a qm.
+ * @qm: The qm to be started.
+ *
+ * This function starts a qm; afterwards qps can be allocated from it.
+ */
+int hisi_qm_start(struct hisi_qm *qm)
+{
+	struct device *dev = &qm->pdev->dev;
+
+	dev_dbg(dev, "qm start with %d queue pairs\n", qm->qp_num);
+
+	if (!qm->qp_num) {
+		dev_err(dev, "qp_num should not be 0\n");
+		return -EINVAL;
+	}
+
+	if (!qm->qp_bitmap) {
+		qm->qp_bitmap = devm_kcalloc(dev, BITS_TO_LONGS(qm->qp_num),
+					     sizeof(long), GFP_KERNEL);
+		qm->qp_array = devm_kcalloc(dev, qm->qp_num,
+					    sizeof(struct hisi_qp *),
+					    GFP_KERNEL);
+		if (!qm->qp_bitmap || !qm->qp_array)
+			return -ENOMEM;
+	}
+
+	if (!qm->use_dma_api) {
+		dev_dbg(&qm->pdev->dev, "qm delay start\n");
+		return 0;
+	} else if (!qm->qdma.va) {
+		qm->qdma.size = QMC_ALIGN(sizeof(struct qm_eqe) * QM_Q_DEPTH) +
+				QMC_ALIGN(sizeof(struct qm_aeqe) * QM_Q_DEPTH) +
+				QMC_ALIGN(sizeof(struct qm_sqc) * qm->qp_num) +
+				QMC_ALIGN(sizeof(struct qm_cqc) * qm->qp_num);
+		qm->qdma.va = dma_alloc_coherent(dev, qm->qdma.size,
+						 &qm->qdma.dma, GFP_KERNEL);
+		dev_dbg(dev, "allocate qm dma buf(va=%pK, dma=%pad, size=%zx)\n",
+			qm->qdma.va, &qm->qdma.dma, qm->qdma.size);
+		if (!qm->qdma.va)
+			return -ENOMEM;
+	}
+
+	return __hisi_qm_start(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_start);
+
+/**
+ * hisi_qm_stop() - Stop a qm.
+ * @qm: The qm which will be stopped.
+ *
+ * This function stops the qm and its qps; afterwards the qm cannot accept new
+ * requests. Related resources are not released in this state, so hisi_qm_start
+ * can be used to start the qm again.
+ */
+int hisi_qm_stop(struct hisi_qm *qm)
+{
+	struct device *dev;
+	struct hisi_qp *qp;
+	int ret = 0, i;
+
+	if (!qm || !qm->pdev) {
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	dev = &qm->pdev->dev;
+
+	/* Mask eq and aeq irq */
+	writel(0x1, qm->io_base + QM_VF_EQ_INT_MASK);
+	writel(0x1, qm->io_base + QM_VF_AEQ_INT_MASK);
+
+	/* Stop all qps belonging to this qm */
+	for (i = 0; i < qm->qp_num; i++) {
+		qp = qm->qp_array[i];
+		if (qp) {
+			ret = hisi_qm_stop_qp(qp);
+			if (ret < 0) {
+				dev_err(dev, "Failed to stop qp%d!\n", i);
+				return -EBUSY;
+			}
+		}
+	}
+
+	if (qm->fun_type == QM_HW_PF) {
+		ret = hisi_qm_set_vft(qm, 0, 0, 0);
+		if (ret < 0)
+			dev_err(dev, "Failed to set vft!\n");
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_stop);
+
+/**
+ * hisi_qm_debug_init() - Initialize qm related debugfs files.
+ * @qm: The qm for which we want to add debugfs files.
+ *
+ * Create qm related debugfs files.
+ */
+int hisi_qm_debug_init(struct hisi_qm *qm)
+{
+	struct dentry *qm_d, *qm_regs;
+	int i, ret;
+
+	qm_d = debugfs_create_dir("qm", qm->debug.debug_root);
+	if (IS_ERR(qm_d))
+		return -ENOENT;
+	qm->debug.qm_d = qm_d;
+
+	/* only show this in PF */
+	if (qm->fun_type == QM_HW_PF)
+		for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++)
+			if (qm_create_debugfs_file(qm, i)) {
+				ret = -ENOENT;
+				goto failed_to_create;
+			}
+
+	qm_regs = debugfs_create_file("qm_regs", 0444, qm->debug.qm_d, qm,
+				      &qm_regs_fops);
+	if (IS_ERR(qm_regs)) {
+		ret = -ENOENT;
+		goto failed_to_create;
+	}
+
+	return 0;
+
+failed_to_create:
+	debugfs_remove_recursive(qm_d);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(hisi_qm_debug_init);
+
+/**
+ * hisi_qm_debug_regs_clear() - Clear qm debug related registers.
+ * @qm: The qm whose debug registers we want to clear.
+ */
+void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
+{
+	struct qm_dfx_registers *regs;
+	int i;
+
+	/* clear current_q */
+	writel(0x0, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
+	writel(0x0, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
+
+	/*
+	 * these registers are read-clear, so enable clearing, read them once
+	 * to reset the counters, then disable clearing again.
+	 */
+	writel(0x1, qm->io_base + QM_DFX_CNT_CLR_CE);
+
+	regs = qm_dfx_regs;
+	for (i = 0; i < CNT_CYC_REGS_NUM; i++) {
+		readl(qm->io_base + regs->reg_offset);
+		regs++;
+	}
+
+	writel(0x0, qm->io_base + QM_DFX_CNT_CLR_CE);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear);
+
+/**
+ * hisi_qm_hw_error_init() - Configure qm hardware error report method.
+ * @qm: The qm which we want to configure.
+ * @ce: Bit mask of errors to configure as correctable.
+ * @nfe: Bit mask of errors to configure as non-fatal.
+ * @fe: Bit mask of errors to configure as fatal.
+ * @msi: Bit mask of errors to be reported by message signalled interrupt.
+ *
+ * Hardware errors of the qm can be reported either by RAS interrupts, which
+ * are handled by UEFI and then PCIe AER, or by device MSI. The user can
+ * configure each error to use either of these two methods. For RAS interrupts,
+ * an error can be configured as a correctable, non-fatal or fatal error.
+ *
+ * Set the bits of an error in ce, nfe, fe or msi to enable the related report
+ * method. Reporting of an error is masked if its bit is not configured in any
+ * of them.
+ */
+void hisi_qm_hw_error_init(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe,
+			   u32 msi)
+{
+	if (!qm->ops->hw_error_init) {
+		dev_err(&qm->pdev->dev, "QM version %d doesn't support hw error handling!\n",
+			qm->ver);
+		return;
+	}
+
+	qm->ops->hw_error_init(qm, ce, nfe, fe, msi);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_hw_error_init);
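+
+/*
+ * A minimal configuration sketch using the base masks from qm.h: report
+ * correctable errors as CE and the remaining base errors as NFE over RAS,
+ * with nothing routed to MSI (an illustrative choice, not a requirement):
+ *
+ *	hisi_qm_hw_error_init(qm, QM_BASE_CE, QM_BASE_NFE, 0, 0);
+ */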
+
+/**
+ * hisi_qm_hw_error_handle() - Handle qm non-fatal hardware errors.
+ * @qm: The qm which has non-fatal hardware errors.
+ *
+ * Accelerators use this function to handle qm non-fatal hardware errors.
+ */
+int hisi_qm_hw_error_handle(struct hisi_qm *qm)
+{
+	if (!qm->ops->hw_error_handle) {
+		dev_err(&qm->pdev->dev, "QM version %d doesn't support hw error report!\n",
+			qm->ver);
+		return PCI_ERS_RESULT_NONE;
+	}
+
+	return qm->ops->hw_error_handle(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_hw_error_handle);
+
+/**
+ * hisi_qm_get_hw_version() - Get hardware version of a qm.
+ * @pdev: The device whose hardware version we want to get.
+ *
+ * This function gets the hardware version of a qm. Return QM_HW_UNKNOWN
+ * if the hardware version is not supported.
+ */
+enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev)
+{
+	switch (pdev->revision) {
+	case QM_HW_V1:
+	case QM_HW_V2:
+		return pdev->revision;
+	default:
+		return QM_HW_UNKNOWN;
+	}
+}
+EXPORT_SYMBOL_GPL(hisi_qm_get_hw_version);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Zhou Wang <wangzhou1@hisilicon.com>");
+MODULE_DESCRIPTION("HiSilicon Accelerator queue manager driver");
diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h
new file mode 100644
index 0000000..70e672ae
--- /dev/null
+++ b/drivers/crypto/hisilicon/qm.h
@@ -0,0 +1,215 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 HiSilicon Limited. */
+#ifndef HISI_ACC_QM_H
+#define HISI_ACC_QM_H
+
+#include <linux/bitfield.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+/* qm user domain */
+#define QM_ARUSER_M_CFG_1		0x100088
+#define AXUSER_SNOOP_ENABLE		BIT(30)
+#define AXUSER_CMD_TYPE			GENMASK(14, 12)
+#define AXUSER_CMD_SMMU_NORMAL		1
+#define AXUSER_NS			BIT(6)
+#define AXUSER_NO			BIT(5)
+#define AXUSER_FP			BIT(4)
+#define AXUSER_SSV			BIT(0)
+#define AXUSER_BASE			(AXUSER_SNOOP_ENABLE |		\
+					FIELD_PREP(AXUSER_CMD_TYPE,	\
+					AXUSER_CMD_SMMU_NORMAL) |	\
+					AXUSER_NS | AXUSER_NO | AXUSER_FP)
+#define QM_ARUSER_M_CFG_ENABLE		0x100090
+#define ARUSER_M_CFG_ENABLE		0xfffffffe
+#define QM_AWUSER_M_CFG_1		0x100098
+#define QM_AWUSER_M_CFG_ENABLE		0x1000a0
+#define AWUSER_M_CFG_ENABLE		0xfffffffe
+#define QM_WUSER_M_CFG_ENABLE		0x1000a8
+#define WUSER_M_CFG_ENABLE		0xffffffff
+
+/* qm cache */
+#define QM_CACHE_CTL			0x100050
+#define SQC_CACHE_ENABLE		BIT(0)
+#define CQC_CACHE_ENABLE		BIT(1)
+#define SQC_CACHE_WB_ENABLE		BIT(4)
+#define SQC_CACHE_WB_THRD		GENMASK(10, 5)
+#define CQC_CACHE_WB_ENABLE		BIT(11)
+#define CQC_CACHE_WB_THRD		GENMASK(17, 12)
+#define QM_AXI_M_CFG			0x1000ac
+#define AXI_M_CFG			0xffff
+#define QM_AXI_M_CFG_ENABLE		0x1000b0
+#define AXI_M_CFG_ENABLE		0xffffffff
+#define QM_PEH_AXUSER_CFG		0x1000cc
+#define QM_PEH_AXUSER_CFG_ENABLE	0x1000d0
+#define PEH_AXUSER_CFG			0x401001
+#define PEH_AXUSER_CFG_ENABLE		0xffffffff
+
+#define QM_DFX_MB_CNT_VF		0x104010
+#define QM_DFX_DB_CNT_VF		0x104020
+#define QM_DFX_SQE_CNT_VF_SQN		0x104030
+#define QM_DFX_CQE_CNT_VF_CQN		0x104040
+#define QM_DFX_QN_SHIFT			16
+#define CURRENT_FUN_MASK		GENMASK(5, 0)
+#define CURRENT_Q_MASK			GENMASK(31, 16)
+
+#define QM_AXI_RRESP			BIT(0)
+#define QM_AXI_BRESP			BIT(1)
+#define QM_ECC_MBIT			BIT(2)
+#define QM_ECC_1BIT			BIT(3)
+#define QM_ACC_GET_TASK_TIMEOUT		BIT(4)
+#define QM_ACC_DO_TASK_TIMEOUT		BIT(5)
+#define QM_ACC_WB_NOT_READY_TIMEOUT	BIT(6)
+#define QM_SQ_CQ_VF_INVALID		BIT(7)
+#define QM_CQ_VF_INVALID		BIT(8)
+#define QM_SQ_VF_INVALID		BIT(9)
+#define QM_DB_TIMEOUT			BIT(10)
+#define QM_OF_FIFO_OF			BIT(11)
+#define QM_DB_RANDOM_INVALID		BIT(12)
+
+#define QM_BASE_NFE	(QM_AXI_RRESP | QM_AXI_BRESP | QM_ECC_MBIT | \
+			 QM_ACC_GET_TASK_TIMEOUT | QM_DB_TIMEOUT | \
+			 QM_OF_FIFO_OF)
+#define QM_BASE_CE			QM_ECC_1BIT
+
+#define QM_Q_DEPTH			1024
+
+enum qp_state {
+	QP_STOP,
+};
+
+enum qm_hw_ver {
+	QM_HW_UNKNOWN = -1,
+	QM_HW_V1 = 0x20,
+	QM_HW_V2 = 0x21,
+};
+
+enum qm_fun_type {
+	QM_HW_PF,
+	QM_HW_VF,
+};
+
+enum qm_debug_file {
+	CURRENT_Q,
+	CLEAR_ENABLE,
+	DEBUG_FILE_NUM,
+};
+
+struct debugfs_file {
+	enum qm_debug_file index;
+	struct mutex lock;
+	struct qm_debug *debug;
+};
+
+struct qm_debug {
+	u32 curr_qm_qp_num;
+	struct dentry *debug_root;
+	struct dentry *qm_d;
+	struct debugfs_file files[DEBUG_FILE_NUM];
+};
+
+struct qm_dma {
+	void *va;
+	dma_addr_t dma;
+	size_t size;
+};
+
+struct hisi_qm_status {
+	u32 eq_head;
+	bool eqc_phase;
+	u32 aeq_head;
+	bool aeqc_phase;
+	unsigned long flags;
+};
+
+struct hisi_qm {
+	enum qm_hw_ver ver;
+	enum qm_fun_type fun_type;
+	const char *dev_name;
+	struct pci_dev *pdev;
+	void __iomem *io_base;
+	u32 sqe_size;
+	u32 qp_base;
+	u32 qp_num;
+	u32 ctrl_qp_num;
+
+	struct qm_dma qdma;
+	struct qm_sqc *sqc;
+	struct qm_cqc *cqc;
+	struct qm_eqe *eqe;
+	struct qm_aeqe *aeqe;
+	dma_addr_t sqc_dma;
+	dma_addr_t cqc_dma;
+	dma_addr_t eqe_dma;
+	dma_addr_t aeqe_dma;
+
+	struct hisi_qm_status status;
+
+	rwlock_t qps_lock;
+	unsigned long *qp_bitmap;
+	struct hisi_qp **qp_array;
+
+	struct mutex mailbox_lock;
+
+	const struct hisi_qm_hw_ops *ops;
+
+	struct qm_debug debug;
+
+	u32 error_mask;
+	u32 msi_mask;
+
+	bool use_dma_api;
+};
+
+struct hisi_qp_status {
+	atomic_t used;
+	u16 sq_tail;
+	u16 cq_head;
+	bool cqc_phase;
+	unsigned long flags;
+};
+
+struct hisi_qp_ops {
+	int (*fill_sqe)(void *sqe, void *q_parm, void *d_parm);
+};
+
+struct hisi_qp {
+	u32 qp_id;
+	u8 alg_type;
+	u8 req_type;
+
+	struct qm_dma qdma;
+	void *sqe;
+	struct qm_cqe *cqe;
+	dma_addr_t sqe_dma;
+	dma_addr_t cqe_dma;
+
+	struct hisi_qp_status qp_status;
+	struct hisi_qp_ops *hw_ops;
+	void *qp_ctx;
+	void (*req_cb)(struct hisi_qp *qp, void *data);
+	struct work_struct work;
+	struct workqueue_struct *wq;
+
+	struct hisi_qm *qm;
+};
+
+int hisi_qm_init(struct hisi_qm *qm);
+void hisi_qm_uninit(struct hisi_qm *qm);
+int hisi_qm_start(struct hisi_qm *qm);
+int hisi_qm_stop(struct hisi_qm *qm);
+struct hisi_qp *hisi_qm_create_qp(struct hisi_qm *qm, u8 alg_type);
+int hisi_qm_start_qp(struct hisi_qp *qp, unsigned long arg);
+int hisi_qm_stop_qp(struct hisi_qp *qp);
+void hisi_qm_release_qp(struct hisi_qp *qp);
+int hisi_qp_send(struct hisi_qp *qp, const void *msg);
+int hisi_qm_get_vft(struct hisi_qm *qm, u32 *base, u32 *number);
+int hisi_qm_set_vft(struct hisi_qm *qm, u32 fun_num, u32 base, u32 number);
+int hisi_qm_debug_init(struct hisi_qm *qm);
+void hisi_qm_hw_error_init(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe,
+			   u32 msi);
+int hisi_qm_hw_error_handle(struct hisi_qm *qm);
+enum qm_hw_ver hisi_qm_get_hw_version(struct pci_dev *pdev);
+void hisi_qm_debug_regs_clear(struct hisi_qm *qm);
+#endif
diff --git a/drivers/crypto/hisilicon/sec/sec_algs.c b/drivers/crypto/hisilicon/sec/sec_algs.c
index 02768af..e0508ea 100644
--- a/drivers/crypto/hisilicon/sec/sec_algs.c
+++ b/drivers/crypto/hisilicon/sec/sec_algs.c
@@ -9,7 +9,7 @@
 
 #include <crypto/aes.h>
 #include <crypto/algapi.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/skcipher.h>
 #include <crypto/xts.h>
 #include <crypto/internal/skcipher.h>
@@ -347,25 +347,21 @@
 static int sec_alg_skcipher_setkey_des_ecb(struct crypto_skcipher *tfm,
 					   const u8 *key, unsigned int keylen)
 {
-	if (keylen != DES_KEY_SIZE)
-		return -EINVAL;
-
-	return sec_alg_skcipher_setkey(tfm, key, keylen, SEC_C_DES_ECB_64);
+	return verify_skcipher_des_key(tfm, key) ?:
+	       sec_alg_skcipher_setkey(tfm, key, keylen, SEC_C_DES_ECB_64);
 }
 
 static int sec_alg_skcipher_setkey_des_cbc(struct crypto_skcipher *tfm,
 					   const u8 *key, unsigned int keylen)
 {
-	if (keylen != DES_KEY_SIZE)
-		return -EINVAL;
-
-	return sec_alg_skcipher_setkey(tfm, key, keylen, SEC_C_DES_CBC_64);
+	return verify_skcipher_des_key(tfm, key) ?:
+	       sec_alg_skcipher_setkey(tfm, key, keylen, SEC_C_DES_CBC_64);
 }
 
 static int sec_alg_skcipher_setkey_3des_ecb(struct crypto_skcipher *tfm,
 					    const u8 *key, unsigned int keylen)
 {
-	return unlikely(des3_verify_key(tfm, key)) ?:
+	return verify_skcipher_des3_key(tfm, key) ?:
 	       sec_alg_skcipher_setkey(tfm, key, keylen,
 				       SEC_C_3DES_ECB_192_3KEY);
 }
@@ -373,7 +369,7 @@
 static int sec_alg_skcipher_setkey_3des_cbc(struct crypto_skcipher *tfm,
 					    const u8 *key, unsigned int keylen)
 {
-	return unlikely(des3_verify_key(tfm, key)) ?:
+	return verify_skcipher_des3_key(tfm, key) ?:
 	       sec_alg_skcipher_setkey(tfm, key, keylen,
 				       SEC_C_3DES_CBC_192_3KEY);
 }
diff --git a/drivers/crypto/hisilicon/sgl.c b/drivers/crypto/hisilicon/sgl.c
new file mode 100644
index 0000000..e083d17
--- /dev/null
+++ b/drivers/crypto/hisilicon/sgl.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 HiSilicon Limited. */
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
+#include "./sgl.h"
+
+#define HISI_ACC_SGL_SGE_NR_MIN		1
+#define HISI_ACC_SGL_SGE_NR_MAX		255
+#define HISI_ACC_SGL_SGE_NR_DEF		10
+#define HISI_ACC_SGL_NR_MAX		256
+#define HISI_ACC_SGL_ALIGN_SIZE		64
+
+static int acc_sgl_sge_set(const char *val, const struct kernel_param *kp)
+{
+	int ret;
+	u32 n;
+
+	if (!val)
+		return -EINVAL;
+
+	ret = kstrtou32(val, 10, &n);
+	if (ret != 0 || n > HISI_ACC_SGL_SGE_NR_MAX || n == 0)
+		return -EINVAL;
+
+	return param_set_int(val, kp);
+}
+
+static const struct kernel_param_ops acc_sgl_sge_ops = {
+	.set = acc_sgl_sge_set,
+	.get = param_get_int,
+};
+
+static u32 acc_sgl_sge_nr = HISI_ACC_SGL_SGE_NR_DEF;
+module_param_cb(acc_sgl_sge_nr, &acc_sgl_sge_ops, &acc_sgl_sge_nr, 0444);
+MODULE_PARM_DESC(acc_sgl_sge_nr, "Number of sges in an sgl (1-255)");
+
+struct acc_hw_sge {
+	dma_addr_t buf;
+	void *page_ctrl;
+	__le32 len;
+	__le32 pad;
+	__le32 pad0;
+	__le32 pad1;
+};
+
+/* use default sgl head size 64B */
+struct hisi_acc_hw_sgl {
+	dma_addr_t next_dma;
+	__le16 entry_sum_in_chain;
+	__le16 entry_sum_in_sgl;
+	__le16 entry_length_in_sgl;
+	__le16 pad0;
+	__le64 pad1[5];
+	struct hisi_acc_hw_sgl *next;
+	struct acc_hw_sge sge_entries[];
+} __aligned(1);
+
+/**
+ * hisi_acc_create_sgl_pool() - Create a hw sgl pool.
+ * @dev: The device to which the hw sgl pool belongs.
+ * @pool: Pointer to the pool.
+ * @count: Count of hisi_acc_hw_sgl in the pool.
+ *
+ * This function creates a hw sgl pool; afterwards the user can get hw sgl
+ * memory from it.
+ */
+int hisi_acc_create_sgl_pool(struct device *dev,
+			     struct hisi_acc_sgl_pool *pool, u32 count)
+{
+	u32 sgl_size;
+	u32 size;
+
+	if (!dev || !pool || !count)
+		return -EINVAL;
+
+	sgl_size = sizeof(struct acc_hw_sge) * acc_sgl_sge_nr +
+		   sizeof(struct hisi_acc_hw_sgl);
+	size = sgl_size * count;
+
+	pool->sgl = dma_alloc_coherent(dev, size, &pool->sgl_dma, GFP_KERNEL);
+	if (!pool->sgl)
+		return -ENOMEM;
+
+	pool->size = size;
+	pool->count = count;
+	pool->sgl_size = sgl_size;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(hisi_acc_create_sgl_pool);
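+
+/*
+ * A minimal pool-creation sketch; allocating two entries per queue slot
+ * (one sgl for the source and one for the destination of each request) is
+ * what the zip driver below does:
+ *
+ *	struct hisi_acc_sgl_pool pool;
+ *
+ *	ret = hisi_acc_create_sgl_pool(dev, &pool, QM_Q_DEPTH << 1);
+ *	if (ret)
+ *		return ret;
+ */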
+
+/**
+ * hisi_acc_free_sgl_pool() - Free a hw sgl pool.
+ * @dev: The device to which the hw sgl pool belongs.
+ * @pool: Pointer to the pool.
+ *
+ * This function frees the memory of a hw sgl pool.
+ */
+void hisi_acc_free_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool)
+{
+	dma_free_coherent(dev, pool->size, pool->sgl, pool->sgl_dma);
+	memset(pool, 0, sizeof(struct hisi_acc_sgl_pool));
+}
+EXPORT_SYMBOL_GPL(hisi_acc_free_sgl_pool);
+
+struct hisi_acc_hw_sgl *acc_get_sgl(struct hisi_acc_sgl_pool *pool, u32 index,
+				    dma_addr_t *hw_sgl_dma)
+{
+	if (!pool || !hw_sgl_dma || index >= pool->count || !pool->sgl)
+		return ERR_PTR(-EINVAL);
+
+	*hw_sgl_dma = pool->sgl_dma + pool->sgl_size * index;
+	return (void *)pool->sgl + pool->sgl_size * index;
+}
+
+void acc_put_sgl(struct hisi_acc_sgl_pool *pool, u32 index) {}
+
+static void sg_map_to_hw_sg(struct scatterlist *sgl,
+			    struct acc_hw_sge *hw_sge)
+{
+	hw_sge->buf = sgl->dma_address;
+	hw_sge->len = sgl->dma_length;
+}
+
+static void inc_hw_sgl_sge(struct hisi_acc_hw_sgl *hw_sgl)
+{
+	hw_sgl->entry_sum_in_sgl++;
+}
+
+static void update_hw_sgl_sum_sge(struct hisi_acc_hw_sgl *hw_sgl, u16 sum)
+{
+	hw_sgl->entry_sum_in_chain = sum;
+}
+
+/**
+ * hisi_acc_sg_buf_map_to_hw_sgl() - Map a scatterlist to a hw sgl.
+ * @dev: The device to which the hw sgl belongs.
+ * @sgl: Scatterlist which will be mapped to the hw sgl.
+ * @pool: Pool in which the hw sgl memory will be allocated.
+ * @index: Index of the hisi_acc_hw_sgl in the pool.
+ * @hw_sgl_dma: The dma address of the allocated hw sgl.
+ *
+ * This function builds a hw sgl according to the input sgl; the user can then
+ * use hw_sgl_dma as src/dst in its BD. Only a single hw sgl is supported
+ * currently.
+ */
+struct hisi_acc_hw_sgl *
+hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
+			      struct scatterlist *sgl,
+			      struct hisi_acc_sgl_pool *pool,
+			      u32 index, dma_addr_t *hw_sgl_dma)
+{
+	struct hisi_acc_hw_sgl *curr_hw_sgl;
+	dma_addr_t curr_sgl_dma = 0;
+	struct acc_hw_sge *curr_hw_sge;
+	struct scatterlist *sg;
+	int sg_n = sg_nents(sgl);
+	int i, ret;
+
+	if (!dev || !sgl || !pool || !hw_sgl_dma || sg_n > acc_sgl_sge_nr)
+		return ERR_PTR(-EINVAL);
+
+	ret = dma_map_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
+	if (!ret)
+		return ERR_PTR(-EINVAL);
+
+	curr_hw_sgl = acc_get_sgl(pool, index, &curr_sgl_dma);
+	if (IS_ERR(curr_hw_sgl)) {
+		ret = PTR_ERR(curr_hw_sgl);
+		goto err_unmap_sg;
+	}
+	curr_hw_sgl->entry_length_in_sgl = acc_sgl_sge_nr;
+	curr_hw_sge = curr_hw_sgl->sge_entries;
+
+	for_each_sg(sgl, sg, sg_n, i) {
+		sg_map_to_hw_sg(sg, curr_hw_sge);
+		inc_hw_sgl_sge(curr_hw_sgl);
+		curr_hw_sge++;
+	}
+
+	update_hw_sgl_sum_sge(curr_hw_sgl, acc_sgl_sge_nr);
+	*hw_sgl_dma = curr_sgl_dma;
+
+	return curr_hw_sgl;
+
+err_unmap_sg:
+	dma_unmap_sg(dev, sgl, sg_n, DMA_BIDIRECTIONAL);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(hisi_acc_sg_buf_map_to_hw_sgl);
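+
+/*
+ * A minimal map/unmap sketch, assuming a pool created as above and an index
+ * reserved by the caller; the returned dma address is what gets written into
+ * the hardware descriptor:
+ *
+ *	hw_sgl = hisi_acc_sg_buf_map_to_hw_sgl(dev, sgl, &pool, index, &dma);
+ *	if (IS_ERR(hw_sgl))
+ *		return PTR_ERR(hw_sgl);
+ *
+ *	(submit the descriptor and wait for completion)
+ *
+ *	hisi_acc_sg_buf_unmap(dev, sgl, hw_sgl);
+ */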
+
+/**
+ * hisi_acc_sg_buf_unmap() - Unmap allocated hw sgl.
+ * @dev: The device to which the hw sgl belongs.
+ * @sgl: Related scatterlist.
+ * @hw_sgl: Virtual address of the hw sgl.
+ *
+ * This function unmaps the allocated hw sgl and clears its entry counters.
+ */
+void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl,
+			   struct hisi_acc_hw_sgl *hw_sgl)
+{
+	dma_unmap_sg(dev, sgl, sg_nents(sgl), DMA_BIDIRECTIONAL);
+
+	hw_sgl->entry_sum_in_chain = 0;
+	hw_sgl->entry_sum_in_sgl = 0;
+	hw_sgl->entry_length_in_sgl = 0;
+}
+EXPORT_SYMBOL_GPL(hisi_acc_sg_buf_unmap);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Zhou Wang <wangzhou1@hisilicon.com>");
+MODULE_DESCRIPTION("HiSilicon Accelerator SGL support");
diff --git a/drivers/crypto/hisilicon/sgl.h b/drivers/crypto/hisilicon/sgl.h
new file mode 100644
index 0000000..3ac8871
--- /dev/null
+++ b/drivers/crypto/hisilicon/sgl.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 HiSilicon Limited. */
+#ifndef HISI_ACC_SGL_H
+#define HISI_ACC_SGL_H
+
+struct hisi_acc_sgl_pool {
+	struct hisi_acc_hw_sgl *sgl;
+	dma_addr_t sgl_dma;
+	size_t size;
+	u32 count;
+	size_t sgl_size;
+};
+
+struct hisi_acc_hw_sgl *
+hisi_acc_sg_buf_map_to_hw_sgl(struct device *dev,
+			      struct scatterlist *sgl,
+			      struct hisi_acc_sgl_pool *pool,
+			      u32 index, dma_addr_t *hw_sgl_dma);
+void hisi_acc_sg_buf_unmap(struct device *dev, struct scatterlist *sgl,
+			   struct hisi_acc_hw_sgl *hw_sgl);
+int hisi_acc_create_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool,
+			     u32 count);
+void hisi_acc_free_sgl_pool(struct device *dev, struct hisi_acc_sgl_pool *pool);
+#endif
diff --git a/drivers/crypto/hisilicon/zip/Makefile b/drivers/crypto/hisilicon/zip/Makefile
new file mode 100644
index 0000000..a936f09
--- /dev/null
+++ b/drivers/crypto/hisilicon/zip/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_CRYPTO_DEV_HISI_ZIP) += hisi_zip.o
+hisi_zip-objs = zip_main.o zip_crypto.o
diff --git a/drivers/crypto/hisilicon/zip/zip.h b/drivers/crypto/hisilicon/zip/zip.h
new file mode 100644
index 0000000..ffb00d9
--- /dev/null
+++ b/drivers/crypto/hisilicon/zip/zip.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2019 HiSilicon Limited. */
+#ifndef HISI_ZIP_H
+#define HISI_ZIP_H
+
+#undef pr_fmt
+#define pr_fmt(fmt)	"hisi_zip: " fmt
+
+#include <linux/list.h>
+#include "../qm.h"
+#include "../sgl.h"
+
+/* hisi_zip_sqe dw3 */
+#define HZIP_BD_STATUS_M			GENMASK(7, 0)
+/* hisi_zip_sqe dw9 */
+#define HZIP_REQ_TYPE_M				GENMASK(7, 0)
+#define HZIP_ALG_TYPE_ZLIB			0x02
+#define HZIP_ALG_TYPE_GZIP			0x03
+#define HZIP_BUF_TYPE_M				GENMASK(11, 8)
+#define HZIP_PBUFFER				0x0
+#define HZIP_SGL				0x1
+
+enum hisi_zip_error_type {
+	/* negative compression: output would be larger than input */
+	HZIP_NC_ERR = 0x0d,
+};
+
+struct hisi_zip_ctrl;
+
+struct hisi_zip {
+	struct hisi_qm qm;
+	struct list_head list;
+	struct hisi_zip_ctrl *ctrl;
+};
+
+struct hisi_zip_sqe {
+	u32 consumed;
+	u32 produced;
+	u32 comp_data_length;
+	u32 dw3;
+	u32 input_data_length;
+	u32 lba_l;
+	u32 lba_h;
+	u32 dw7;
+	u32 dw8;
+	u32 dw9;
+	u32 dw10;
+	u32 priv_info;
+	u32 dw12;
+	u32 tag;
+	u32 dest_avail_out;
+	u32 rsvd0;
+	u32 comp_head_addr_l;
+	u32 comp_head_addr_h;
+	u32 source_addr_l;
+	u32 source_addr_h;
+	u32 dest_addr_l;
+	u32 dest_addr_h;
+	u32 stream_ctx_addr_l;
+	u32 stream_ctx_addr_h;
+	u32 cipher_key1_addr_l;
+	u32 cipher_key1_addr_h;
+	u32 cipher_key2_addr_l;
+	u32 cipher_key2_addr_h;
+	u32 rsvd1[4];
+};
+
+struct hisi_zip *find_zip_device(int node);
+int hisi_zip_register_to_crypto(void);
+void hisi_zip_unregister_from_crypto(void);
+#endif
diff --git a/drivers/crypto/hisilicon/zip/zip_crypto.c b/drivers/crypto/hisilicon/zip/zip_crypto.c
new file mode 100644
index 0000000..5a3f84d
--- /dev/null
+++ b/drivers/crypto/hisilicon/zip/zip_crypto.c
@@ -0,0 +1,653 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 HiSilicon Limited. */
+#include <crypto/internal/acompress.h>
+#include <linux/bitfield.h>
+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+#include "zip.h"
+
+#define HZIP_ZLIB_HEAD_SIZE			2
+#define HZIP_GZIP_HEAD_SIZE			10
+
+#define GZIP_HEAD_FHCRC_BIT			BIT(1)
+#define GZIP_HEAD_FEXTRA_BIT			BIT(2)
+#define GZIP_HEAD_FNAME_BIT			BIT(3)
+#define GZIP_HEAD_FCOMMENT_BIT			BIT(4)
+
+#define GZIP_HEAD_FLG_SHIFT			3
+#define GZIP_HEAD_FEXTRA_SHIFT			10
+#define GZIP_HEAD_FEXTRA_XLEN			2
+#define GZIP_HEAD_FHCRC_SIZE			2
+
+#define HZIP_CTX_Q_NUM				2
+#define HZIP_GZIP_HEAD_BUF			256
+#define HZIP_ALG_PRIORITY			300
+
+static const u8 zlib_head[HZIP_ZLIB_HEAD_SIZE] = {0x78, 0x9c};
+static const u8 gzip_head[HZIP_GZIP_HEAD_SIZE] = {0x1f, 0x8b, 0x08, 0x0, 0x0,
+						  0x0, 0x0, 0x0, 0x0, 0x03};
+enum hisi_zip_alg_type {
+	HZIP_ALG_TYPE_COMP = 0,
+	HZIP_ALG_TYPE_DECOMP = 1,
+};
+
+#define COMP_NAME_TO_TYPE(alg_name)					\
+	(!strcmp((alg_name), "zlib-deflate") ? HZIP_ALG_TYPE_ZLIB :	\
+	 !strcmp((alg_name), "gzip") ? HZIP_ALG_TYPE_GZIP : 0)		\
+
+#define TO_HEAD_SIZE(req_type)						\
+	(((req_type) == HZIP_ALG_TYPE_ZLIB) ? sizeof(zlib_head) :	\
+	 ((req_type) == HZIP_ALG_TYPE_GZIP) ? sizeof(gzip_head) : 0)	\
+
+#define TO_HEAD(req_type)						\
+	(((req_type) == HZIP_ALG_TYPE_ZLIB) ? zlib_head :		\
+	 ((req_type) == HZIP_ALG_TYPE_GZIP) ? gzip_head : 0)		\
+
+struct hisi_zip_req {
+	struct acomp_req *req;
+	struct scatterlist *src;
+	struct scatterlist *dst;
+	size_t slen;
+	size_t dlen;
+	struct hisi_acc_hw_sgl *hw_src;
+	struct hisi_acc_hw_sgl *hw_dst;
+	dma_addr_t dma_src;
+	dma_addr_t dma_dst;
+	int req_id;
+};
+
+struct hisi_zip_req_q {
+	struct hisi_zip_req *q;
+	unsigned long *req_bitmap;
+	rwlock_t req_lock;
+	u16 size;
+};
+
+struct hisi_zip_qp_ctx {
+	struct hisi_qp *qp;
+	struct hisi_zip_sqe zip_sqe;
+	struct hisi_zip_req_q req_q;
+	struct hisi_acc_sgl_pool sgl_pool;
+	struct hisi_zip *zip_dev;
+	struct hisi_zip_ctx *ctx;
+};
+
+struct hisi_zip_ctx {
+#define QPC_COMP	0
+#define QPC_DECOMP	1
+	struct hisi_zip_qp_ctx qp_ctx[HZIP_CTX_Q_NUM];
+};
+
+static void hisi_zip_config_buf_type(struct hisi_zip_sqe *sqe, u8 buf_type)
+{
+	u32 val;
+
+	val = (sqe->dw9) & ~HZIP_BUF_TYPE_M;
+	val |= FIELD_PREP(HZIP_BUF_TYPE_M, buf_type);
+	sqe->dw9 = val;
+}
+
+static void hisi_zip_config_tag(struct hisi_zip_sqe *sqe, u32 tag)
+{
+	sqe->tag = tag;
+}
+
+static void hisi_zip_fill_sqe(struct hisi_zip_sqe *sqe, u8 req_type,
+			      dma_addr_t s_addr, dma_addr_t d_addr, u32 slen,
+			      u32 dlen)
+{
+	memset(sqe, 0, sizeof(struct hisi_zip_sqe));
+
+	sqe->input_data_length = slen;
+	sqe->dw9 = FIELD_PREP(HZIP_REQ_TYPE_M, req_type);
+	sqe->dest_avail_out = dlen;
+	sqe->source_addr_l = lower_32_bits(s_addr);
+	sqe->source_addr_h = upper_32_bits(s_addr);
+	sqe->dest_addr_l = lower_32_bits(d_addr);
+	sqe->dest_addr_h = upper_32_bits(d_addr);
+}
+
+static int hisi_zip_create_qp(struct hisi_qm *qm, struct hisi_zip_qp_ctx *ctx,
+			      int alg_type, int req_type)
+{
+	struct hisi_qp *qp;
+	int ret;
+
+	qp = hisi_qm_create_qp(qm, alg_type);
+	if (IS_ERR(qp))
+		return PTR_ERR(qp);
+
+	qp->req_type = req_type;
+	qp->qp_ctx = ctx;
+	ctx->qp = qp;
+
+	ret = hisi_qm_start_qp(qp, 0);
+	if (ret < 0)
+		goto err_release_qp;
+
+	return 0;
+
+err_release_qp:
+	hisi_qm_release_qp(qp);
+	return ret;
+}
+
+static void hisi_zip_release_qp(struct hisi_zip_qp_ctx *ctx)
+{
+	hisi_qm_stop_qp(ctx->qp);
+	hisi_qm_release_qp(ctx->qp);
+}
+
+static int hisi_zip_ctx_init(struct hisi_zip_ctx *hisi_zip_ctx, u8 req_type)
+{
+	struct hisi_zip *hisi_zip;
+	struct hisi_qm *qm;
+	int ret, i, j;
+
+	/* find the proper zip device */
+	hisi_zip = find_zip_device(cpu_to_node(smp_processor_id()));
+	if (!hisi_zip) {
+		pr_err("Failed to find a proper ZIP device!\n");
+		return -ENODEV;
+	}
+	qm = &hisi_zip->qm;
+
+	for (i = 0; i < HZIP_CTX_Q_NUM; i++) {
+		/* alg_type = 0 for compress, 1 for decompress in hw sqe */
+		ret = hisi_zip_create_qp(qm, &hisi_zip_ctx->qp_ctx[i], i,
+					 req_type);
+		if (ret)
+			goto err;
+
+		hisi_zip_ctx->qp_ctx[i].zip_dev = hisi_zip;
+	}
+
+	return 0;
+err:
+	for (j = i - 1; j >= 0; j--)
+		hisi_zip_release_qp(&hisi_zip_ctx->qp_ctx[j]);
+
+	return ret;
+}
+
+static void hisi_zip_ctx_exit(struct hisi_zip_ctx *hisi_zip_ctx)
+{
+	int i;
+
+	for (i = 1; i >= 0; i--)
+		hisi_zip_release_qp(&hisi_zip_ctx->qp_ctx[i]);
+}
+
+static u16 get_extra_field_size(const u8 *start)
+{
+	return *((u16 *)start) + GZIP_HEAD_FEXTRA_XLEN;
+}
+
+static u32 get_name_field_size(const u8 *start)
+{
+	return strlen(start) + 1;
+}
+
+static u32 get_comment_field_size(const u8 *start)
+{
+	return strlen(start) + 1;
+}
+
+static u32 __get_gzip_head_size(const u8 *src)
+{
+	u8 head_flg = *(src + GZIP_HEAD_FLG_SHIFT);
+	u32 size = GZIP_HEAD_FEXTRA_SHIFT;
+
+	if (head_flg & GZIP_HEAD_FEXTRA_BIT)
+		size += get_extra_field_size(src + size);
+	if (head_flg & GZIP_HEAD_FNAME_BIT)
+		size += get_name_field_size(src + size);
+	if (head_flg & GZIP_HEAD_FCOMMENT_BIT)
+		size += get_comment_field_size(src + size);
+	if (head_flg & GZIP_HEAD_FHCRC_BIT)
+		size += GZIP_HEAD_FHCRC_SIZE;
+
+	return size;
+}
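+
+/*
+ * Worked example (illustrative): for a gzip header with only FNAME set and a
+ * four character file name, the size is the 10 byte fixed part
+ * (GZIP_HEAD_FEXTRA_SHIFT) plus strlen(name) + 1 for the terminating NUL,
+ * i.e. 10 + 5 = 15 bytes.
+ */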
+
+static int hisi_zip_create_req_q(struct hisi_zip_ctx *ctx)
+{
+	struct hisi_zip_req_q *req_q;
+	int i, ret;
+
+	for (i = 0; i < HZIP_CTX_Q_NUM; i++) {
+		req_q = &ctx->qp_ctx[i].req_q;
+		req_q->size = QM_Q_DEPTH;
+
+		req_q->req_bitmap = kcalloc(BITS_TO_LONGS(req_q->size),
+					    sizeof(long), GFP_KERNEL);
+		if (!req_q->req_bitmap) {
+			ret = -ENOMEM;
+			if (i == 0)
+				return ret;
+
+			goto err_free_loop0;
+		}
+		rwlock_init(&req_q->req_lock);
+
+		req_q->q = kcalloc(req_q->size, sizeof(struct hisi_zip_req),
+				   GFP_KERNEL);
+		if (!req_q->q) {
+			ret = -ENOMEM;
+			if (i == 0)
+				goto err_free_bitmap;
+			else
+				goto err_free_loop1;
+		}
+	}
+
+	return 0;
+
+err_free_loop1:
+	kfree(ctx->qp_ctx[QPC_DECOMP].req_q.req_bitmap);
+err_free_loop0:
+	kfree(ctx->qp_ctx[QPC_COMP].req_q.q);
+err_free_bitmap:
+	kfree(ctx->qp_ctx[QPC_COMP].req_q.req_bitmap);
+	return ret;
+}
+
+static void hisi_zip_release_req_q(struct hisi_zip_ctx *ctx)
+{
+	int i;
+
+	for (i = 0; i < HZIP_CTX_Q_NUM; i++) {
+		kfree(ctx->qp_ctx[i].req_q.q);
+		kfree(ctx->qp_ctx[i].req_q.req_bitmap);
+	}
+}
+
+static int hisi_zip_create_sgl_pool(struct hisi_zip_ctx *ctx)
+{
+	struct hisi_zip_qp_ctx *tmp;
+	int i, ret;
+
+	for (i = 0; i < HZIP_CTX_Q_NUM; i++) {
+		tmp = &ctx->qp_ctx[i];
+		ret = hisi_acc_create_sgl_pool(&tmp->qp->qm->pdev->dev,
+					       &tmp->sgl_pool,
+					       QM_Q_DEPTH << 1);
+		if (ret < 0) {
+			if (i == 1)
+				goto err_free_sgl_pool0;
+			return -ENOMEM;
+		}
+	}
+
+	return 0;
+
+err_free_sgl_pool0:
+	hisi_acc_free_sgl_pool(&ctx->qp_ctx[QPC_COMP].qp->qm->pdev->dev,
+			       &ctx->qp_ctx[QPC_COMP].sgl_pool);
+	return -ENOMEM;
+}
+
+static void hisi_zip_release_sgl_pool(struct hisi_zip_ctx *ctx)
+{
+	int i;
+
+	for (i = 0; i < HZIP_CTX_Q_NUM; i++)
+		hisi_acc_free_sgl_pool(&ctx->qp_ctx[i].qp->qm->pdev->dev,
+				       &ctx->qp_ctx[i].sgl_pool);
+}
+
+static void hisi_zip_remove_req(struct hisi_zip_qp_ctx *qp_ctx,
+				struct hisi_zip_req *req)
+{
+	struct hisi_zip_req_q *req_q = &qp_ctx->req_q;
+
+	if (qp_ctx->qp->alg_type == HZIP_ALG_TYPE_COMP)
+		kfree(req->dst);
+	else
+		kfree(req->src);
+
+	write_lock(&req_q->req_lock);
+	clear_bit(req->req_id, req_q->req_bitmap);
+	memset(req, 0, sizeof(struct hisi_zip_req));
+	write_unlock(&req_q->req_lock);
+}
+
+static void hisi_zip_acomp_cb(struct hisi_qp *qp, void *data)
+{
+	struct hisi_zip_sqe *sqe = data;
+	struct hisi_zip_qp_ctx *qp_ctx = qp->qp_ctx;
+	struct hisi_zip_req_q *req_q = &qp_ctx->req_q;
+	struct hisi_zip_req *req = req_q->q + sqe->tag;
+	struct acomp_req *acomp_req = req->req;
+	struct device *dev = &qp->qm->pdev->dev;
+	u32 status, dlen, head_size;
+	int err = 0;
+
+	status = sqe->dw3 & HZIP_BD_STATUS_M;
+
+	if (status != 0 && status != HZIP_NC_ERR) {
+		dev_err(dev, "%scompress fail in qp%u: %u, output: %u\n",
+			(qp->alg_type == 0) ? "" : "de", qp->qp_id, status,
+			sqe->produced);
+		err = -EIO;
+	}
+	dlen = sqe->produced;
+
+	hisi_acc_sg_buf_unmap(dev, req->src, req->hw_src);
+	hisi_acc_sg_buf_unmap(dev, req->dst, req->hw_dst);
+
+	head_size = (qp->alg_type == 0) ? TO_HEAD_SIZE(qp->req_type) : 0;
+	acomp_req->dlen = dlen + head_size;
+
+	if (acomp_req->base.complete)
+		acomp_request_complete(acomp_req, err);
+
+	hisi_zip_remove_req(qp_ctx, req);
+}
+
+static void hisi_zip_set_acomp_cb(struct hisi_zip_ctx *ctx,
+				  void (*fn)(struct hisi_qp *, void *))
+{
+	int i;
+
+	for (i = 0; i < HZIP_CTX_Q_NUM; i++)
+		ctx->qp_ctx[i].qp->req_cb = fn;
+}
+
+static int hisi_zip_acomp_init(struct crypto_acomp *tfm)
+{
+	const char *alg_name = crypto_tfm_alg_name(&tfm->base);
+	struct hisi_zip_ctx *ctx = crypto_tfm_ctx(&tfm->base);
+	int ret;
+
+	ret = hisi_zip_ctx_init(ctx, COMP_NAME_TO_TYPE(alg_name));
+	if (ret)
+		return ret;
+
+	ret = hisi_zip_create_req_q(ctx);
+	if (ret)
+		goto err_ctx_exit;
+
+	ret = hisi_zip_create_sgl_pool(ctx);
+	if (ret)
+		goto err_release_req_q;
+
+	hisi_zip_set_acomp_cb(ctx, hisi_zip_acomp_cb);
+
+	return 0;
+
+err_release_req_q:
+	hisi_zip_release_req_q(ctx);
+err_ctx_exit:
+	hisi_zip_ctx_exit(ctx);
+	return ret;
+}
+
+static void hisi_zip_acomp_exit(struct crypto_acomp *tfm)
+{
+	struct hisi_zip_ctx *ctx = crypto_tfm_ctx(&tfm->base);
+
+	hisi_zip_set_acomp_cb(ctx, NULL);
+	hisi_zip_release_sgl_pool(ctx);
+	hisi_zip_release_req_q(ctx);
+	hisi_zip_ctx_exit(ctx);
+}
+
+static int add_comp_head(struct scatterlist *dst, u8 req_type)
+{
+	int head_size = TO_HEAD_SIZE(req_type);
+	const u8 *head = TO_HEAD(req_type);
+	int ret;
+
+	ret = sg_copy_from_buffer(dst, sg_nents(dst), head, head_size);
+	if (ret != head_size)
+		return -ENOMEM;
+
+	return head_size;
+}
+
+static size_t get_gzip_head_size(struct scatterlist *sgl)
+{
+	char buf[HZIP_GZIP_HEAD_BUF];
+
+	sg_copy_to_buffer(sgl, sg_nents(sgl), buf, sizeof(buf));
+
+	return __get_gzip_head_size(buf);
+}
+
+static size_t get_comp_head_size(struct scatterlist *src, u8 req_type)
+{
+	switch (req_type) {
+	case HZIP_ALG_TYPE_ZLIB:
+		return TO_HEAD_SIZE(HZIP_ALG_TYPE_ZLIB);
+	case HZIP_ALG_TYPE_GZIP:
+		return get_gzip_head_size(src);
+	default:
+		pr_err("request type does not support!\n");
+		return -EINVAL;
+	}
+}
+
+static int get_sg_skip_bytes(struct scatterlist *sgl, size_t bytes,
+			     size_t remains, struct scatterlist **out)
+{
+#define SPLIT_NUM 2
+	size_t split_sizes[SPLIT_NUM];
+	int out_mapped_nents[SPLIT_NUM];
+
+	split_sizes[0] = bytes;
+	split_sizes[1] = remains;
+
+	return sg_split(sgl, 0, 0, SPLIT_NUM, split_sizes, out,
+			out_mapped_nents, GFP_KERNEL);
+}
+
+static struct hisi_zip_req *hisi_zip_create_req(struct acomp_req *req,
+						struct hisi_zip_qp_ctx *qp_ctx,
+						size_t head_size, bool is_comp)
+{
+	struct hisi_zip_req_q *req_q = &qp_ctx->req_q;
+	struct hisi_zip_req *q = req_q->q;
+	struct hisi_zip_req *req_cache;
+	struct scatterlist *out[2];
+	struct scatterlist *sgl;
+	size_t len;
+	int ret, req_id;
+
+	/*
+	 * The hardware does not handle the zlib/gzip compression header. For
+	 * decompression, split req->src so the hardware only sees the sgl
+	 * without the header; for compression, the header has already been
+	 * written to req->dst, so split req->dst and let the hardware output
+	 * the compressed data into the sgl that follows the header.
+	 */
+	if (is_comp) {
+		sgl = req->dst;
+		len = req->dlen - head_size;
+	} else {
+		sgl = req->src;
+		len = req->slen - head_size;
+	}
+
+	ret = get_sg_skip_bytes(sgl, head_size, len, out);
+	if (ret)
+		return ERR_PTR(ret);
+
+	/* the sgl covering the comp head is not needed, so free it now */
+	kfree(out[0]);
+
+	write_lock(&req_q->req_lock);
+
+	req_id = find_first_zero_bit(req_q->req_bitmap, req_q->size);
+	if (req_id >= req_q->size) {
+		write_unlock(&req_q->req_lock);
+		dev_dbg(&qp_ctx->qp->qm->pdev->dev, "req cache is full!\n");
+		kfree(out[1]);
+		return ERR_PTR(-EBUSY);
+	}
+	set_bit(req_id, req_q->req_bitmap);
+
+	req_cache = q + req_id;
+	req_cache->req_id = req_id;
+	req_cache->req = req;
+	if (is_comp) {
+		req_cache->src = req->src;
+		req_cache->dst = out[1];
+		req_cache->slen = req->slen;
+		req_cache->dlen = req->dlen - head_size;
+	} else {
+		req_cache->src = out[1];
+		req_cache->dst = req->dst;
+		req_cache->slen = req->slen - head_size;
+		req_cache->dlen = req->dlen;
+	}
+
+	write_unlock(&req_q->req_lock);
+
+	return req_cache;
+}
+
+static int hisi_zip_do_work(struct hisi_zip_req *req,
+			    struct hisi_zip_qp_ctx *qp_ctx)
+{
+	struct hisi_zip_sqe *zip_sqe = &qp_ctx->zip_sqe;
+	struct hisi_qp *qp = qp_ctx->qp;
+	struct device *dev = &qp->qm->pdev->dev;
+	struct hisi_acc_sgl_pool *pool = &qp_ctx->sgl_pool;
+	dma_addr_t input;
+	dma_addr_t output;
+	int ret;
+
+	if (!req->src || !req->slen || !req->dst || !req->dlen)
+		return -EINVAL;
+
+	req->hw_src = hisi_acc_sg_buf_map_to_hw_sgl(dev, req->src, pool,
+						    req->req_id << 1, &input);
+	if (IS_ERR(req->hw_src))
+		return PTR_ERR(req->hw_src);
+	req->dma_src = input;
+
+	req->hw_dst = hisi_acc_sg_buf_map_to_hw_sgl(dev, req->dst, pool,
+						    (req->req_id << 1) + 1,
+						    &output);
+	if (IS_ERR(req->hw_dst)) {
+		ret = PTR_ERR(req->hw_dst);
+		goto err_unmap_input;
+	}
+	req->dma_dst = output;
+
+	hisi_zip_fill_sqe(zip_sqe, qp->req_type, input, output, req->slen,
+			  req->dlen);
+	hisi_zip_config_buf_type(zip_sqe, HZIP_SGL);
+	hisi_zip_config_tag(zip_sqe, req->req_id);
+
+	/* send command to start a task */
+	ret = hisi_qp_send(qp, zip_sqe);
+	if (ret < 0)
+		goto err_unmap_output;
+
+	return -EINPROGRESS;
+
+err_unmap_output:
+	hisi_acc_sg_buf_unmap(dev, req->dst, req->hw_dst);
+err_unmap_input:
+	hisi_acc_sg_buf_unmap(dev, req->src, req->hw_src);
+	return ret;
+}
+
+static int hisi_zip_acompress(struct acomp_req *acomp_req)
+{
+	struct hisi_zip_ctx *ctx = crypto_tfm_ctx(acomp_req->base.tfm);
+	struct hisi_zip_qp_ctx *qp_ctx = &ctx->qp_ctx[QPC_COMP];
+	struct hisi_zip_req *req;
+	/* must be signed so the error return from add_comp_head() is visible */
+	int head_size;
+	int ret;
+
+	/* write the compression header to the destination first */
+	head_size = add_comp_head(acomp_req->dst, qp_ctx->qp->req_type);
+	if (head_size < 0)
+		return -ENOMEM;
+
+	req = hisi_zip_create_req(acomp_req, qp_ctx, head_size, true);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	ret = hisi_zip_do_work(req, qp_ctx);
+	if (ret != -EINPROGRESS)
+		hisi_zip_remove_req(qp_ctx, req);
+
+	return ret;
+}
+
+static int hisi_zip_adecompress(struct acomp_req *acomp_req)
+{
+	struct hisi_zip_ctx *ctx = crypto_tfm_ctx(acomp_req->base.tfm);
+	struct hisi_zip_qp_ctx *qp_ctx = &ctx->qp_ctx[QPC_DECOMP];
+	struct hisi_zip_req *req;
+	size_t head_size;
+	int ret;
+
+	head_size = get_comp_head_size(acomp_req->src, qp_ctx->qp->req_type);
+
+	req = hisi_zip_create_req(acomp_req, qp_ctx, head_size, false);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	ret = hisi_zip_do_work(req, qp_ctx);
+	if (ret != -EINPROGRESS)
+		hisi_zip_remove_req(qp_ctx, req);
+
+	return ret;
+}
+
+static struct acomp_alg hisi_zip_acomp_zlib = {
+	.init			= hisi_zip_acomp_init,
+	.exit			= hisi_zip_acomp_exit,
+	.compress		= hisi_zip_acompress,
+	.decompress		= hisi_zip_adecompress,
+	.base			= {
+		.cra_name		= "zlib-deflate",
+		.cra_driver_name	= "hisi-zlib-acomp",
+		.cra_module		= THIS_MODULE,
+		.cra_priority           = HZIP_ALG_PRIORITY,
+		.cra_ctxsize		= sizeof(struct hisi_zip_ctx),
+	}
+};
+
+static struct acomp_alg hisi_zip_acomp_gzip = {
+	.init			= hisi_zip_acomp_init,
+	.exit			= hisi_zip_acomp_exit,
+	.compress		= hisi_zip_acompress,
+	.decompress		= hisi_zip_adecompress,
+	.base			= {
+		.cra_name		= "gzip",
+		.cra_driver_name	= "hisi-gzip-acomp",
+		.cra_module		= THIS_MODULE,
+		.cra_priority           = HZIP_ALG_PRIORITY,
+		.cra_ctxsize		= sizeof(struct hisi_zip_ctx),
+	}
+};
+
+int hisi_zip_register_to_crypto(void)
+{
+	int ret = 0;
+
+	ret = crypto_register_acomp(&hisi_zip_acomp_zlib);
+	if (ret) {
+		pr_err("Zlib acomp algorithm registration failed\n");
+		return ret;
+	}
+
+	ret = crypto_register_acomp(&hisi_zip_acomp_gzip);
+	if (ret) {
+		pr_err("Gzip acomp algorithm registration failed\n");
+		crypto_unregister_acomp(&hisi_zip_acomp_zlib);
+	}
+
+	return ret;
+}
+
+void hisi_zip_unregister_from_crypto(void)
+{
+	crypto_unregister_acomp(&hisi_zip_acomp_gzip);
+	crypto_unregister_acomp(&hisi_zip_acomp_zlib);
+}
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
new file mode 100644
index 0000000..6e0ca75
--- /dev/null
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -0,0 +1,1013 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 HiSilicon Limited. */
+#include <linux/acpi.h>
+#include <linux/aer.h>
+#include <linux/bitops.h>
+#include <linux/debugfs.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/seq_file.h>
+#include <linux/topology.h>
+#include "zip.h"
+
+#define PCI_DEVICE_ID_ZIP_PF		0xa250
+#define PCI_DEVICE_ID_ZIP_VF		0xa251
+
+#define HZIP_VF_NUM			63
+#define HZIP_QUEUE_NUM_V1		4096
+#define HZIP_QUEUE_NUM_V2		1024
+
+#define HZIP_CLOCK_GATE_CTRL		0x301004
+#define COMP0_ENABLE			BIT(0)
+#define COMP1_ENABLE			BIT(1)
+#define DECOMP0_ENABLE			BIT(2)
+#define DECOMP1_ENABLE			BIT(3)
+#define DECOMP2_ENABLE			BIT(4)
+#define DECOMP3_ENABLE			BIT(5)
+#define DECOMP4_ENABLE			BIT(6)
+#define DECOMP5_ENABLE			BIT(7)
+#define ALL_COMP_DECOMP_EN		(COMP0_ENABLE | COMP1_ENABLE |	\
+					 DECOMP0_ENABLE | DECOMP1_ENABLE | \
+					 DECOMP2_ENABLE | DECOMP3_ENABLE | \
+					 DECOMP4_ENABLE | DECOMP5_ENABLE)
+#define DECOMP_CHECK_ENABLE		BIT(16)
+#define HZIP_FSM_MAX_CNT		0x301008
+
+#define HZIP_PORT_ARCA_CHE_0		0x301040
+#define HZIP_PORT_ARCA_CHE_1		0x301044
+#define HZIP_PORT_AWCA_CHE_0		0x301060
+#define HZIP_PORT_AWCA_CHE_1		0x301064
+#define CACHE_ALL_EN			0xffffffff
+
+#define HZIP_BD_RUSER_32_63		0x301110
+#define HZIP_SGL_RUSER_32_63		0x30111c
+#define HZIP_DATA_RUSER_32_63		0x301128
+#define HZIP_DATA_WUSER_32_63		0x301134
+#define HZIP_BD_WUSER_32_63		0x301140
+
+#define HZIP_QM_IDEL_STATUS		0x3040e4
+
+#define HZIP_CORE_DEBUG_COMP_0		0x302000
+#define HZIP_CORE_DEBUG_COMP_1		0x303000
+#define HZIP_CORE_DEBUG_DECOMP_0	0x304000
+#define HZIP_CORE_DEBUG_DECOMP_1	0x305000
+#define HZIP_CORE_DEBUG_DECOMP_2	0x306000
+#define HZIP_CORE_DEBUG_DECOMP_3	0x307000
+#define HZIP_CORE_DEBUG_DECOMP_4	0x308000
+#define HZIP_CORE_DEBUG_DECOMP_5	0x309000
+
+#define HZIP_CORE_INT_SOURCE		0x3010A0
+#define HZIP_CORE_INT_MASK		0x3010A4
+#define HZIP_CORE_INT_STATUS		0x3010AC
+#define HZIP_CORE_INT_STATUS_M_ECC	BIT(1)
+#define HZIP_CORE_SRAM_ECC_ERR_INFO	0x301148
+#define SRAM_ECC_ERR_NUM_SHIFT		16
+#define SRAM_ECC_ERR_ADDR_SHIFT		24
+#define HZIP_CORE_INT_DISABLE		0x000007FF
+#define HZIP_COMP_CORE_NUM		2
+#define HZIP_DECOMP_CORE_NUM		6
+#define HZIP_CORE_NUM			(HZIP_COMP_CORE_NUM + \
+					 HZIP_DECOMP_CORE_NUM)
+#define HZIP_SQE_SIZE			128
+#define HZIP_SQ_SIZE			(HZIP_SQE_SIZE * QM_Q_DEPTH)
+#define HZIP_PF_DEF_Q_NUM		64
+#define HZIP_PF_DEF_Q_BASE		0
+
+#define HZIP_SOFT_CTRL_CNT_CLR_CE	0x301000
+#define SOFT_CTRL_CNT_CLR_CE_BIT	BIT(0)
+
+#define HZIP_NUMA_DISTANCE		100
+#define HZIP_BUF_SIZE			22
+
+static const char hisi_zip_name[] = "hisi_zip";
+static struct dentry *hzip_debugfs_root;
+LIST_HEAD(hisi_zip_list);
+DEFINE_MUTEX(hisi_zip_list_lock);
+
+#ifdef CONFIG_NUMA
+static struct hisi_zip *find_zip_device_numa(int node)
+{
+	struct hisi_zip *zip = NULL;
+	struct hisi_zip *hisi_zip;
+	int min_distance = HZIP_NUMA_DISTANCE;
+	struct device *dev;
+
+	list_for_each_entry(hisi_zip, &hisi_zip_list, list) {
+		dev = &hisi_zip->qm.pdev->dev;
+		if (node_distance(dev->numa_node, node) < min_distance) {
+			zip = hisi_zip;
+			min_distance = node_distance(dev->numa_node, node);
+		}
+	}
+
+	return zip;
+}
+#endif
+
+struct hisi_zip *find_zip_device(int node)
+{
+	struct hisi_zip *zip = NULL;
+
+	mutex_lock(&hisi_zip_list_lock);
+#ifdef CONFIG_NUMA
+	zip = find_zip_device_numa(node);
+#else
+	zip = list_first_entry(&hisi_zip_list, struct hisi_zip, list);
+#endif
+	mutex_unlock(&hisi_zip_list_lock);
+
+	return zip;
+}
+
+struct hisi_zip_hw_error {
+	u32 int_msk;
+	const char *msg;
+};
+
+static const struct hisi_zip_hw_error zip_hw_error[] = {
+	{ .int_msk = BIT(0), .msg = "zip_ecc_1bit_err" },
+	{ .int_msk = BIT(1), .msg = "zip_ecc_2bit_err" },
+	{ .int_msk = BIT(2), .msg = "zip_axi_rresp_err" },
+	{ .int_msk = BIT(3), .msg = "zip_axi_bresp_err" },
+	{ .int_msk = BIT(4), .msg = "zip_src_addr_parse_err" },
+	{ .int_msk = BIT(5), .msg = "zip_dst_addr_parse_err" },
+	{ .int_msk = BIT(6), .msg = "zip_pre_in_addr_err" },
+	{ .int_msk = BIT(7), .msg = "zip_pre_in_data_err" },
+	{ .int_msk = BIT(8), .msg = "zip_com_inf_err" },
+	{ .int_msk = BIT(9), .msg = "zip_enc_inf_err" },
+	{ .int_msk = BIT(10), .msg = "zip_pre_out_err" },
+	{ /* sentinel */ }
+};
+
+enum ctrl_debug_file_index {
+	HZIP_CURRENT_QM,
+	HZIP_CLEAR_ENABLE,
+	HZIP_DEBUG_FILE_NUM,
+};
+
+static const char * const ctrl_debug_file_name[] = {
+	[HZIP_CURRENT_QM]   = "current_qm",
+	[HZIP_CLEAR_ENABLE] = "clear_enable",
+};
+
+struct ctrl_debug_file {
+	enum ctrl_debug_file_index index;
+	spinlock_t lock;
+	struct hisi_zip_ctrl *ctrl;
+};
+
+/*
+ * One ZIP controller has one PF and multiple VFs; some global configuration
+ * owned by the PF is kept in this structure.
+ *
+ * Only relevant for the PF.
+ */
+struct hisi_zip_ctrl {
+	u32 num_vfs;
+	struct hisi_zip *hisi_zip;
+	struct dentry *debug_root;
+	struct ctrl_debug_file files[HZIP_DEBUG_FILE_NUM];
+};
+
+enum {
+	HZIP_COMP_CORE0,
+	HZIP_COMP_CORE1,
+	HZIP_DECOMP_CORE0,
+	HZIP_DECOMP_CORE1,
+	HZIP_DECOMP_CORE2,
+	HZIP_DECOMP_CORE3,
+	HZIP_DECOMP_CORE4,
+	HZIP_DECOMP_CORE5,
+};
+
+static const u64 core_offsets[] = {
+	[HZIP_COMP_CORE0]   = 0x302000,
+	[HZIP_COMP_CORE1]   = 0x303000,
+	[HZIP_DECOMP_CORE0] = 0x304000,
+	[HZIP_DECOMP_CORE1] = 0x305000,
+	[HZIP_DECOMP_CORE2] = 0x306000,
+	[HZIP_DECOMP_CORE3] = 0x307000,
+	[HZIP_DECOMP_CORE4] = 0x308000,
+	[HZIP_DECOMP_CORE5] = 0x309000,
+};
+
+static struct debugfs_reg32 hzip_dfx_regs[] = {
+	{"HZIP_GET_BD_NUM                ",  0x00ull},
+	{"HZIP_GET_RIGHT_BD              ",  0x04ull},
+	{"HZIP_GET_ERROR_BD              ",  0x08ull},
+	{"HZIP_DONE_BD_NUM               ",  0x0cull},
+	{"HZIP_WORK_CYCLE                ",  0x10ull},
+	{"HZIP_IDLE_CYCLE                ",  0x18ull},
+	{"HZIP_MAX_DELAY                 ",  0x20ull},
+	{"HZIP_MIN_DELAY                 ",  0x24ull},
+	{"HZIP_AVG_DELAY                 ",  0x28ull},
+	{"HZIP_MEM_VISIBLE_DATA          ",  0x30ull},
+	{"HZIP_MEM_VISIBLE_ADDR          ",  0x34ull},
+	{"HZIP_COMSUMED_BYTE             ",  0x38ull},
+	{"HZIP_PRODUCED_BYTE             ",  0x40ull},
+	{"HZIP_COMP_INF                  ",  0x70ull},
+	{"HZIP_PRE_OUT                   ",  0x78ull},
+	{"HZIP_BD_RD                     ",  0x7cull},
+	{"HZIP_BD_WR                     ",  0x80ull},
+	{"HZIP_GET_BD_AXI_ERR_NUM        ",  0x84ull},
+	{"HZIP_GET_BD_PARSE_ERR_NUM      ",  0x88ull},
+	{"HZIP_ADD_BD_AXI_ERR_NUM        ",  0x8cull},
+	{"HZIP_DECOMP_STF_RELOAD_CURR_ST ",  0x94ull},
+	{"HZIP_DECOMP_LZ77_CURR_ST       ",  0x9cull},
+};
+
+static int pf_q_num_set(const char *val, const struct kernel_param *kp)
+{
+	struct pci_dev *pdev = pci_get_device(PCI_VENDOR_ID_HUAWEI,
+					      PCI_DEVICE_ID_ZIP_PF, NULL);
+	u32 n, q_num;
+	u8 rev_id;
+	int ret;
+
+	if (!val)
+		return -EINVAL;
+
+	if (!pdev) {
+		q_num = min_t(u32, HZIP_QUEUE_NUM_V1, HZIP_QUEUE_NUM_V2);
+		pr_info("No device found currently, assuming queue number is %d\n",
+			q_num);
+	} else {
+		rev_id = pdev->revision;
+		switch (rev_id) {
+		case QM_HW_V1:
+			q_num = HZIP_QUEUE_NUM_V1;
+			break;
+		case QM_HW_V2:
+			q_num = HZIP_QUEUE_NUM_V2;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	ret = kstrtou32(val, 10, &n);
+	if (ret != 0 || n > q_num || n == 0)
+		return -EINVAL;
+
+	return param_set_int(val, kp);
+}
+
+static const struct kernel_param_ops pf_q_num_ops = {
+	.set = pf_q_num_set,
+	.get = param_get_int,
+};
+
+static u32 pf_q_num = HZIP_PF_DEF_Q_NUM;
+module_param_cb(pf_q_num, &pf_q_num_ops, &pf_q_num, 0444);
+MODULE_PARM_DESC(pf_q_num, "Number of queues in PF (v1 1-4096, v2 1-1024)");
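+/*
+ * Illustrative usage (hypothetical value): loading the module with
+ * "modprobe hisi_zip pf_q_num=256" reserves 256 queues for the PF;
+ * pf_q_num_set() rejects 0 and anything above the per-revision maximum.
+ */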
+
+static int uacce_mode;
+module_param(uacce_mode, int, 0);
+
+static const struct pci_device_id hisi_zip_dev_ids[] = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_ZIP_PF) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_HUAWEI, PCI_DEVICE_ID_ZIP_VF) },
+	{ 0, }
+};
+MODULE_DEVICE_TABLE(pci, hisi_zip_dev_ids);
+
+static inline void hisi_zip_add_to_list(struct hisi_zip *hisi_zip)
+{
+	mutex_lock(&hisi_zip_list_lock);
+	list_add_tail(&hisi_zip->list, &hisi_zip_list);
+	mutex_unlock(&hisi_zip_list_lock);
+}
+
+static inline void hisi_zip_remove_from_list(struct hisi_zip *hisi_zip)
+{
+	mutex_lock(&hisi_zip_list_lock);
+	list_del(&hisi_zip->list);
+	mutex_unlock(&hisi_zip_list_lock);
+}
+
+static void hisi_zip_set_user_domain_and_cache(struct hisi_zip *hisi_zip)
+{
+	void __iomem *base = hisi_zip->qm.io_base;
+
+	/* qm user domain */
+	writel(AXUSER_BASE, base + QM_ARUSER_M_CFG_1);
+	writel(ARUSER_M_CFG_ENABLE, base + QM_ARUSER_M_CFG_ENABLE);
+	writel(AXUSER_BASE, base + QM_AWUSER_M_CFG_1);
+	writel(AWUSER_M_CFG_ENABLE, base + QM_AWUSER_M_CFG_ENABLE);
+	writel(WUSER_M_CFG_ENABLE, base + QM_WUSER_M_CFG_ENABLE);
+
+	/* qm cache */
+	writel(AXI_M_CFG, base + QM_AXI_M_CFG);
+	writel(AXI_M_CFG_ENABLE, base + QM_AXI_M_CFG_ENABLE);
+	/* disable FLR triggered by BME(bus master enable) */
+	writel(PEH_AXUSER_CFG, base + QM_PEH_AXUSER_CFG);
+	writel(PEH_AXUSER_CFG_ENABLE, base + QM_PEH_AXUSER_CFG_ENABLE);
+
+	/* cache */
+	writel(CACHE_ALL_EN, base + HZIP_PORT_ARCA_CHE_0);
+	writel(CACHE_ALL_EN, base + HZIP_PORT_ARCA_CHE_1);
+	writel(CACHE_ALL_EN, base + HZIP_PORT_AWCA_CHE_0);
+	writel(CACHE_ALL_EN, base + HZIP_PORT_AWCA_CHE_1);
+
+	/* user domain configurations */
+	writel(AXUSER_BASE, base + HZIP_BD_RUSER_32_63);
+	writel(AXUSER_BASE, base + HZIP_SGL_RUSER_32_63);
+	writel(AXUSER_BASE, base + HZIP_BD_WUSER_32_63);
+	writel(AXUSER_BASE, base + HZIP_DATA_RUSER_32_63);
+	writel(AXUSER_BASE, base + HZIP_DATA_WUSER_32_63);
+
+	/* let's open all compression/decompression cores */
+	writel(DECOMP_CHECK_ENABLE | ALL_COMP_DECOMP_EN,
+	       base + HZIP_CLOCK_GATE_CTRL);
+
+	/* enable sqc writeback */
+	writel(SQC_CACHE_ENABLE | CQC_CACHE_ENABLE | SQC_CACHE_WB_ENABLE |
+	       CQC_CACHE_WB_ENABLE | FIELD_PREP(SQC_CACHE_WB_THRD, 1) |
+	       FIELD_PREP(CQC_CACHE_WB_THRD, 1), base + QM_CACHE_CTL);
+}
+
+static void hisi_zip_hw_error_set_state(struct hisi_zip *hisi_zip, bool state)
+{
+	struct hisi_qm *qm = &hisi_zip->qm;
+
+	if (qm->ver == QM_HW_V1) {
+		writel(HZIP_CORE_INT_DISABLE, qm->io_base + HZIP_CORE_INT_MASK);
+		dev_info(&qm->pdev->dev, "ZIP v%d does not support hw error handling\n",
+			 qm->ver);
+		return;
+	}
+
+	if (state) {
+		/* clear ZIP hw error source if any */
+		writel(HZIP_CORE_INT_DISABLE, hisi_zip->qm.io_base +
+					      HZIP_CORE_INT_SOURCE);
+		/* enable ZIP hw error interrupts */
+		writel(0, hisi_zip->qm.io_base + HZIP_CORE_INT_MASK);
+	} else {
+		/* disable ZIP hw error interrupts */
+		writel(HZIP_CORE_INT_DISABLE,
+		       hisi_zip->qm.io_base + HZIP_CORE_INT_MASK);
+	}
+}
+
+static inline struct hisi_qm *file_to_qm(struct ctrl_debug_file *file)
+{
+	struct hisi_zip *hisi_zip = file->ctrl->hisi_zip;
+
+	return &hisi_zip->qm;
+}
+
+static u32 current_qm_read(struct ctrl_debug_file *file)
+{
+	struct hisi_qm *qm = file_to_qm(file);
+
+	return readl(qm->io_base + QM_DFX_MB_CNT_VF);
+}
+
+static int current_qm_write(struct ctrl_debug_file *file, u32 val)
+{
+	struct hisi_qm *qm = file_to_qm(file);
+	struct hisi_zip_ctrl *ctrl = file->ctrl;
+	u32 vfq_num;
+	u32 tmp;
+
+	if (val > ctrl->num_vfs)
+		return -EINVAL;
+
+	/* Calculate curr_qm_qp_num and store */
+	if (val == 0) {
+		qm->debug.curr_qm_qp_num = qm->qp_num;
+	} else {
+		vfq_num = (qm->ctrl_qp_num - qm->qp_num) / ctrl->num_vfs;
+		if (val == ctrl->num_vfs)
+			qm->debug.curr_qm_qp_num = qm->ctrl_qp_num -
+				qm->qp_num - (ctrl->num_vfs - 1) * vfq_num;
+		else
+			qm->debug.curr_qm_qp_num = vfq_num;
+	}
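+	/*
+	 * Illustrative example (assumed numbers): with ctrl_qp_num = 1024,
+	 * PF qp_num = 64 and num_vfs = 8, each VF gets (1024 - 64) / 8 = 120
+	 * queues; the last VF also absorbs any remainder of the division.
+	 */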
+
+	writel(val, qm->io_base + QM_DFX_MB_CNT_VF);
+	writel(val, qm->io_base + QM_DFX_DB_CNT_VF);
+
+	tmp = val |
+	      (readl(qm->io_base + QM_DFX_SQE_CNT_VF_SQN) & CURRENT_Q_MASK);
+	writel(tmp, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
+
+	tmp = val |
+	      (readl(qm->io_base + QM_DFX_CQE_CNT_VF_CQN) & CURRENT_Q_MASK);
+	writel(tmp, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
+
+	return 0;
+}
+
+static u32 clear_enable_read(struct ctrl_debug_file *file)
+{
+	struct hisi_qm *qm = file_to_qm(file);
+
+	return readl(qm->io_base + HZIP_SOFT_CTRL_CNT_CLR_CE) &
+	       SOFT_CTRL_CNT_CLR_CE_BIT;
+}
+
+static int clear_enable_write(struct ctrl_debug_file *file, u32 val)
+{
+	struct hisi_qm *qm = file_to_qm(file);
+	u32 tmp;
+
+	if (val != 1 && val != 0)
+		return -EINVAL;
+
+	tmp = (readl(qm->io_base + HZIP_SOFT_CTRL_CNT_CLR_CE) &
+	       ~SOFT_CTRL_CNT_CLR_CE_BIT) | val;
+	writel(tmp, qm->io_base + HZIP_SOFT_CTRL_CNT_CLR_CE);
+
+	return 0;
+}
+
+static ssize_t ctrl_debug_read(struct file *filp, char __user *buf,
+			       size_t count, loff_t *pos)
+{
+	struct ctrl_debug_file *file = filp->private_data;
+	char tbuf[HZIP_BUF_SIZE];
+	u32 val;
+	int ret;
+
+	spin_lock_irq(&file->lock);
+	switch (file->index) {
+	case HZIP_CURRENT_QM:
+		val = current_qm_read(file);
+		break;
+	case HZIP_CLEAR_ENABLE:
+		val = clear_enable_read(file);
+		break;
+	default:
+		spin_unlock_irq(&file->lock);
+		return -EINVAL;
+	}
+	spin_unlock_irq(&file->lock);
+	ret = sprintf(tbuf, "%u\n", val);
+	return simple_read_from_buffer(buf, count, pos, tbuf, ret);
+}
+
+static ssize_t ctrl_debug_write(struct file *filp, const char __user *buf,
+				size_t count, loff_t *pos)
+{
+	struct ctrl_debug_file *file = filp->private_data;
+	char tbuf[HZIP_BUF_SIZE];
+	unsigned long val;
+	int len, ret;
+
+	if (*pos != 0)
+		return 0;
+
+	if (count >= HZIP_BUF_SIZE)
+		return -ENOSPC;
+
+	len = simple_write_to_buffer(tbuf, HZIP_BUF_SIZE - 1, pos, buf, count);
+	if (len < 0)
+		return len;
+
+	tbuf[len] = '\0';
+	if (kstrtoul(tbuf, 0, &val))
+		return -EFAULT;
+
+	spin_lock_irq(&file->lock);
+	switch (file->index) {
+	case HZIP_CURRENT_QM:
+		ret = current_qm_write(file, val);
+		if (ret)
+			goto err_input;
+		break;
+	case HZIP_CLEAR_ENABLE:
+		ret = clear_enable_write(file, val);
+		if (ret)
+			goto err_input;
+		break;
+	default:
+		ret = -EINVAL;
+		goto err_input;
+	}
+	spin_unlock_irq(&file->lock);
+
+	return count;
+
+err_input:
+	spin_unlock_irq(&file->lock);
+	return ret;
+}
+
+static const struct file_operations ctrl_debug_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = ctrl_debug_read,
+	.write = ctrl_debug_write,
+};
+
+static int hisi_zip_core_debug_init(struct hisi_zip_ctrl *ctrl)
+{
+	struct hisi_zip *hisi_zip = ctrl->hisi_zip;
+	struct hisi_qm *qm = &hisi_zip->qm;
+	struct device *dev = &qm->pdev->dev;
+	struct debugfs_regset32 *regset;
+	struct dentry *tmp_d, *tmp;
+	char buf[HZIP_BUF_SIZE];
+	int i;
+
+	for (i = 0; i < HZIP_CORE_NUM; i++) {
+		if (i < HZIP_COMP_CORE_NUM)
+			sprintf(buf, "comp_core%d", i);
+		else
+			sprintf(buf, "decomp_core%d", i - HZIP_COMP_CORE_NUM);
+
+		tmp_d = debugfs_create_dir(buf, ctrl->debug_root);
+		if (!tmp_d)
+			return -ENOENT;
+
+		regset = devm_kzalloc(dev, sizeof(*regset), GFP_KERNEL);
+		if (!regset)
+			return -ENOENT;
+
+		regset->regs = hzip_dfx_regs;
+		regset->nregs = ARRAY_SIZE(hzip_dfx_regs);
+		regset->base = qm->io_base + core_offsets[i];
+
+		tmp = debugfs_create_regset32("regs", 0444, tmp_d, regset);
+		if (!tmp)
+			return -ENOENT;
+	}
+
+	return 0;
+}
+
+static int hisi_zip_ctrl_debug_init(struct hisi_zip_ctrl *ctrl)
+{
+	struct dentry *tmp;
+	int i;
+
+	for (i = HZIP_CURRENT_QM; i < HZIP_DEBUG_FILE_NUM; i++) {
+		spin_lock_init(&ctrl->files[i].lock);
+		ctrl->files[i].ctrl = ctrl;
+		ctrl->files[i].index = i;
+
+		tmp = debugfs_create_file(ctrl_debug_file_name[i], 0600,
+					  ctrl->debug_root, ctrl->files + i,
+					  &ctrl_debug_fops);
+		if (!tmp)
+			return -ENOENT;
+	}
+
+	return hisi_zip_core_debug_init(ctrl);
+}
+
+static int hisi_zip_debugfs_init(struct hisi_zip *hisi_zip)
+{
+	struct hisi_qm *qm = &hisi_zip->qm;
+	struct device *dev = &qm->pdev->dev;
+	struct dentry *dev_d;
+	int ret;
+
+	dev_d = debugfs_create_dir(dev_name(dev), hzip_debugfs_root);
+	if (!dev_d)
+		return -ENOENT;
+
+	qm->debug.debug_root = dev_d;
+	ret = hisi_qm_debug_init(qm);
+	if (ret)
+		goto failed_to_create;
+
+	if (qm->fun_type == QM_HW_PF) {
+		hisi_zip->ctrl->debug_root = dev_d;
+		ret = hisi_zip_ctrl_debug_init(hisi_zip->ctrl);
+		if (ret)
+			goto failed_to_create;
+	}
+
+	return 0;
+
+failed_to_create:
+	debugfs_remove_recursive(hzip_debugfs_root);
+	return ret;
+}
+
+static void hisi_zip_debug_regs_clear(struct hisi_zip *hisi_zip)
+{
+	struct hisi_qm *qm = &hisi_zip->qm;
+
+	writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF);
+	writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF);
+	writel(0x0, qm->io_base + HZIP_SOFT_CTRL_CNT_CLR_CE);
+
+	hisi_qm_debug_regs_clear(qm);
+}
+
+static void hisi_zip_debugfs_exit(struct hisi_zip *hisi_zip)
+{
+	struct hisi_qm *qm = &hisi_zip->qm;
+
+	debugfs_remove_recursive(qm->debug.debug_root);
+
+	if (qm->fun_type == QM_HW_PF)
+		hisi_zip_debug_regs_clear(hisi_zip);
+}
+
+static void hisi_zip_hw_error_init(struct hisi_zip *hisi_zip)
+{
+	hisi_qm_hw_error_init(&hisi_zip->qm, QM_BASE_CE,
+			      QM_BASE_NFE | QM_ACC_WB_NOT_READY_TIMEOUT, 0,
+			      QM_DB_RANDOM_INVALID);
+	hisi_zip_hw_error_set_state(hisi_zip, true);
+}
+
+static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)
+{
+	struct hisi_qm *qm = &hisi_zip->qm;
+	struct hisi_zip_ctrl *ctrl;
+
+	ctrl = devm_kzalloc(&qm->pdev->dev, sizeof(*ctrl), GFP_KERNEL);
+	if (!ctrl)
+		return -ENOMEM;
+
+	hisi_zip->ctrl = ctrl;
+	ctrl->hisi_zip = hisi_zip;
+
+	switch (qm->ver) {
+	case QM_HW_V1:
+		qm->ctrl_qp_num = HZIP_QUEUE_NUM_V1;
+		break;
+
+	case QM_HW_V2:
+		qm->ctrl_qp_num = HZIP_QUEUE_NUM_V2;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	hisi_zip_set_user_domain_and_cache(hisi_zip);
+	hisi_zip_hw_error_init(hisi_zip);
+	hisi_zip_debug_regs_clear(hisi_zip);
+
+	return 0;
+}
+
+static int hisi_zip_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct hisi_zip *hisi_zip;
+	enum qm_hw_ver rev_id;
+	struct hisi_qm *qm;
+	int ret;
+
+	rev_id = hisi_qm_get_hw_version(pdev);
+	if (rev_id == QM_HW_UNKNOWN)
+		return -EINVAL;
+
+	hisi_zip = devm_kzalloc(&pdev->dev, sizeof(*hisi_zip), GFP_KERNEL);
+	if (!hisi_zip)
+		return -ENOMEM;
+	pci_set_drvdata(pdev, hisi_zip);
+
+	qm = &hisi_zip->qm;
+	qm->pdev = pdev;
+	qm->ver = rev_id;
+
+	qm->sqe_size = HZIP_SQE_SIZE;
+	qm->dev_name = hisi_zip_name;
+	qm->fun_type = (pdev->device == PCI_DEVICE_ID_ZIP_PF) ? QM_HW_PF :
+								QM_HW_VF;
+	switch (uacce_mode) {
+	case 0:
+		qm->use_dma_api = true;
+		break;
+	case 1:
+		qm->use_dma_api = false;
+		break;
+	case 2:
+		qm->use_dma_api = true;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = hisi_qm_init(qm);
+	if (ret) {
+		dev_err(&pdev->dev, "Failed to init qm!\n");
+		return ret;
+	}
+
+	if (qm->fun_type == QM_HW_PF) {
+		ret = hisi_zip_pf_probe_init(hisi_zip);
+		if (ret)
+			return ret;
+
+		qm->qp_base = HZIP_PF_DEF_Q_BASE;
+		qm->qp_num = pf_q_num;
+	} else if (qm->fun_type == QM_HW_VF) {
+		/*
+		 * There is no way to get the qm configuration from the VM on
+		 * v1 hardware, so force the PF to use HZIP_PF_DEF_Q_NUM and
+		 * allow only one VF to be enabled on v1 hardware.
+		 *
+		 * v2 hardware has no such problem.
+		 */
+		if (qm->ver == QM_HW_V1) {
+			qm->qp_base = HZIP_PF_DEF_Q_NUM;
+			qm->qp_num = HZIP_QUEUE_NUM_V1 - HZIP_PF_DEF_Q_NUM;
+		} else if (qm->ver == QM_HW_V2)
+			/* v2 hardware supports getting the vft via mailbox */
+			hisi_qm_get_vft(qm, &qm->qp_base, &qm->qp_num);
+	}
+
+	ret = hisi_qm_start(qm);
+	if (ret)
+		goto err_qm_uninit;
+
+	ret = hisi_zip_debugfs_init(hisi_zip);
+	if (ret)
+		dev_err(&pdev->dev, "Failed to init debugfs (%d)!\n", ret);
+
+	hisi_zip_add_to_list(hisi_zip);
+
+	return 0;
+
+err_qm_uninit:
+	hisi_qm_uninit(qm);
+	return ret;
+}
+
+/* Currently we only support equal assignment */
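+/*
+ * Illustrative example (assumed numbers): with ctrl_qp_num = 1024 and the PF
+ * holding 64 queues, remain_q_num = 960, so three VFs get 320 queues each;
+ * any remainder of the division is added to the last VF.
+ */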
+static int hisi_zip_vf_q_assign(struct hisi_zip *hisi_zip, int num_vfs)
+{
+	struct hisi_qm *qm = &hisi_zip->qm;
+	u32 qp_num = qm->qp_num;
+	u32 q_base = qp_num;
+	u32 q_num, remain_q_num, i;
+	int ret;
+
+	if (!num_vfs)
+		return -EINVAL;
+
+	remain_q_num = qm->ctrl_qp_num - qp_num;
+	if (remain_q_num < num_vfs)
+		return -EINVAL;
+
+	q_num = remain_q_num / num_vfs;
+	for (i = 1; i <= num_vfs; i++) {
+		if (i == num_vfs)
+			q_num += remain_q_num % num_vfs;
+		ret = hisi_qm_set_vft(qm, i, q_base, q_num);
+		if (ret)
+			return ret;
+		q_base += q_num;
+	}
+
+	return 0;
+}
+
+static int hisi_zip_clear_vft_config(struct hisi_zip *hisi_zip)
+{
+	struct hisi_zip_ctrl *ctrl = hisi_zip->ctrl;
+	struct hisi_qm *qm = &hisi_zip->qm;
+	u32 i, num_vfs = ctrl->num_vfs;
+	int ret;
+
+	for (i = 1; i <= num_vfs; i++) {
+		ret = hisi_qm_set_vft(qm, i, 0, 0);
+		if (ret)
+			return ret;
+	}
+
+	ctrl->num_vfs = 0;
+
+	return 0;
+}
+
+static int hisi_zip_sriov_enable(struct pci_dev *pdev, int max_vfs)
+{
+#ifdef CONFIG_PCI_IOV
+	struct hisi_zip *hisi_zip = pci_get_drvdata(pdev);
+	int pre_existing_vfs, num_vfs, ret;
+
+	pre_existing_vfs = pci_num_vf(pdev);
+
+	if (pre_existing_vfs) {
+		dev_err(&pdev->dev,
+			"Can't enable VF. Please disable pre-enabled VFs!\n");
+		return 0;
+	}
+
+	num_vfs = min_t(int, max_vfs, HZIP_VF_NUM);
+
+	ret = hisi_zip_vf_q_assign(hisi_zip, num_vfs);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't assign queues for VF!\n");
+		return ret;
+	}
+
+	hisi_zip->ctrl->num_vfs = num_vfs;
+
+	ret = pci_enable_sriov(pdev, num_vfs);
+	if (ret) {
+		dev_err(&pdev->dev, "Can't enable VF!\n");
+		hisi_zip_clear_vft_config(hisi_zip);
+		return ret;
+	}
+
+	return num_vfs;
+#else
+	return 0;
+#endif
+}
+
+static int hisi_zip_sriov_disable(struct pci_dev *pdev)
+{
+	struct hisi_zip *hisi_zip = pci_get_drvdata(pdev);
+
+	if (pci_vfs_assigned(pdev)) {
+		dev_err(&pdev->dev,
+			"Can't disable VFs while VFs are assigned!\n");
+		return -EPERM;
+	}
+
+	/* the driver's remove() will be called to free VF resources */
+	pci_disable_sriov(pdev);
+
+	return hisi_zip_clear_vft_config(hisi_zip);
+}
+
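+/*
+ * Entry point for the standard PCI sriov_numvfs sysfs attribute, e.g.
+ * (illustrative): echo 3 > /sys/bus/pci/devices/<bdf>/sriov_numvfs
+ */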
+static int hisi_zip_sriov_configure(struct pci_dev *pdev, int num_vfs)
+{
+	if (num_vfs == 0)
+		return hisi_zip_sriov_disable(pdev);
+	else
+		return hisi_zip_sriov_enable(pdev, num_vfs);
+}
+
+static void hisi_zip_remove(struct pci_dev *pdev)
+{
+	struct hisi_zip *hisi_zip = pci_get_drvdata(pdev);
+	struct hisi_qm *qm = &hisi_zip->qm;
+
+	if (qm->fun_type == QM_HW_PF && hisi_zip->ctrl->num_vfs != 0)
+		hisi_zip_sriov_disable(pdev);
+
+	hisi_zip_debugfs_exit(hisi_zip);
+	hisi_qm_stop(qm);
+
+	if (qm->fun_type == QM_HW_PF)
+		hisi_zip_hw_error_set_state(hisi_zip, false);
+
+	hisi_qm_uninit(qm);
+	hisi_zip_remove_from_list(hisi_zip);
+}
+
+static void hisi_zip_log_hw_error(struct hisi_zip *hisi_zip, u32 err_sts)
+{
+	const struct hisi_zip_hw_error *err = zip_hw_error;
+	struct device *dev = &hisi_zip->qm.pdev->dev;
+	u32 err_val;
+
+	while (err->msg) {
+		if (err->int_msk & err_sts) {
+			dev_warn(dev, "%s [error status=0x%x] found\n",
+				 err->msg, err->int_msk);
+
+			if (HZIP_CORE_INT_STATUS_M_ECC & err->int_msk) {
+				err_val = readl(hisi_zip->qm.io_base +
+						HZIP_CORE_SRAM_ECC_ERR_INFO);
+				dev_warn(dev, "hisi-zip multi ecc sram num=0x%x\n",
+					 ((err_val >> SRAM_ECC_ERR_NUM_SHIFT) &
+					  0xFF));
+				dev_warn(dev, "hisi-zip multi ecc sram addr=0x%x\n",
+					 (err_val >> SRAM_ECC_ERR_ADDR_SHIFT));
+			}
+		}
+		err++;
+	}
+}
+
+static pci_ers_result_t hisi_zip_hw_error_handle(struct hisi_zip *hisi_zip)
+{
+	u32 err_sts;
+
+	/* read err sts */
+	err_sts = readl(hisi_zip->qm.io_base + HZIP_CORE_INT_STATUS);
+
+	if (err_sts) {
+		hisi_zip_log_hw_error(hisi_zip, err_sts);
+		/* clear error interrupts */
+		writel(err_sts, hisi_zip->qm.io_base + HZIP_CORE_INT_SOURCE);
+
+		return PCI_ERS_RESULT_NEED_RESET;
+	}
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+static pci_ers_result_t hisi_zip_process_hw_error(struct pci_dev *pdev)
+{
+	struct hisi_zip *hisi_zip = pci_get_drvdata(pdev);
+	struct device *dev = &pdev->dev;
+	pci_ers_result_t qm_ret, zip_ret;
+
+	if (!hisi_zip) {
+		dev_err(dev,
+			"Can't recover - ZIP error occurred during device init\n");
+		return PCI_ERS_RESULT_NONE;
+	}
+
+	qm_ret = hisi_qm_hw_error_handle(&hisi_zip->qm);
+
+	zip_ret = hisi_zip_hw_error_handle(hisi_zip);
+
+	return (qm_ret == PCI_ERS_RESULT_NEED_RESET ||
+		zip_ret == PCI_ERS_RESULT_NEED_RESET) ?
+	       PCI_ERS_RESULT_NEED_RESET : PCI_ERS_RESULT_RECOVERED;
+}
+
+static pci_ers_result_t hisi_zip_error_detected(struct pci_dev *pdev,
+						pci_channel_state_t state)
+{
+	if (pdev->is_virtfn)
+		return PCI_ERS_RESULT_NONE;
+
+	dev_info(&pdev->dev, "PCI error detected, state = %d\n", state);
+	if (state == pci_channel_io_perm_failure)
+		return PCI_ERS_RESULT_DISCONNECT;
+
+	return hisi_zip_process_hw_error(pdev);
+}
+
+static const struct pci_error_handlers hisi_zip_err_handler = {
+	.error_detected	= hisi_zip_error_detected,
+};
+
+static struct pci_driver hisi_zip_pci_driver = {
+	.name			= "hisi_zip",
+	.id_table		= hisi_zip_dev_ids,
+	.probe			= hisi_zip_probe,
+	.remove			= hisi_zip_remove,
+	.sriov_configure	= hisi_zip_sriov_configure,
+	.err_handler		= &hisi_zip_err_handler,
+};
+
+static void hisi_zip_register_debugfs(void)
+{
+	if (!debugfs_initialized())
+		return;
+
+	hzip_debugfs_root = debugfs_create_dir("hisi_zip", NULL);
+	if (IS_ERR_OR_NULL(hzip_debugfs_root))
+		hzip_debugfs_root = NULL;
+}
+
+static void hisi_zip_unregister_debugfs(void)
+{
+	debugfs_remove_recursive(hzip_debugfs_root);
+}
+
+static int __init hisi_zip_init(void)
+{
+	int ret;
+
+	hisi_zip_register_debugfs();
+
+	ret = pci_register_driver(&hisi_zip_pci_driver);
+	if (ret < 0) {
+		pr_err("Failed to register pci driver.\n");
+		goto err_pci;
+	}
+
+	if (uacce_mode == 0 || uacce_mode == 2) {
+		ret = hisi_zip_register_to_crypto();
+		if (ret < 0) {
+			pr_err("Failed to register driver to crypto.\n");
+			goto err_crypto;
+		}
+	}
+
+	return 0;
+
+err_crypto:
+	pci_unregister_driver(&hisi_zip_pci_driver);
+err_pci:
+	hisi_zip_unregister_debugfs();
+
+	return ret;
+}
+
+static void __exit hisi_zip_exit(void)
+{
+	if (uacce_mode == 0 || uacce_mode == 2)
+		hisi_zip_unregister_from_crypto();
+	pci_unregister_driver(&hisi_zip_pci_driver);
+	hisi_zip_unregister_debugfs();
+}
+
+module_init(hisi_zip_init);
+module_exit(hisi_zip_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Zhou Wang <wangzhou1@hisilicon.com>");
+MODULE_DESCRIPTION("Driver for HiSilicon ZIP accelerator");
diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
index d27c812..fe4cc8b 100644
--- a/drivers/crypto/img-hash.c
+++ b/drivers/crypto/img-hash.c
@@ -958,9 +958,7 @@
 	crypto_init_queue(&hdev->queue, IMG_HASH_QUEUE_LENGTH);
 
 	/* Register bank */
-	hash_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-
-	hdev->io_base = devm_ioremap_resource(dev, hash_res);
+	hdev->io_base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(hdev->io_base)) {
 		err = PTR_ERR(hdev->io_base);
 		dev_err(dev, "can't ioremap, returned %d\n", err);
@@ -980,7 +978,6 @@
 
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
-		dev_err(dev, "no IRQ resource info\n");
 		err = irq;
 		goto res_err;
 	}
diff --git a/drivers/crypto/inside-secure/safexcel.c b/drivers/crypto/inside-secure/safexcel.c
index df43a2c..b456b85 100644
--- a/drivers/crypto/inside-secure/safexcel.c
+++ b/drivers/crypto/inside-secure/safexcel.c
@@ -14,6 +14,7 @@
 #include <linux/module.h>
 #include <linux/of_platform.h>
 #include <linux/of_irq.h>
+#include <linux/pci.h>
 #include <linux/platform_device.h>
 #include <linux/workqueue.h>
 
@@ -27,62 +28,205 @@
 module_param(max_rings, uint, 0644);
 MODULE_PARM_DESC(max_rings, "Maximum number of rings to use.");
 
-static void eip197_trc_cache_init(struct safexcel_crypto_priv *priv)
+static void eip197_trc_cache_setupvirt(struct safexcel_crypto_priv *priv)
 {
-	u32 val, htable_offset;
-	int i, cs_rc_max, cs_ht_wc, cs_trc_rec_wc, cs_trc_lg_rec_wc;
-
-	if (priv->version == EIP197B) {
-		cs_rc_max = EIP197B_CS_RC_MAX;
-		cs_ht_wc = EIP197B_CS_HT_WC;
-		cs_trc_rec_wc = EIP197B_CS_TRC_REC_WC;
-		cs_trc_lg_rec_wc = EIP197B_CS_TRC_LG_REC_WC;
-	} else {
-		cs_rc_max = EIP197D_CS_RC_MAX;
-		cs_ht_wc = EIP197D_CS_HT_WC;
-		cs_trc_rec_wc = EIP197D_CS_TRC_REC_WC;
-		cs_trc_lg_rec_wc = EIP197D_CS_TRC_LG_REC_WC;
-	}
-
-	/* Enable the record cache memory access */
-	val = readl(priv->base + EIP197_CS_RAM_CTRL);
-	val &= ~EIP197_TRC_ENABLE_MASK;
-	val |= EIP197_TRC_ENABLE_0;
-	writel(val, priv->base + EIP197_CS_RAM_CTRL);
-
-	/* Clear all ECC errors */
-	writel(0, priv->base + EIP197_TRC_ECCCTRL);
+	int i;
 
 	/*
-	 * Make sure the cache memory is accessible by taking record cache into
-	 * reset.
+	 * Map all interfaces/rings to register index 0
+	 * so they can share contexts. Without this, the EIP197 will
+	 * assume each interface/ring to be in its own memory domain
+	 * i.e. have its own subset of UNIQUE memory addresses,
+	 * which would cause records with the SAME memory address to
+	 * use DIFFERENT cache buffers, causing both poor cache utilization
+	 * AND serious coherence/invalidation issues.
 	 */
-	val = readl(priv->base + EIP197_TRC_PARAMS);
-	val |= EIP197_TRC_PARAMS_SW_RESET;
-	val &= ~EIP197_TRC_PARAMS_DATA_ACCESS;
-	writel(val, priv->base + EIP197_TRC_PARAMS);
+	for (i = 0; i < 4; i++)
+		writel(0, priv->base + EIP197_FLUE_IFC_LUT(i));
 
-	/* Clear all records */
+	/*
+	 * Initialize other virtualization regs for cache
+	 * These may not be in their reset state ...
+	 */
+	for (i = 0; i < priv->config.rings; i++) {
+		writel(0, priv->base + EIP197_FLUE_CACHEBASE_LO(i));
+		writel(0, priv->base + EIP197_FLUE_CACHEBASE_HI(i));
+		writel(EIP197_FLUE_CONFIG_MAGIC,
+		       priv->base + EIP197_FLUE_CONFIG(i));
+	}
+	writel(0, priv->base + EIP197_FLUE_OFFSETS);
+	writel(0, priv->base + EIP197_FLUE_ARC4_OFFSET);
+}
+
+static void eip197_trc_cache_banksel(struct safexcel_crypto_priv *priv,
+				     u32 addrmid, int *actbank)
+{
+	u32 val;
+	int curbank;
+
+	curbank = addrmid >> 16;
+	if (curbank != *actbank) {
+		val = readl(priv->base + EIP197_CS_RAM_CTRL);
+		val = (val & ~EIP197_CS_BANKSEL_MASK) |
+		      (curbank << EIP197_CS_BANKSEL_OFS);
+		writel(val, priv->base + EIP197_CS_RAM_CTRL);
+		*actbank = curbank;
+	}
+}
+
+static u32 eip197_trc_cache_probe(struct safexcel_crypto_priv *priv,
+				  int maxbanks, u32 probemask)
+{
+	u32 val, addrhi, addrlo, addrmid;
+	int actbank;
+
+	/*
+	 * Probe the actual size of the physically attached cache data RAM
+	 * using a binary subdivision algorithm down to 32 byte cache lines.
+	 */
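+	/*
+	 * Illustrative example (assumed maxbanks = 2): the search starts at
+	 * addrhi = 1 << 18 (256 KiB) and halves the [addrlo, addrhi) window
+	 * each iteration; whenever the top-half marker fails to read back,
+	 * the top half is discarded, so addrhi converges on the real size.
+	 */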
+	addrhi = 1 << (16 + maxbanks);
+	addrlo = 0;
+	actbank = min(maxbanks - 1, 0);
+	while ((addrhi - addrlo) > 32) {
+		/* write marker to lowest address in top half */
+		addrmid = (addrhi + addrlo) >> 1;
+		eip197_trc_cache_banksel(priv, addrmid, &actbank);
+		writel((addrmid | (addrlo << 16)) & probemask,
+			priv->base + EIP197_CLASSIFICATION_RAMS +
+			(addrmid & 0xffff));
+
+		/* write marker to lowest address in bottom half */
+		eip197_trc_cache_banksel(priv, addrlo, &actbank);
+		writel((addrlo | (addrhi << 16)) & probemask,
+			priv->base + EIP197_CLASSIFICATION_RAMS +
+			(addrlo & 0xffff));
+
+		/* read back marker from top half */
+		eip197_trc_cache_banksel(priv, addrmid, &actbank);
+		val = readl(priv->base + EIP197_CLASSIFICATION_RAMS +
+			    (addrmid & 0xffff));
+
+		if (val == ((addrmid | (addrlo << 16)) & probemask)) {
+			/* read back correct, continue with top half */
+			addrlo = addrmid;
+		} else {
+			/* not read back correct, continue with bottom half */
+			addrhi = addrmid;
+		}
+	}
+	return addrhi;
+}
+
+static void eip197_trc_cache_clear(struct safexcel_crypto_priv *priv,
+				   int cs_rc_max, int cs_ht_wc)
+{
+	int i;
+	u32 htable_offset, val, offset;
+
+	/* Clear all records in administration RAM */
 	for (i = 0; i < cs_rc_max; i++) {
-		u32 val, offset = EIP197_CLASSIFICATION_RAMS + i * EIP197_CS_RC_SIZE;
+		offset = EIP197_CLASSIFICATION_RAMS + i * EIP197_CS_RC_SIZE;
 
 		writel(EIP197_CS_RC_NEXT(EIP197_RC_NULL) |
 		       EIP197_CS_RC_PREV(EIP197_RC_NULL),
 		       priv->base + offset);
 
-		val = EIP197_CS_RC_NEXT(i+1) | EIP197_CS_RC_PREV(i-1);
+		val = EIP197_CS_RC_NEXT(i + 1) | EIP197_CS_RC_PREV(i - 1);
 		if (i == 0)
 			val |= EIP197_CS_RC_PREV(EIP197_RC_NULL);
 		else if (i == cs_rc_max - 1)
 			val |= EIP197_CS_RC_NEXT(EIP197_RC_NULL);
-		writel(val, priv->base + offset + sizeof(u32));
+		writel(val, priv->base + offset + 4);
+		/* must also initialize the address key due to ECC! */
+		writel(0, priv->base + offset + 8);
+		writel(0, priv->base + offset + 12);
 	}
 
 	/* Clear the hash table entries */
 	htable_offset = cs_rc_max * EIP197_CS_RC_SIZE;
 	for (i = 0; i < cs_ht_wc; i++)
 		writel(GENMASK(29, 0),
-		       priv->base + EIP197_CLASSIFICATION_RAMS + htable_offset + i * sizeof(u32));
+		       priv->base + EIP197_CLASSIFICATION_RAMS +
+		       htable_offset + i * sizeof(u32));
+}
+
+static void eip197_trc_cache_init(struct safexcel_crypto_priv *priv)
+{
+	u32 val, dsize, asize;
+	int cs_rc_max, cs_ht_wc, cs_trc_rec_wc, cs_trc_lg_rec_wc;
+	int cs_rc_abs_max, cs_ht_sz;
+	int maxbanks;
+
+	/* Setup (dummy) virtualization for cache */
+	eip197_trc_cache_setupvirt(priv);
+
+	/*
+	 * Enable the record cache memory access and
+	 * probe the bank select width
+	 */
+	val = readl(priv->base + EIP197_CS_RAM_CTRL);
+	val &= ~EIP197_TRC_ENABLE_MASK;
+	val |= EIP197_TRC_ENABLE_0 | EIP197_CS_BANKSEL_MASK;
+	writel(val, priv->base + EIP197_CS_RAM_CTRL);
+	val = readl(priv->base + EIP197_CS_RAM_CTRL);
+	maxbanks = ((val & EIP197_CS_BANKSEL_MASK) >>
+		    EIP197_CS_BANKSEL_OFS) + 1;
+
+	/* Clear all ECC errors */
+	writel(0, priv->base + EIP197_TRC_ECCCTRL);
+
+	/*
+	 * Make sure the cache memory is accessible by taking record cache into
+	 * reset. Need data memory access here, not admin access.
+	 */
+	val = readl(priv->base + EIP197_TRC_PARAMS);
+	val |= EIP197_TRC_PARAMS_SW_RESET | EIP197_TRC_PARAMS_DATA_ACCESS;
+	writel(val, priv->base + EIP197_TRC_PARAMS);
+
+	/* Probed data RAM size in bytes */
+	dsize = eip197_trc_cache_probe(priv, maxbanks, 0xffffffff);
+
+	/*
+	 * Now probe the administration RAM size in pretty much the same way,
+	 * except that only the lower 30 bits are writable and we don't need
+	 * bank selects.
+	 */
+	val = readl(priv->base + EIP197_TRC_PARAMS);
+	/* admin access now */
+	val &= ~(EIP197_TRC_PARAMS_DATA_ACCESS | EIP197_CS_BANKSEL_MASK);
+	writel(val, priv->base + EIP197_TRC_PARAMS);
+
+	/* Probed admin RAM size in admin words */
+	asize = eip197_trc_cache_probe(priv, 0, 0xbfffffff) >> 4;
+
+	/* Clear any ECC errors detected while probing! */
+	writel(0, priv->base + EIP197_TRC_ECCCTRL);
+
+	/*
+	 * Determine optimal configuration from RAM sizes
+	 * Note that we assume that the physical RAM configuration is sane
+	 * Therefore, we don't do any parameter error checking here ...
+	 */
+
+	/* For now, just use a single record format covering everything */
+	cs_trc_rec_wc = EIP197_CS_TRC_REC_WC;
+	cs_trc_lg_rec_wc = EIP197_CS_TRC_REC_WC;
+
+	/*
+	 * Step #1: How many records will physically fit?
+	 * Hard upper limit is 1023!
+	 */
+	cs_rc_abs_max = min_t(uint, ((dsize >> 2) / cs_trc_lg_rec_wc), 1023);
+	/* Step #2: Need at least 2 words in the admin RAM per record */
+	cs_rc_max = min_t(uint, cs_rc_abs_max, (asize >> 1));
+	/* Step #3: Determine log2 of hash table size */
+	cs_ht_sz = __fls(asize - cs_rc_max) - 2;
+	/* Step #4: determine current size of hash table in dwords */
+	cs_ht_wc = 16 << cs_ht_sz; /* dwords, not admin words */
+	/* Step #5: add back excess words and see if we can fit more records */
+	cs_rc_max = min_t(uint, cs_rc_abs_max, asize - (cs_ht_wc >> 4));
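+	/*
+	 * Illustrative sizing example (assuming a record size of 64 words):
+	 * dsize = 32 KiB gives cs_rc_abs_max = min(8192 / 64, 1023) = 128;
+	 * with asize = 1024 admin words cs_rc_max is first capped at
+	 * min(128, 512) = 128, the hash table becomes 16 << (__fls(896) - 2)
+	 * = 2048 dwords (128 admin words) and the final cs_rc_max stays 128.
+	 */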
+
+	/* Clear the cache RAMs */
+	eip197_trc_cache_clear(priv, cs_rc_max, cs_ht_wc);
 
 	/* Disable the record cache memory access */
 	val = readl(priv->base + EIP197_CS_RAM_CTRL);
@@ -102,83 +246,24 @@
 	/* Configure the record cache #2 */
 	val = EIP197_TRC_PARAMS_RC_SZ_LARGE(cs_trc_lg_rec_wc) |
 	      EIP197_TRC_PARAMS_BLK_TIMER_SPEED(1) |
-	      EIP197_TRC_PARAMS_HTABLE_SZ(2);
+	      EIP197_TRC_PARAMS_HTABLE_SZ(cs_ht_sz);
 	writel(val, priv->base + EIP197_TRC_PARAMS);
+
+	dev_info(priv->dev, "TRC init: %dd,%da (%dr,%dh)\n",
+		 dsize, asize, cs_rc_max, cs_ht_wc + cs_ht_wc);
 }
 
-static void eip197_write_firmware(struct safexcel_crypto_priv *priv,
-				  const struct firmware *fw, int pe, u32 ctrl,
-				  u32 prog_en)
+static void eip197_init_firmware(struct safexcel_crypto_priv *priv)
 {
-	const u32 *data = (const u32 *)fw->data;
+	int pe, i;
 	u32 val;
-	int i;
-
-	/* Reset the engine to make its program memory accessible */
-	writel(EIP197_PE_ICE_x_CTRL_SW_RESET |
-	       EIP197_PE_ICE_x_CTRL_CLR_ECC_CORR |
-	       EIP197_PE_ICE_x_CTRL_CLR_ECC_NON_CORR,
-	       EIP197_PE(priv) + ctrl);
-
-	/* Enable access to the program memory */
-	writel(prog_en, EIP197_PE(priv) + EIP197_PE_ICE_RAM_CTRL(pe));
-
-	/* Write the firmware */
-	for (i = 0; i < fw->size / sizeof(u32); i++)
-		writel(be32_to_cpu(data[i]),
-		       priv->base + EIP197_CLASSIFICATION_RAMS + i * sizeof(u32));
-
-	/* Disable access to the program memory */
-	writel(0, EIP197_PE(priv) + EIP197_PE_ICE_RAM_CTRL(pe));
-
-	/* Release engine from reset */
-	val = readl(EIP197_PE(priv) + ctrl);
-	val &= ~EIP197_PE_ICE_x_CTRL_SW_RESET;
-	writel(val, EIP197_PE(priv) + ctrl);
-}
-
-static int eip197_load_firmwares(struct safexcel_crypto_priv *priv)
-{
-	const char *fw_name[] = {"ifpp.bin", "ipue.bin"};
-	const struct firmware *fw[FW_NB];
-	char fw_path[31], *dir = NULL;
-	int i, j, ret = 0, pe;
-	u32 val;
-
-	switch (priv->version) {
-	case EIP197B:
-		dir = "eip197b";
-		break;
-	case EIP197D:
-		dir = "eip197d";
-		break;
-	default:
-		/* No firmware is required */
-		return 0;
-	}
-
-	for (i = 0; i < FW_NB; i++) {
-		snprintf(fw_path, 31, "inside-secure/%s/%s", dir, fw_name[i]);
-		ret = request_firmware(&fw[i], fw_path, priv->dev);
-		if (ret) {
-			if (priv->version != EIP197B)
-				goto release_fw;
-
-			/* Fallback to the old firmware location for the
-			 * EIP197b.
-			 */
-			ret = request_firmware(&fw[i], fw_name[i], priv->dev);
-			if (ret) {
-				dev_err(priv->dev,
-					"Failed to request firmware %s (%d)\n",
-					fw_name[i], ret);
-				goto release_fw;
-			}
-		}
-	}
 
 	for (pe = 0; pe < priv->config.pes; pe++) {
-		/* Clear the scratchpad memory */
+		/* Configure the token FIFO's */
+		writel(3, EIP197_PE(priv) + EIP197_PE_ICE_PUTF_CTRL(pe));
+		writel(0, EIP197_PE(priv) + EIP197_PE_ICE_PPTF_CTRL(pe));
+
+		/* Clear the ICE scratchpad memory */
 		val = readl(EIP197_PE(priv) + EIP197_PE_ICE_SCRATCH_CTRL(pe));
 		val |= EIP197_PE_ICE_SCRATCH_CTRL_CHANGE_TIMER |
 		       EIP197_PE_ICE_SCRATCH_CTRL_TIMER_EN |
@@ -186,35 +271,206 @@
 		       EIP197_PE_ICE_SCRATCH_CTRL_CHANGE_ACCESS;
 		writel(val, EIP197_PE(priv) + EIP197_PE_ICE_SCRATCH_CTRL(pe));
 
-		memset_io(EIP197_PE(priv) + EIP197_PE_ICE_SCRATCH_RAM(pe), 0,
-			  EIP197_NUM_OF_SCRATCH_BLOCKS * sizeof(u32));
+		/* clear the scratchpad RAM using 32 bit writes only */
+		for (i = 0; i < EIP197_NUM_OF_SCRATCH_BLOCKS; i++)
+			writel(0, EIP197_PE(priv) +
+				  EIP197_PE_ICE_SCRATCH_RAM(pe) + (i << 2));
 
-		eip197_write_firmware(priv, fw[FW_IFPP], pe,
-				      EIP197_PE_ICE_FPP_CTRL(pe),
-				      EIP197_PE_ICE_RAM_CTRL_FPP_PROG_EN);
+		/* Reset the IFPP engine to make its program mem accessible */
+		writel(EIP197_PE_ICE_x_CTRL_SW_RESET |
+		       EIP197_PE_ICE_x_CTRL_CLR_ECC_CORR |
+		       EIP197_PE_ICE_x_CTRL_CLR_ECC_NON_CORR,
+		       EIP197_PE(priv) + EIP197_PE_ICE_FPP_CTRL(pe));
 
-		eip197_write_firmware(priv, fw[FW_IPUE], pe,
-				      EIP197_PE_ICE_PUE_CTRL(pe),
-				      EIP197_PE_ICE_RAM_CTRL_PUE_PROG_EN);
+		/* Reset the IPUE engine to make its program mem accessible */
+		writel(EIP197_PE_ICE_x_CTRL_SW_RESET |
+		       EIP197_PE_ICE_x_CTRL_CLR_ECC_CORR |
+		       EIP197_PE_ICE_x_CTRL_CLR_ECC_NON_CORR,
+		       EIP197_PE(priv) + EIP197_PE_ICE_PUE_CTRL(pe));
+
+		/* Enable access to all IFPP program memories */
+		writel(EIP197_PE_ICE_RAM_CTRL_FPP_PROG_EN,
+		       EIP197_PE(priv) + EIP197_PE_ICE_RAM_CTRL(pe));
 	}
 
+}
+
+static int eip197_write_firmware(struct safexcel_crypto_priv *priv,
+				  const struct firmware *fw)
+{
+	const u32 *data = (const u32 *)fw->data;
+	int i;
+
+	/* Write the firmware */
+	for (i = 0; i < fw->size / sizeof(u32); i++)
+		writel(be32_to_cpu(data[i]),
+		       priv->base + EIP197_CLASSIFICATION_RAMS + i * sizeof(u32));
+
+	/* Exclude final 2 NOPs from size */
+	return i - EIP197_FW_TERMINAL_NOPS;
+}
+
+/*
+ * If FW is actual production firmware, then poll for its initialization
+ * to complete and check if it is good for the HW, otherwise just return OK.
+ */
+static bool poll_fw_ready(struct safexcel_crypto_priv *priv, int fpp)
+{
+	int pe, pollcnt;
+	u32 base, pollofs;
+
+	if (fpp)
+		pollofs  = EIP197_FW_FPP_READY;
+	else
+		pollofs  = EIP197_FW_PUE_READY;
+
+	for (pe = 0; pe < priv->config.pes; pe++) {
+		base = EIP197_PE_ICE_SCRATCH_RAM(pe);
+		pollcnt = EIP197_FW_START_POLLCNT;
+		while (pollcnt &&
+		       (readl_relaxed(EIP197_PE(priv) + base +
+			      pollofs) != 1)) {
+			pollcnt--;
+		}
+		if (!pollcnt) {
+			dev_err(priv->dev, "FW(%d) for PE %d failed to start\n",
+				fpp, pe);
+			return false;
+		}
+	}
+	return true;
+}
+
+static bool eip197_start_firmware(struct safexcel_crypto_priv *priv,
+				  int ipuesz, int ifppsz, int minifw)
+{
+	int pe;
+	u32 val;
+
+	for (pe = 0; pe < priv->config.pes; pe++) {
+		/* Disable access to all program memory */
+		writel(0, EIP197_PE(priv) + EIP197_PE_ICE_RAM_CTRL(pe));
+
+		/* Start IFPP microengines */
+		if (minifw)
+			val = 0;
+		else
+			val = EIP197_PE_ICE_UENG_START_OFFSET((ifppsz - 1) &
+					EIP197_PE_ICE_UENG_INIT_ALIGN_MASK) |
+				EIP197_PE_ICE_UENG_DEBUG_RESET;
+		writel(val, EIP197_PE(priv) + EIP197_PE_ICE_FPP_CTRL(pe));
+
+		/* Start IPUE microengines */
+		if (minifw)
+			val = 0;
+		else
+			val = EIP197_PE_ICE_UENG_START_OFFSET((ipuesz - 1) &
+					EIP197_PE_ICE_UENG_INIT_ALIGN_MASK) |
+				EIP197_PE_ICE_UENG_DEBUG_RESET;
+		writel(val, EIP197_PE(priv) + EIP197_PE_ICE_PUE_CTRL(pe));
+	}
+
+	/* For miniFW startup, there is no initialization, so always succeed */
+	if (minifw)
+		return true;
+
+	/* Wait until all the firmwares have properly started up */
+	if (!poll_fw_ready(priv, 1))
+		return false;
+	if (!poll_fw_ready(priv, 0))
+		return false;
+
+	return true;
+}
+
+static int eip197_load_firmwares(struct safexcel_crypto_priv *priv)
+{
+	const char *fw_name[] = {"ifpp.bin", "ipue.bin"};
+	const struct firmware *fw[FW_NB];
+	char fw_path[37], *dir = NULL;
+	int i, j, ret = 0, pe;
+	int ipuesz, ifppsz, minifw = 0;
+
+	if (priv->version == EIP197D_MRVL)
+		dir = "eip197d";
+	else if (priv->version == EIP197B_MRVL ||
+		 priv->version == EIP197_DEVBRD)
+		dir = "eip197b";
+	else
+		return -ENODEV;
+
+retry_fw:
+	for (i = 0; i < FW_NB; i++) {
+		snprintf(fw_path, 37, "inside-secure/%s/%s", dir, fw_name[i]);
+		ret = firmware_request_nowarn(&fw[i], fw_path, priv->dev);
+		if (ret) {
+			if (minifw || priv->version != EIP197B_MRVL)
+				goto release_fw;
+
+			/* Fallback to the old firmware location for the
+			 * EIP197b.
+			 */
+			ret = firmware_request_nowarn(&fw[i], fw_name[i],
+						      priv->dev);
+			if (ret)
+				goto release_fw;
+		}
+	}
+
+	eip197_init_firmware(priv);
+
+	ifppsz = eip197_write_firmware(priv, fw[FW_IFPP]);
+
+	/* Enable access to IPUE program memories */
+	for (pe = 0; pe < priv->config.pes; pe++)
+		writel(EIP197_PE_ICE_RAM_CTRL_PUE_PROG_EN,
+		       EIP197_PE(priv) + EIP197_PE_ICE_RAM_CTRL(pe));
+
+	ipuesz = eip197_write_firmware(priv, fw[FW_IPUE]);
+
+	if (eip197_start_firmware(priv, ipuesz, ifppsz, minifw)) {
+		dev_dbg(priv->dev, "Firmware loaded successfully\n");
+		return 0;
+	}
+
+	ret = -ENODEV;
+
 release_fw:
 	for (j = 0; j < i; j++)
 		release_firmware(fw[j]);
 
+	if (!minifw) {
+		/* Retry with minifw path */
+		dev_dbg(priv->dev, "Firmware set not (fully) present or init failed, falling back to BCLA mode\n");
+		dir = "eip197_minifw";
+		minifw = 1;
+		goto retry_fw;
+	}
+
+	dev_dbg(priv->dev, "Firmware load failed.\n");
+
 	return ret;
 }
 
 static int safexcel_hw_setup_cdesc_rings(struct safexcel_crypto_priv *priv)
 {
-	u32 hdw, cd_size_rnd, val;
-	int i;
+	u32 cd_size_rnd, val;
+	int i, cd_fetch_cnt;
 
-	hdw = readl(EIP197_HIA_AIC_G(priv) + EIP197_HIA_OPTIONS);
-	hdw &= GENMASK(27, 25);
-	hdw >>= 25;
-
-	cd_size_rnd = (priv->config.cd_size + (BIT(hdw) - 1)) >> hdw;
+	cd_size_rnd  = (priv->config.cd_size +
+			(BIT(priv->hwconfig.hwdataw) - 1)) >>
+		       priv->hwconfig.hwdataw;
+	/* determine number of CD's we can fetch into the CD FIFO as 1 block */
+	if (priv->flags & SAFEXCEL_HW_EIP197) {
+		/* EIP197: try to fetch enough in 1 go to keep all pipes busy */
+		cd_fetch_cnt = (1 << priv->hwconfig.hwcfsize) / cd_size_rnd;
+		cd_fetch_cnt = min_t(uint, cd_fetch_cnt,
+				     (priv->config.pes * EIP197_FETCH_DEPTH));
+	} else {
+		/* for the EIP97, just fetch all that fits minus 1 */
+		cd_fetch_cnt = ((1 << priv->hwconfig.hwcfsize) /
+				cd_size_rnd) - 1;
+	}
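+	/*
+	 * Illustrative example (assumed values): with hwcfsize = 8 (a
+	 * 256-word CD FIFO) and cd_size_rnd = 4, an EIP97 fetches
+	 * 256 / 4 - 1 = 63 descriptors per block, while an EIP197 is further
+	 * capped at pes * EIP197_FETCH_DEPTH.
+	 */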
 
 	for (i = 0; i < priv->config.rings; i++) {
 		/* ring base address */
@@ -226,8 +482,9 @@
 		writel(EIP197_xDR_DESC_MODE_64BIT | (priv->config.cd_offset << 16) |
 		       priv->config.cd_size,
 		       EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_DESC_SIZE);
-		writel(((EIP197_FETCH_COUNT * (cd_size_rnd << hdw)) << 16) |
-		       (EIP197_FETCH_COUNT * priv->config.cd_offset),
+		writel(((cd_fetch_cnt *
+			 (cd_size_rnd << priv->hwconfig.hwdataw)) << 16) |
+		       (cd_fetch_cnt * priv->config.cd_offset),
 		       EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_CFG);
 
 		/* Configure DMA tx control */
@@ -245,14 +502,23 @@
 
 static int safexcel_hw_setup_rdesc_rings(struct safexcel_crypto_priv *priv)
 {
-	u32 hdw, rd_size_rnd, val;
-	int i;
+	u32 rd_size_rnd, val;
+	int i, rd_fetch_cnt;
 
-	hdw = readl(EIP197_HIA_AIC_G(priv) + EIP197_HIA_OPTIONS);
-	hdw &= GENMASK(27, 25);
-	hdw >>= 25;
-
-	rd_size_rnd = (priv->config.rd_size + (BIT(hdw) - 1)) >> hdw;
+	/* determine number of RD's we can fetch into the FIFO as one block */
+	rd_size_rnd = (EIP197_RD64_FETCH_SIZE +
+		       (BIT(priv->hwconfig.hwdataw) - 1)) >>
+		      priv->hwconfig.hwdataw;
+	if (priv->flags & SAFEXCEL_HW_EIP197) {
+		/* EIP197: try to fetch enough in 1 go to keep all pipes busy */
+		rd_fetch_cnt = (1 << priv->hwconfig.hwrfsize) / rd_size_rnd;
+		rd_fetch_cnt = min_t(uint, rd_fetch_cnt,
+				     (priv->config.pes * EIP197_FETCH_DEPTH));
+	} else {
+		/* for the EIP97, just fetch all that fits minus 1 */
+		rd_fetch_cnt = ((1 << priv->hwconfig.hwrfsize) /
+				rd_size_rnd) - 1;
+	}
 
 	for (i = 0; i < priv->config.rings; i++) {
 		/* ring base address */
@@ -265,8 +531,9 @@
 		       priv->config.rd_size,
 		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_DESC_SIZE);
 
-		writel(((EIP197_FETCH_COUNT * (rd_size_rnd << hdw)) << 16) |
-		       (EIP197_FETCH_COUNT * priv->config.rd_offset),
+		writel(((rd_fetch_cnt *
+			 (rd_size_rnd << priv->hwconfig.hwdataw)) << 16) |
+		       (rd_fetch_cnt * priv->config.rd_offset),
 		       EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_CFG);
 
 		/* Configure DMA tx control */
@@ -291,23 +558,21 @@
 
 static int safexcel_hw_init(struct safexcel_crypto_priv *priv)
 {
-	u32 version, val;
+	u32 val;
 	int i, ret, pe;
 
-	/* Determine endianess and configure byte swap */
-	version = readl(EIP197_HIA_AIC(priv) + EIP197_HIA_VERSION);
-	val = readl(EIP197_HIA_AIC(priv) + EIP197_HIA_MST_CTRL);
+	dev_dbg(priv->dev, "HW init: using %d pipe(s) and %d ring(s)\n",
+		priv->config.pes, priv->config.rings);
 
-	if ((version & 0xffff) == EIP197_HIA_VERSION_BE)
-		val |= EIP197_MST_CTRL_BYTE_SWAP;
-	else if (((version >> 16) & 0xffff) == EIP197_HIA_VERSION_LE)
-		val |= (EIP197_MST_CTRL_NO_BYTE_SWAP >> 24);
-
-	/* For EIP197 set maximum number of TX commands to 2^5 = 32 */
-	if (priv->version == EIP197B || priv->version == EIP197D)
+	/*
+	 * For EIP197's only set maximum number of TX commands to 2^5 = 32
+	 * Skip for the EIP97 as it does not have this field.
+	 */
+	if (priv->flags & SAFEXCEL_HW_EIP197) {
+		val = readl(EIP197_HIA_AIC(priv) + EIP197_HIA_MST_CTRL);
 		val |= EIP197_MST_CTRL_TX_MAX_CMD(5);
-
-	writel(val, EIP197_HIA_AIC(priv) + EIP197_HIA_MST_CTRL);
+		writel(val, EIP197_HIA_AIC(priv) + EIP197_HIA_MST_CTRL);
+	}
 
 	/* Configure wr/rd cache values */
 	writel(EIP197_MST_CTRL_RD_CACHE(RD_CACHE_4BITS) |
@@ -330,11 +595,10 @@
 		writel(EIP197_DxE_THR_CTRL_RESET_PE,
 		       EIP197_HIA_DFE_THR(priv) + EIP197_HIA_DFE_THR_CTRL(pe));
 
-		if (priv->version == EIP197B || priv->version == EIP197D) {
-			/* Reset HIA input interface arbiter */
+		if (priv->flags & SAFEXCEL_HW_EIP197)
+			/* Reset HIA input interface arbiter (EIP197 only) */
 			writel(EIP197_HIA_RA_PE_CTRL_RESET,
 			       EIP197_HIA_AIC(priv) + EIP197_HIA_RA_PE_CTRL(pe));
-		}
 
 		/* DMA transfer size to use */
 		val = EIP197_HIA_DFE_CFG_DIS_DEBUG;
@@ -357,12 +621,11 @@
 		       EIP197_PE_IN_xBUF_THRES_MAX(7),
 		       EIP197_PE(priv) + EIP197_PE_IN_TBUF_THRES(pe));
 
-		if (priv->version == EIP197B || priv->version == EIP197D) {
+		if (priv->flags & SAFEXCEL_HW_EIP197)
 			/* enable HIA input interface arbiter and rings */
 			writel(EIP197_HIA_RA_PE_CTRL_EN |
 			       GENMASK(priv->config.rings - 1, 0),
 			       EIP197_HIA_AIC(priv) + EIP197_HIA_RA_PE_CTRL(pe));
-		}
 
 		/* Data Store Engine configuration */
 
@@ -381,10 +644,10 @@
 		       EIP197_HIA_DxE_CFG_MAX_DATA_SIZE(8);
 		val |= EIP197_HIA_DxE_CFG_DATA_CACHE_CTRL(WR_CACHE_3BITS);
 		val |= EIP197_HIA_DSE_CFG_ALWAYS_BUFFERABLE;
-		/* FIXME: instability issues can occur for EIP97 but disabling it impact
-		 * performances.
+		/* FIXME: instability issues can occur for EIP97 but disabling
+		 * it impacts performance.
 		 */
-		if (priv->version == EIP197B || priv->version == EIP197D)
+		if (priv->flags & SAFEXCEL_HW_EIP197)
 			val |= EIP197_HIA_DSE_CFG_EN_SINGLE_WR;
 		writel(val, EIP197_HIA_DSE(priv) + EIP197_HIA_DSE_CFG(pe));
 
@@ -400,21 +663,15 @@
 
 		/* Token & context configuration */
 		val = EIP197_PE_EIP96_TOKEN_CTRL_CTX_UPDATES |
-		      EIP197_PE_EIP96_TOKEN_CTRL_REUSE_CTX |
-		      EIP197_PE_EIP96_TOKEN_CTRL_POST_REUSE_CTX;
+		      EIP197_PE_EIP96_TOKEN_CTRL_NO_TOKEN_WAIT |
+		      EIP197_PE_EIP96_TOKEN_CTRL_ENABLE_TIMEOUT;
 		writel(val, EIP197_PE(priv) + EIP197_PE_EIP96_TOKEN_CTRL(pe));
 
-		/* H/W capabilities selection */
-		val = EIP197_FUNCTION_RSVD;
-		val |= EIP197_PROTOCOL_ENCRYPT_ONLY | EIP197_PROTOCOL_HASH_ONLY;
-		val |= EIP197_PROTOCOL_ENCRYPT_HASH | EIP197_PROTOCOL_HASH_DECRYPT;
-		val |= EIP197_ALG_DES_ECB | EIP197_ALG_DES_CBC;
-		val |= EIP197_ALG_3DES_ECB | EIP197_ALG_3DES_CBC;
-		val |= EIP197_ALG_AES_ECB | EIP197_ALG_AES_CBC;
-		val |= EIP197_ALG_MD5 | EIP197_ALG_HMAC_MD5;
-		val |= EIP197_ALG_SHA1 | EIP197_ALG_HMAC_SHA1;
-		val |= EIP197_ALG_SHA2 | EIP197_ALG_HMAC_SHA2;
-		writel(val, EIP197_PE(priv) + EIP197_PE_EIP96_FUNCTION_EN(pe));
+		/* H/W capabilities selection: just enable everything */
+		writel(EIP197_FUNCTION_ALL,
+		       EIP197_PE(priv) + EIP197_PE_EIP96_FUNCTION_EN(pe));
+		writel(EIP197_FUNCTION_ALL,
+		       EIP197_PE(priv) + EIP197_PE_EIP96_FUNCTION2_EN(pe));
 	}
 
 	/* Command Descriptor Rings prepare */
@@ -479,8 +736,9 @@
 	/* Clear any HIA interrupt */
 	writel(GENMASK(30, 20), EIP197_HIA_AIC_G(priv) + EIP197_HIA_AIC_G_ACK);
 
-	if (priv->version == EIP197B || priv->version == EIP197D) {
+	if (priv->flags & SAFEXCEL_HW_EIP197) {
 		eip197_trc_cache_init(priv);
+		priv->flags |= EIP197_TRC_CACHE;
 
 		ret = eip197_load_firmwares(priv);
 		if (ret)
@@ -589,16 +847,32 @@
 inline int safexcel_rdesc_check_errors(struct safexcel_crypto_priv *priv,
 				       struct safexcel_result_desc *rdesc)
 {
-	if (likely(!rdesc->result_data.error_code))
+	if (likely((!rdesc->descriptor_overflow) &&
+		   (!rdesc->buffer_overflow) &&
+		   (!rdesc->result_data.error_code)))
 		return 0;
 
-	if (rdesc->result_data.error_code & 0x407f) {
-		/* Fatal error (bits 0-7, 14) */
+	if (rdesc->descriptor_overflow)
+		dev_err(priv->dev, "Descriptor overflow detected");
+
+	if (rdesc->buffer_overflow)
+		dev_err(priv->dev, "Buffer overflow detected");
+
+	if (rdesc->result_data.error_code & 0x4066) {
+		/* Fatal error (bits 1,2,5,6 & 14) */
 		dev_err(priv->dev,
-			"cipher: result: result descriptor error (0x%x)\n",
+			"result descriptor error (%x)",
 			rdesc->result_data.error_code);
+		return -EIO;
+	} else if (rdesc->result_data.error_code &
+		   (BIT(7) | BIT(4) | BIT(3) | BIT(0))) {
+		/*
+		 * Give priority over authentication fails:
+		 * Blocksize, length & overflow errors,
+		 * something wrong with the input!
+		 */
 		return -EINVAL;
-	} else if (rdesc->result_data.error_code == BIT(9)) {
+	} else if (rdesc->result_data.error_code & BIT(9)) {
 		/* Authentication failed */
 		return -EBADMSG;
 	}
@@ -711,7 +985,8 @@
 		ndesc = ctx->handle_result(priv, ring, req,
 					   &should_complete, &ret);
 		if (ndesc < 0) {
-			dev_err(priv->dev, "failed to handle result (%d)", ndesc);
+			dev_err(priv->dev, "failed to handle result (%d)\n",
+				ndesc);
 			goto acknowledge;
 		}
 
@@ -783,7 +1058,7 @@
 			 * reinitialized. This should not happen under
 			 * normal circumstances.
 			 */
-			dev_err(priv->dev, "RDR: fatal error.");
+			dev_err(priv->dev, "RDR: fatal error.\n");
 		} else if (likely(stat & EIP197_xDR_THRESH)) {
 			rc = IRQ_WAKE_THREAD;
 		}
@@ -813,23 +1088,45 @@
 	return IRQ_HANDLED;
 }
 
-static int safexcel_request_ring_irq(struct platform_device *pdev, const char *name,
+static int safexcel_request_ring_irq(void *pdev, int irqid,
+				     int is_pci_dev,
 				     irq_handler_t handler,
 				     irq_handler_t threaded_handler,
 				     struct safexcel_ring_irq_data *ring_irq_priv)
 {
-	int ret, irq = platform_get_irq_byname(pdev, name);
+	int ret, irq;
+	struct device *dev;
 
-	if (irq < 0) {
-		dev_err(&pdev->dev, "unable to get IRQ '%s'\n", name);
-		return irq;
+	if (IS_ENABLED(CONFIG_PCI) && is_pci_dev) {
+		struct pci_dev *pci_pdev = pdev;
+
+		dev = &pci_pdev->dev;
+		irq = pci_irq_vector(pci_pdev, irqid);
+		if (irq < 0) {
+			dev_err(dev, "unable to get device MSI IRQ %d (err %d)\n",
+				irqid, irq);
+			return irq;
+		}
+	} else if (IS_ENABLED(CONFIG_OF)) {
+		struct platform_device *plf_pdev = pdev;
+		char irq_name[6] = {0}; /* "ringX\0" */
+
+		snprintf(irq_name, 6, "ring%d", irqid);
+		dev = &plf_pdev->dev;
+		irq = platform_get_irq_byname(plf_pdev, irq_name);
+
+		if (irq < 0) {
+			dev_err(dev, "unable to get IRQ '%s' (err %d)\n",
+				irq_name, irq);
+			return irq;
+		}
 	}
 
-	ret = devm_request_threaded_irq(&pdev->dev, irq, handler,
+	ret = devm_request_threaded_irq(dev, irq, handler,
 					threaded_handler, IRQF_ONESHOT,
-					dev_name(&pdev->dev), ring_irq_priv);
+					dev_name(dev), ring_irq_priv);
 	if (ret) {
-		dev_err(&pdev->dev, "unable to request IRQ %d\n", irq);
+		dev_err(dev, "unable to request IRQ %d\n", irq);
 		return ret;
 	}
 
@@ -843,6 +1140,9 @@
 	&safexcel_alg_cbc_des3_ede,
 	&safexcel_alg_ecb_aes,
 	&safexcel_alg_cbc_aes,
+	&safexcel_alg_cfb_aes,
+	&safexcel_alg_ofb_aes,
+	&safexcel_alg_ctr_aes,
 	&safexcel_alg_md5,
 	&safexcel_alg_sha1,
 	&safexcel_alg_sha224,
@@ -860,6 +1160,15 @@
 	&safexcel_alg_authenc_hmac_sha256_cbc_aes,
 	&safexcel_alg_authenc_hmac_sha384_cbc_aes,
 	&safexcel_alg_authenc_hmac_sha512_cbc_aes,
+	&safexcel_alg_authenc_hmac_sha1_cbc_des3_ede,
+	&safexcel_alg_authenc_hmac_sha1_ctr_aes,
+	&safexcel_alg_authenc_hmac_sha224_ctr_aes,
+	&safexcel_alg_authenc_hmac_sha256_ctr_aes,
+	&safexcel_alg_authenc_hmac_sha384_ctr_aes,
+	&safexcel_alg_authenc_hmac_sha512_ctr_aes,
+	&safexcel_alg_xts_aes,
+	&safexcel_alg_gcm,
+	&safexcel_alg_ccm,
 };
 
 static int safexcel_register_algorithms(struct safexcel_crypto_priv *priv)
@@ -869,7 +1178,10 @@
 	for (i = 0; i < ARRAY_SIZE(safexcel_algs); i++) {
 		safexcel_algs[i]->priv = priv;
 
-		if (!(safexcel_algs[i]->engines & priv->version))
+		/* Do we have all required base algorithms available? */
+		if ((safexcel_algs[i]->algo_mask & priv->hwconfig.algo_flags) !=
+		    safexcel_algs[i]->algo_mask)
+			/* No, so don't register this ciphersuite */
 			continue;
 
 		if (safexcel_algs[i]->type == SAFEXCEL_ALG_TYPE_SKCIPHER)
@@ -887,7 +1199,10 @@
 
 fail:
 	for (j = 0; j < i; j++) {
-		if (!(safexcel_algs[j]->engines & priv->version))
+		/* Do we have all required base algorithms available? */
+		if ((safexcel_algs[j]->algo_mask & priv->hwconfig.algo_flags) !=
+		    safexcel_algs[j]->algo_mask)
+			/* No, so don't unregister this ciphersuite */
 			continue;
 
 		if (safexcel_algs[j]->type == SAFEXCEL_ALG_TYPE_SKCIPHER)
@@ -906,7 +1221,10 @@
 	int i;
 
 	for (i = 0; i < ARRAY_SIZE(safexcel_algs); i++) {
-		if (!(safexcel_algs[i]->engines & priv->version))
+		/* Do we have all required base algorithms available? */
+		if ((safexcel_algs[i]->algo_mask & priv->hwconfig.algo_flags) !=
+		    safexcel_algs[i]->algo_mask)
+			/* No, so don't unregister this ciphersuite */
 			continue;
 
 		if (safexcel_algs[i]->type == SAFEXCEL_ALG_TYPE_SKCIPHER)
@@ -925,22 +1243,20 @@
 	val = readl(EIP197_HIA_AIC_G(priv) + EIP197_HIA_OPTIONS);
 
 	/* Read number of PEs from the engine */
-	switch (priv->version) {
-	case EIP197B:
-	case EIP197D:
+	if (priv->flags & SAFEXCEL_HW_EIP197)
+		/* Wider field width for all EIP197 type engines */
 		mask = EIP197_N_PES_MASK;
-		break;
-	default:
+	else
+		/* Narrow field width for EIP97 type engine */
 		mask = EIP97_N_PES_MASK;
-	}
+
 	priv->config.pes = (val >> EIP197_N_PES_OFFSET) & mask;
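+	/* Number of rings is in bits [3:0] of HIA_OPTIONS; cap at max_rings */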
 
+	priv->config.rings = min_t(u32, val & GENMASK(3, 0), max_rings);
+
 	val = (val & GENMASK(27, 25)) >> 25;
 	mask = BIT(val) - 1;
 
-	val = readl(EIP197_HIA_AIC_G(priv) + EIP197_HIA_OPTIONS);
-	priv->config.rings = min_t(u32, val & GENMASK(3, 0), max_rings);
-
 	priv->config.cd_size = (sizeof(struct safexcel_command_desc) / sizeof(u32));
 	priv->config.cd_offset = (priv->config.cd_size + mask) & ~mask;
 
@@ -952,9 +1268,7 @@
 {
 	struct safexcel_register_offsets *offsets = &priv->offsets;
 
-	switch (priv->version) {
-	case EIP197B:
-	case EIP197D:
+	if (priv->flags & SAFEXCEL_HW_EIP197) {
 		offsets->hia_aic	= EIP197_HIA_AIC_BASE;
 		offsets->hia_aic_g	= EIP197_HIA_AIC_G_BASE;
 		offsets->hia_aic_r	= EIP197_HIA_AIC_R_BASE;
@@ -965,8 +1279,8 @@
 		offsets->hia_dse_thr	= EIP197_HIA_DSE_THR_BASE;
 		offsets->hia_gen_cfg	= EIP197_HIA_GEN_CFG_BASE;
 		offsets->pe		= EIP197_PE_BASE;
-		break;
-	case EIP97IES:
+		offsets->global		= EIP197_GLOBAL_BASE;
+	} else {
 		offsets->hia_aic	= EIP97_HIA_AIC_BASE;
 		offsets->hia_aic_g	= EIP97_HIA_AIC_G_BASE;
 		offsets->hia_aic_r	= EIP97_HIA_AIC_R_BASE;
@@ -977,16 +1291,268 @@
 		offsets->hia_dse_thr	= EIP97_HIA_DSE_THR_BASE;
 		offsets->hia_gen_cfg	= EIP97_HIA_GEN_CFG_BASE;
 		offsets->pe		= EIP97_PE_BASE;
-		break;
+		offsets->global		= EIP97_GLOBAL_BASE;
 	}
 }
 
+/*
+ * Generic part of probe routine, shared by platform and PCI driver
+ *
+ * Assumes IO resources have been mapped, private data mem has been allocated,
+ * clocks have been enabled, device pointer has been assigned etc.
+ *
+ */
+static int safexcel_probe_generic(void *pdev,
+				  struct safexcel_crypto_priv *priv,
+				  int is_pci_dev)
+{
+	struct device *dev = priv->dev;
+	u32 peid, version, mask, val, hiaopt;
+	int i, ret, hwctg;
+
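+	/* DMA pool for the context records shared with the engine */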
+	priv->context_pool = dmam_pool_create("safexcel-context", dev,
+					      sizeof(struct safexcel_context_record),
+					      1, 0);
+	if (!priv->context_pool)
+		return -ENOMEM;
+
+	/*
+	 * First try the EIP97 HIA version regs. For the EIP197, this is
+	 * guaranteed to NOT return any of the test values.
+	 */
+	version = readl(priv->base + EIP97_HIA_AIC_BASE + EIP197_HIA_VERSION);
+
+	mask = 0;  /* do not swap */
+	if (EIP197_REG_LO16(version) == EIP197_HIA_VERSION_LE) {
+		priv->hwconfig.hiaver = EIP197_VERSION_MASK(version);
+	} else if (EIP197_REG_HI16(version) == EIP197_HIA_VERSION_BE) {
+		/* read back byte-swapped, so complement byte swap bits */
+		mask = EIP197_MST_CTRL_BYTE_SWAP_BITS;
+		priv->hwconfig.hiaver = EIP197_VERSION_SWAP(version);
+	} else {
+		/* So it wasn't an EIP97 ... maybe it's an EIP197? */
+		version = readl(priv->base + EIP197_HIA_AIC_BASE +
+				EIP197_HIA_VERSION);
+		if (EIP197_REG_LO16(version) == EIP197_HIA_VERSION_LE) {
+			priv->hwconfig.hiaver = EIP197_VERSION_MASK(version);
+			priv->flags |= SAFEXCEL_HW_EIP197;
+		} else if (EIP197_REG_HI16(version) ==
+			   EIP197_HIA_VERSION_BE) {
+			/* read back byte-swapped, so complement swap bits */
+			mask = EIP197_MST_CTRL_BYTE_SWAP_BITS;
+			priv->hwconfig.hiaver = EIP197_VERSION_SWAP(version);
+			priv->flags |= SAFEXCEL_HW_EIP197;
+		} else {
+			return -ENODEV;
+		}
+	}
+
+	/* Now initialize the reg offsets based on the probing info so far */
+	safexcel_init_register_offsets(priv);
+
+	/*
+	 * If the version was read byte-swapped, we need to flip the device
+	 * swapping. Keep in mind here, though, that what we write will also be
+	 * byte-swapped ...
+	 */
+	if (mask) {
+		val = readl(EIP197_HIA_AIC(priv) + EIP197_HIA_MST_CTRL);
+		val = val ^ (mask >> 24); /* toggle byte swap bits */
+		writel(val, EIP197_HIA_AIC(priv) + EIP197_HIA_MST_CTRL);
+	}
+
+	/*
+	 * We're not done probing yet! So far we have only matched the HIA
+	 * version registers. So, with the endianness presumably correct now
+	 * and the offsets set up, *really* probe for the EIP97/EIP197.
+	 */
+	version = readl(EIP197_GLOBAL(priv) + EIP197_VERSION);
+	if (((priv->flags & SAFEXCEL_HW_EIP197) &&
+	     (EIP197_REG_LO16(version) != EIP197_VERSION_LE)) ||
+	    ((!(priv->flags & SAFEXCEL_HW_EIP197) &&
+	     (EIP197_REG_LO16(version) != EIP97_VERSION_LE)))) {
+		/*
+		 * We did not find the device that matched our initial probing
+		 * (or our initial probing failed). Report the appropriate error.
+		 */
+		return -ENODEV;
+	}
+
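+	/* Extract HW version, config and EIP number from the VERSION register */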
+	priv->hwconfig.hwver = EIP197_VERSION_MASK(version);
+	hwctg = version >> 28;
+	peid = version & 255;
+
+	/* Detect EIP96 packet engine and version */
+	version = readl(EIP197_PE(priv) + EIP197_PE_EIP96_VERSION(0));
+	if (EIP197_REG_LO16(version) != EIP96_VERSION_LE) {
+		dev_err(dev, "EIP%d: EIP96 not detected.\n", peid);
+		return -ENODEV;
+	}
+	priv->hwconfig.pever = EIP197_VERSION_MASK(version);
+
+	hiaopt = readl(EIP197_HIA_AIC(priv) + EIP197_HIA_OPTIONS);
+
+	if (priv->flags & SAFEXCEL_HW_EIP197) {
+		/* EIP197 */
+		priv->hwconfig.hwdataw  = (hiaopt >> EIP197_HWDATAW_OFFSET) &
+					  EIP197_HWDATAW_MASK;
+		priv->hwconfig.hwcfsize = ((hiaopt >> EIP197_CFSIZE_OFFSET) &
+					   EIP197_CFSIZE_MASK) +
+					  EIP197_CFSIZE_ADJUST;
+		priv->hwconfig.hwrfsize = ((hiaopt >> EIP197_RFSIZE_OFFSET) &
+					   EIP197_RFSIZE_MASK) +
+					  EIP197_RFSIZE_ADJUST;
+	} else {
+		/* EIP97 */
+		priv->hwconfig.hwdataw  = (hiaopt >> EIP197_HWDATAW_OFFSET) &
+					  EIP97_HWDATAW_MASK;
+		priv->hwconfig.hwcfsize = (hiaopt >> EIP97_CFSIZE_OFFSET) &
+					  EIP97_CFSIZE_MASK;
+		priv->hwconfig.hwrfsize = (hiaopt >> EIP97_RFSIZE_OFFSET) &
+					  EIP97_RFSIZE_MASK;
+	}
+
+	/* Get supported algorithms from EIP96 transform engine */
+	priv->hwconfig.algo_flags = readl(EIP197_PE(priv) +
+				    EIP197_PE_EIP96_OPTIONS(0));
+
+	/* Print single info line describing what we just detected */
+	dev_info(priv->dev, "EIP%d:%x(%d)-HIA:%x(%d,%d,%d),PE:%x,alg:%08x\n",
+		 peid, priv->hwconfig.hwver, hwctg, priv->hwconfig.hiaver,
+		 priv->hwconfig.hwdataw, priv->hwconfig.hwcfsize,
+		 priv->hwconfig.hwrfsize, priv->hwconfig.pever,
+		 priv->hwconfig.algo_flags);
+
+	safexcel_configure(priv);
+
+	if (IS_ENABLED(CONFIG_PCI) && priv->version == EIP197_DEVBRD) {
+		/*
+		 * Request MSI vectors for global + 1 per ring -
+		 * or just 1 for older dev images
+		 */
+		struct pci_dev *pci_pdev = pdev;
+
+		ret = pci_alloc_irq_vectors(pci_pdev,
+					    priv->config.rings + 1,
+					    priv->config.rings + 1,
+					    PCI_IRQ_MSI | PCI_IRQ_MSIX);
+		if (ret < 0) {
+			dev_err(dev, "Failed to allocate PCI MSI interrupts\n");
+			return ret;
+		}
+	}
+
+	/* Register the ring IRQ handlers and configure the rings */
+	priv->ring = devm_kcalloc(dev, priv->config.rings,
+				  sizeof(*priv->ring),
+				  GFP_KERNEL);
+	if (!priv->ring)
+		return -ENOMEM;
+
+	for (i = 0; i < priv->config.rings; i++) {
+		char wq_name[9] = {0};
+		int irq;
+		struct safexcel_ring_irq_data *ring_irq;
+
+		ret = safexcel_init_ring_descriptors(priv,
+						     &priv->ring[i].cdr,
+						     &priv->ring[i].rdr);
+		if (ret) {
+			dev_err(dev, "Failed to initialize rings\n");
+			return ret;
+		}
+
+		priv->ring[i].rdr_req = devm_kcalloc(dev,
+			EIP197_DEFAULT_RING_SIZE,
+			sizeof(priv->ring[i].rdr_req),
+			GFP_KERNEL);
+		if (!priv->ring[i].rdr_req)
+			return -ENOMEM;
+
+		ring_irq = devm_kzalloc(dev, sizeof(*ring_irq), GFP_KERNEL);
+		if (!ring_irq)
+			return -ENOMEM;
+
+		ring_irq->priv = priv;
+		ring_irq->ring = i;
+
+		irq = safexcel_request_ring_irq(pdev,
+						EIP197_IRQ_NUMBER(i, is_pci_dev),
+						is_pci_dev,
+						safexcel_irq_ring,
+						safexcel_irq_ring_thread,
+						ring_irq);
+		if (irq < 0) {
+			dev_err(dev, "Failed to get IRQ ID for ring %d\n", i);
+			return irq;
+		}
+
+		priv->ring[i].work_data.priv = priv;
+		priv->ring[i].work_data.ring = i;
+		INIT_WORK(&priv->ring[i].work_data.work,
+			  safexcel_dequeue_work);
+
+		snprintf(wq_name, 9, "wq_ring%d", i);
+		priv->ring[i].workqueue =
+			create_singlethread_workqueue(wq_name);
+		if (!priv->ring[i].workqueue)
+			return -ENOMEM;
+
+		priv->ring[i].requests = 0;
+		priv->ring[i].busy = false;
+
+		crypto_init_queue(&priv->ring[i].queue,
+				  EIP197_DEFAULT_RING_SIZE);
+
+		spin_lock_init(&priv->ring[i].lock);
+		spin_lock_init(&priv->ring[i].queue_lock);
+	}
+
+	atomic_set(&priv->ring_used, 0);
+
+	ret = safexcel_hw_init(priv);
+	if (ret) {
+		dev_err(dev, "HW init failed (%d)\n", ret);
+		return ret;
+	}
+
+	ret = safexcel_register_algorithms(priv);
+	if (ret) {
+		dev_err(dev, "Failed to register algorithms (%d)\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+static void safexcel_hw_reset_rings(struct safexcel_crypto_priv *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->config.rings; i++) {
+		/* clear any pending interrupt */
+		writel(GENMASK(5, 0), EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_STAT);
+		writel(GENMASK(7, 0), EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_STAT);
+
+		/* Reset the CDR base address */
+		writel(0, EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_RING_BASE_ADDR_LO);
+		writel(0, EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_RING_BASE_ADDR_HI);
+
+		/* Reset the RDR base address */
+		writel(0, EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_RING_BASE_ADDR_LO);
+		writel(0, EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_RING_BASE_ADDR_HI);
+	}
+}
+
+#if IS_ENABLED(CONFIG_OF)
+/* for Device Tree platform driver */
+
 static int safexcel_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct resource *res;
 	struct safexcel_crypto_priv *priv;
-	int i, ret;
+	int ret;
 
 	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@@ -995,13 +1561,9 @@
 	priv->dev = dev;
 	priv->version = (enum safexcel_eip_version)of_device_get_match_data(dev);
 
-	if (priv->version == EIP197B || priv->version == EIP197D)
-		priv->flags |= EIP197_TRC_CACHE;
+	platform_set_drvdata(pdev, priv);
 
-	safexcel_init_register_offsets(priv);
-
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	priv->base = devm_ioremap_resource(dev, res);
+	priv->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(priv->base)) {
 		dev_err(dev, "failed to get resource\n");
 		return PTR_ERR(priv->base);
@@ -1039,98 +1601,10 @@
 	if (ret)
 		goto err_reg_clk;
 
-	priv->context_pool = dmam_pool_create("safexcel-context", dev,
-					      sizeof(struct safexcel_context_record),
-					      1, 0);
-	if (!priv->context_pool) {
-		ret = -ENOMEM;
+	/* Generic EIP97/EIP197 device probing */
+	ret = safexcel_probe_generic(pdev, priv, 0);
+	if (ret)
 		goto err_reg_clk;
-	}
-
-	safexcel_configure(priv);
-
-	priv->ring = devm_kcalloc(dev, priv->config.rings,
-				  sizeof(*priv->ring),
-				  GFP_KERNEL);
-	if (!priv->ring) {
-		ret = -ENOMEM;
-		goto err_reg_clk;
-	}
-
-	for (i = 0; i < priv->config.rings; i++) {
-		char irq_name[6] = {0}; /* "ringX\0" */
-		char wq_name[9] = {0}; /* "wq_ringX\0" */
-		int irq;
-		struct safexcel_ring_irq_data *ring_irq;
-
-		ret = safexcel_init_ring_descriptors(priv,
-						     &priv->ring[i].cdr,
-						     &priv->ring[i].rdr);
-		if (ret)
-			goto err_reg_clk;
-
-		priv->ring[i].rdr_req = devm_kcalloc(dev,
-			EIP197_DEFAULT_RING_SIZE,
-			sizeof(priv->ring[i].rdr_req),
-			GFP_KERNEL);
-		if (!priv->ring[i].rdr_req) {
-			ret = -ENOMEM;
-			goto err_reg_clk;
-		}
-
-		ring_irq = devm_kzalloc(dev, sizeof(*ring_irq), GFP_KERNEL);
-		if (!ring_irq) {
-			ret = -ENOMEM;
-			goto err_reg_clk;
-		}
-
-		ring_irq->priv = priv;
-		ring_irq->ring = i;
-
-		snprintf(irq_name, 6, "ring%d", i);
-		irq = safexcel_request_ring_irq(pdev, irq_name, safexcel_irq_ring,
-						safexcel_irq_ring_thread,
-						ring_irq);
-		if (irq < 0) {
-			ret = irq;
-			goto err_reg_clk;
-		}
-
-		priv->ring[i].work_data.priv = priv;
-		priv->ring[i].work_data.ring = i;
-		INIT_WORK(&priv->ring[i].work_data.work, safexcel_dequeue_work);
-
-		snprintf(wq_name, 9, "wq_ring%d", i);
-		priv->ring[i].workqueue = create_singlethread_workqueue(wq_name);
-		if (!priv->ring[i].workqueue) {
-			ret = -ENOMEM;
-			goto err_reg_clk;
-		}
-
-		priv->ring[i].requests = 0;
-		priv->ring[i].busy = false;
-
-		crypto_init_queue(&priv->ring[i].queue,
-				  EIP197_DEFAULT_RING_SIZE);
-
-		spin_lock_init(&priv->ring[i].lock);
-		spin_lock_init(&priv->ring[i].queue_lock);
-	}
-
-	platform_set_drvdata(pdev, priv);
-	atomic_set(&priv->ring_used, 0);
-
-	ret = safexcel_hw_init(priv);
-	if (ret) {
-		dev_err(dev, "EIP h/w init failed (%d)\n", ret);
-		goto err_reg_clk;
-	}
-
-	ret = safexcel_register_algorithms(priv);
-	if (ret) {
-		dev_err(dev, "Failed to register algorithms (%d)\n", ret);
-		goto err_reg_clk;
-	}
 
 	return 0;
 
@@ -1141,25 +1615,6 @@
 	return ret;
 }
 
-static void safexcel_hw_reset_rings(struct safexcel_crypto_priv *priv)
-{
-	int i;
-
-	for (i = 0; i < priv->config.rings; i++) {
-		/* clear any pending interrupt */
-		writel(GENMASK(5, 0), EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_STAT);
-		writel(GENMASK(7, 0), EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_STAT);
-
-		/* Reset the CDR base address */
-		writel(0, EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_RING_BASE_ADDR_LO);
-		writel(0, EIP197_HIA_CDR(priv, i) + EIP197_HIA_xDR_RING_BASE_ADDR_HI);
-
-		/* Reset the RDR base address */
-		writel(0, EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_RING_BASE_ADDR_LO);
-		writel(0, EIP197_HIA_RDR(priv, i) + EIP197_HIA_xDR_RING_BASE_ADDR_HI);
-	}
-}
-
 static int safexcel_remove(struct platform_device *pdev)
 {
 	struct safexcel_crypto_priv *priv = platform_get_drvdata(pdev);
@@ -1179,30 +1634,28 @@
 static const struct of_device_id safexcel_of_match_table[] = {
 	{
 		.compatible = "inside-secure,safexcel-eip97ies",
-		.data = (void *)EIP97IES,
+		.data = (void *)EIP97IES_MRVL,
 	},
 	{
 		.compatible = "inside-secure,safexcel-eip197b",
-		.data = (void *)EIP197B,
+		.data = (void *)EIP197B_MRVL,
 	},
 	{
 		.compatible = "inside-secure,safexcel-eip197d",
-		.data = (void *)EIP197D,
+		.data = (void *)EIP197D_MRVL,
 	},
+	/* For backward compatibility and intended for generic use */
 	{
-		/* Deprecated. Kept for backward compatibility. */
 		.compatible = "inside-secure,safexcel-eip97",
-		.data = (void *)EIP97IES,
+		.data = (void *)EIP97IES_MRVL,
 	},
 	{
-		/* Deprecated. Kept for backward compatibility. */
 		.compatible = "inside-secure,safexcel-eip197",
-		.data = (void *)EIP197B,
+		.data = (void *)EIP197B_MRVL,
 	},
 	{},
 };
 
-
 static struct platform_driver  crypto_safexcel = {
 	.probe		= safexcel_probe,
 	.remove		= safexcel_remove,
@@ -1211,10 +1664,166 @@
 		.of_match_table = safexcel_of_match_table,
 	},
 };
-module_platform_driver(crypto_safexcel);
+#endif
+
+#if IS_ENABLED(CONFIG_PCI)
+/* PCIE devices - i.e. Inside Secure development boards */
+
+static int safexcel_pci_probe(struct pci_dev *pdev,
+			       const struct pci_device_id *ent)
+{
+	struct device *dev = &pdev->dev;
+	struct safexcel_crypto_priv *priv;
+	void __iomem *pciebase;
+	int rc;
+	u32 val;
+
+	dev_dbg(dev, "Probing PCIE device: vendor %04x, device %04x, subv %04x, subdev %04x, ctxt %lx\n",
+		ent->vendor, ent->device, ent->subvendor,
+		ent->subdevice, ent->driver_data);
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->dev = dev;
+	priv->version = (enum safexcel_eip_version)ent->driver_data;
+
+	pci_set_drvdata(pdev, priv);
+
+	/* enable the device */
+	rc = pcim_enable_device(pdev);
+	if (rc) {
+		dev_err(dev, "Failed to enable PCI device\n");
+		return rc;
+	}
+
+	/* take ownership of PCI BAR0 */
+	rc = pcim_iomap_regions(pdev, 1, "crypto_safexcel");
+	if (rc) {
+		dev_err(dev, "Failed to map IO region for BAR0\n");
+		return rc;
+	}
+	priv->base = pcim_iomap_table(pdev)[0];
+
+	if (priv->version == EIP197_DEVBRD) {
+		dev_dbg(dev, "Device identified as FPGA based development board - applying HW reset\n");
+
+		rc = pcim_iomap_regions(pdev, 4, "crypto_safexcel");
+		if (rc) {
+			dev_err(dev, "Failed to map IO region for BAR4\n");
+			return rc;
+		}
+
+		pciebase = pcim_iomap_table(pdev)[2];
+		val = readl(pciebase + EIP197_XLX_IRQ_BLOCK_ID_ADDR);
+		if ((val >> 16) == EIP197_XLX_IRQ_BLOCK_ID_VALUE) {
+			dev_dbg(dev, "Detected Xilinx PCIE IRQ block version %d, multiple MSI support enabled\n",
+				(val & 0xff));
+
+			/* Setup MSI identity map mapping */
+			writel(EIP197_XLX_USER_VECT_LUT0_IDENT,
+			       pciebase + EIP197_XLX_USER_VECT_LUT0_ADDR);
+			writel(EIP197_XLX_USER_VECT_LUT1_IDENT,
+			       pciebase + EIP197_XLX_USER_VECT_LUT1_ADDR);
+			writel(EIP197_XLX_USER_VECT_LUT2_IDENT,
+			       pciebase + EIP197_XLX_USER_VECT_LUT2_ADDR);
+			writel(EIP197_XLX_USER_VECT_LUT3_IDENT,
+			       pciebase + EIP197_XLX_USER_VECT_LUT3_ADDR);
+
+			/* Enable all device interrupts */
+			writel(GENMASK(31, 0),
+			       pciebase + EIP197_XLX_USER_INT_ENB_MSK);
+		} else {
+			dev_err(dev, "Unrecognised IRQ block identifier %x\n",
+				val);
+			return -ENODEV;
+		}
+
+		/* HW reset FPGA dev board */
+		/* assert reset */
+		writel(1, priv->base + EIP197_XLX_GPIO_BASE);
+		wmb(); /* maintain strict ordering for accesses here */
+		/* deassert reset */
+		writel(0, priv->base + EIP197_XLX_GPIO_BASE);
+		wmb(); /* maintain strict ordering for accesses here */
+	}
+
+	/* enable bus mastering */
+	pci_set_master(pdev);
+
+	/* Generic EIP97/EIP197 device probing */
+	rc = safexcel_probe_generic(pdev, priv, 1);
+	return rc;
+}
+
+static void safexcel_pci_remove(struct pci_dev *pdev)
+{
+	struct safexcel_crypto_priv *priv = pci_get_drvdata(pdev);
+	int i;
+
+	safexcel_unregister_algorithms(priv);
+
+	for (i = 0; i < priv->config.rings; i++)
+		destroy_workqueue(priv->ring[i].workqueue);
+
+	safexcel_hw_reset_rings(priv);
+}
+
+static const struct pci_device_id safexcel_pci_ids[] = {
+	{
+		PCI_DEVICE_SUB(PCI_VENDOR_ID_XILINX, 0x9038,
+			       0x16ae, 0xc522),
+		.driver_data = EIP197_DEVBRD,
+	},
+	{},
+};
+
+MODULE_DEVICE_TABLE(pci, safexcel_pci_ids);
+
+static struct pci_driver safexcel_pci_driver = {
+	.name          = "crypto-safexcel",
+	.id_table      = safexcel_pci_ids,
+	.probe         = safexcel_pci_probe,
+	.remove        = safexcel_pci_remove,
+};
+#endif
+
+static int __init safexcel_init(void)
+{
+	int rc = 0;
+
+#if IS_ENABLED(CONFIG_OF)
+	/* Register platform driver */
+	platform_driver_register(&crypto_safexcel);
+#endif
+
+#if IS_ENABLED(CONFIG_PCI)
+	/* Register PCI driver */
+	rc = pci_register_driver(&safexcel_pci_driver);
+#endif
+
+	return rc;
+}
+
+static void __exit safexcel_exit(void)
+{
+#if IS_ENABLED(CONFIG_OF)
+	/* Unregister platform driver */
+	platform_driver_unregister(&crypto_safexcel);
+#endif
+
+#if IS_ENABLED(CONFIG_PCI)
+	/* Unregister PCI driver if successfully registered before */
+	pci_unregister_driver(&safexcel_pci_driver);
+#endif
+}
+
+module_init(safexcel_init);
+module_exit(safexcel_exit);
 
 MODULE_AUTHOR("Antoine Tenart <antoine.tenart@free-electrons.com>");
 MODULE_AUTHOR("Ofer Heifetz <oferh@marvell.com>");
 MODULE_AUTHOR("Igal Liberman <igall@marvell.com>");
-MODULE_DESCRIPTION("Support for SafeXcel cryptographic engine EIP197");
+MODULE_DESCRIPTION("Support for SafeXcel cryptographic engines: EIP97 & EIP197");
 MODULE_LICENSE("GPL v2");
diff --git a/drivers/crypto/inside-secure/safexcel.h b/drivers/crypto/inside-secure/safexcel.h
index e0c202f..930cc48 100644
--- a/drivers/crypto/inside-secure/safexcel.h
+++ b/drivers/crypto/inside-secure/safexcel.h
@@ -14,14 +14,23 @@
 #include <crypto/sha.h>
 #include <crypto/skcipher.h>
 
-#define EIP197_HIA_VERSION_LE			0xca35
-#define EIP197_HIA_VERSION_BE			0x35ca
+#define EIP197_HIA_VERSION_BE			0xca35
+#define EIP197_HIA_VERSION_LE			0x35ca
+#define EIP97_VERSION_LE			0x9e61
+#define EIP197_VERSION_LE			0x3ac5
+#define EIP96_VERSION_LE			0x9f60
+#define EIP197_REG_LO16(reg)			(reg & 0xffff)
+#define EIP197_REG_HI16(reg)			((reg >> 16) & 0xffff)
+#define EIP197_VERSION_MASK(reg)		((reg >> 16) & 0xfff)
+#define EIP197_VERSION_SWAP(reg)		(((reg & 0xf0) << 4) | \
+						((reg >> 4) & 0xf0) | \
+						((reg >> 12) & 0xf))
 
 /* Static configuration */
 #define EIP197_DEFAULT_RING_SIZE		400
-#define EIP197_MAX_TOKENS			8
+#define EIP197_MAX_TOKENS			18
 #define EIP197_MAX_RINGS			4
-#define EIP197_FETCH_COUNT			1
+#define EIP197_FETCH_DEPTH			2
 #define EIP197_MAX_BATCH_SZ			64
 
 #define EIP197_GFP_FLAGS(base)	((base).flags & CRYPTO_TFM_REQ_MAY_SLEEP ? \
@@ -38,6 +47,27 @@
 	char __##name##_desc[size] CRYPTO_MINALIGN_ATTR; \
 	struct type##_request *name = (void *)__##name##_desc
 
+/* Xilinx dev board base offsets */
+#define EIP197_XLX_GPIO_BASE		0x200000
+#define EIP197_XLX_IRQ_BLOCK_ID_ADDR	0x2000
+#define EIP197_XLX_IRQ_BLOCK_ID_VALUE	0x1fc2
+#define EIP197_XLX_USER_INT_ENB_MSK	0x2004
+#define EIP197_XLX_USER_INT_ENB_SET	0x2008
+#define EIP197_XLX_USER_INT_ENB_CLEAR	0x200c
+#define EIP197_XLX_USER_INT_BLOCK	0x2040
+#define EIP197_XLX_USER_INT_PEND	0x2048
+#define EIP197_XLX_USER_VECT_LUT0_ADDR	0x2080
+#define EIP197_XLX_USER_VECT_LUT0_IDENT	0x03020100
+#define EIP197_XLX_USER_VECT_LUT1_ADDR	0x2084
+#define EIP197_XLX_USER_VECT_LUT1_IDENT	0x07060504
+#define EIP197_XLX_USER_VECT_LUT2_ADDR	0x2088
+#define EIP197_XLX_USER_VECT_LUT2_IDENT	0x0b0a0908
+#define EIP197_XLX_USER_VECT_LUT3_ADDR	0x208c
+#define EIP197_XLX_USER_VECT_LUT3_IDENT	0x0f0e0d0c
+
+/* Helper defines for probe function */
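+/* On PCI, MSI vector 0 is the global IRQ, so ring vectors start at index 1 */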
+#define EIP197_IRQ_NUMBER(i, is_pci)	(i + is_pci)
+
 /* Register base offsets */
 #define EIP197_HIA_AIC(priv)		((priv)->base + (priv)->offsets.hia_aic)
 #define EIP197_HIA_AIC_G(priv)		((priv)->base + (priv)->offsets.hia_aic_g)
@@ -49,6 +79,7 @@
 #define EIP197_HIA_DSE_THR(priv)	((priv)->base + (priv)->offsets.hia_dse_thr)
 #define EIP197_HIA_GEN_CFG(priv)	((priv)->base + (priv)->offsets.hia_gen_cfg)
 #define EIP197_PE(priv)			((priv)->base + (priv)->offsets.pe)
+#define EIP197_GLOBAL(priv)		((priv)->base + (priv)->offsets.global)
 
 /* EIP197 base offsets */
 #define EIP197_HIA_AIC_BASE		0x90000
@@ -61,6 +92,7 @@
 #define EIP197_HIA_DSE_THR_BASE		0x8d040
 #define EIP197_HIA_GEN_CFG_BASE		0xf0000
 #define EIP197_PE_BASE			0xa0000
+#define EIP197_GLOBAL_BASE		0xf0000
 
 /* EIP97 base offsets */
 #define EIP97_HIA_AIC_BASE		0x0
@@ -73,6 +105,7 @@
 #define EIP97_HIA_DSE_THR_BASE		0xf600
 #define EIP97_HIA_GEN_CFG_BASE		0x10000
 #define EIP97_PE_BASE			0x10000
+#define EIP97_GLOBAL_BASE		0x10000
 
 /* CDR/RDR register offsets */
 #define EIP197_HIA_xDR_OFF(priv, r)		(EIP197_HIA_AIC_xDR(priv) + (r) * 0x1000)
@@ -115,16 +148,22 @@
 #define EIP197_PE_IN_TBUF_THRES(n)		(0x0100 + (0x2000 * (n)))
 #define EIP197_PE_ICE_SCRATCH_RAM(n)		(0x0800 + (0x2000 * (n)))
 #define EIP197_PE_ICE_PUE_CTRL(n)		(0x0c80 + (0x2000 * (n)))
+#define EIP197_PE_ICE_PUTF_CTRL(n)		(0x0d00 + (0x2000 * (n)))
 #define EIP197_PE_ICE_SCRATCH_CTRL(n)		(0x0d04 + (0x2000 * (n)))
 #define EIP197_PE_ICE_FPP_CTRL(n)		(0x0d80 + (0x2000 * (n)))
+#define EIP197_PE_ICE_PPTF_CTRL(n)		(0x0e00 + (0x2000 * (n)))
 #define EIP197_PE_ICE_RAM_CTRL(n)		(0x0ff0 + (0x2000 * (n)))
 #define EIP197_PE_EIP96_TOKEN_CTRL(n)		(0x1000 + (0x2000 * (n)))
 #define EIP197_PE_EIP96_FUNCTION_EN(n)		(0x1004 + (0x2000 * (n)))
 #define EIP197_PE_EIP96_CONTEXT_CTRL(n)		(0x1008 + (0x2000 * (n)))
 #define EIP197_PE_EIP96_CONTEXT_STAT(n)		(0x100c + (0x2000 * (n)))
+#define EIP197_PE_EIP96_FUNCTION2_EN(n)		(0x1030 + (0x2000 * (n)))
+#define EIP197_PE_EIP96_OPTIONS(n)		(0x13f8 + (0x2000 * (n)))
+#define EIP197_PE_EIP96_VERSION(n)		(0x13fc + (0x2000 * (n)))
 #define EIP197_PE_OUT_DBUF_THRES(n)		(0x1c00 + (0x2000 * (n)))
 #define EIP197_PE_OUT_TBUF_THRES(n)		(0x1d00 + (0x2000 * (n)))
 #define EIP197_MST_CTRL				0xfff4
+#define EIP197_VERSION				0xfffc
 
 /* EIP197-specific registers, no indirection */
 #define EIP197_CLASSIFICATION_RAMS		0xe0000
@@ -139,6 +178,12 @@
 #define EIP197_TRC_ECCADMINSTAT			0xf0838
 #define EIP197_TRC_ECCDATASTAT			0xf083c
 #define EIP197_TRC_ECCDATA			0xf0840
+#define EIP197_FLUE_CACHEBASE_LO(n)		(0xf6000 + (32 * (n)))
+#define EIP197_FLUE_CACHEBASE_HI(n)		(0xf6004 + (32 * (n)))
+#define EIP197_FLUE_CONFIG(n)			(0xf6010 + (32 * (n)))
+#define EIP197_FLUE_OFFSETS			0xf6808
+#define EIP197_FLUE_ARC4_OFFSET			0xf680c
+#define EIP197_FLUE_IFC_LUT(n)			(0xf6820 + (4 * (n)))
 #define EIP197_CS_RAM_CTRL			0xf7ff0
 
 /* EIP197_HIA_xDR_DESC_SIZE */
@@ -186,6 +231,19 @@
 #define EIP197_N_PES_OFFSET			4
 #define EIP197_N_PES_MASK			GENMASK(4, 0)
 #define EIP97_N_PES_MASK			GENMASK(2, 0)
+#define EIP197_HWDATAW_OFFSET			25
+#define EIP197_HWDATAW_MASK			GENMASK(3, 0)
+#define EIP97_HWDATAW_MASK			GENMASK(2, 0)
+#define EIP197_CFSIZE_OFFSET			9
+#define EIP197_CFSIZE_ADJUST			4
+#define EIP97_CFSIZE_OFFSET			8
+#define EIP197_CFSIZE_MASK			GENMASK(3, 0)
+#define EIP97_CFSIZE_MASK			GENMASK(4, 0)
+#define EIP197_RFSIZE_OFFSET			12
+#define EIP197_RFSIZE_ADJUST			4
+#define EIP97_RFSIZE_OFFSET			12
+#define EIP197_RFSIZE_MASK			GENMASK(3, 0)
+#define EIP97_RFSIZE_MASK			GENMASK(4, 0)
 
 /* EIP197_HIA_AIC_R_ENABLE_CTRL */
 #define EIP197_CDR_IRQ(n)			BIT((n) * 2)
@@ -207,6 +265,11 @@
 #define EIP197_DxE_THR_CTRL_EN			BIT(30)
 #define EIP197_DxE_THR_CTRL_RESET_PE		BIT(31)
 
+/* EIP197_PE_ICE_PUE/FPP_CTRL */
+#define EIP197_PE_ICE_UENG_START_OFFSET(n)	((n) << 16)
+#define EIP197_PE_ICE_UENG_INIT_ALIGN_MASK	0x7ff0
+#define EIP197_PE_ICE_UENG_DEBUG_RESET		BIT(3)
+
 /* EIP197_HIA_AIC_G_ENABLED_STAT */
 #define EIP197_G_IRQ_DFE(n)			BIT((n) << 1)
 #define EIP197_G_IRQ_DSE(n)			BIT(((n) << 1) + 1)
@@ -223,6 +286,7 @@
 #define EIP197_MST_CTRL_TX_MAX_CMD(n)		(((n) & 0xf) << 20)
 #define EIP197_MST_CTRL_BYTE_SWAP		BIT(24)
 #define EIP197_MST_CTRL_NO_BYTE_SWAP		BIT(25)
+#define EIP197_MST_CTRL_BYTE_SWAP_BITS          GENMASK(25, 24)
 
 /* EIP197_PE_IN_DBUF/TBUF_THRES */
 #define EIP197_PE_IN_xBUF_THRES_MIN(n)		((n) << 8)
@@ -252,45 +316,20 @@
 
 /* EIP197_PE_EIP96_TOKEN_CTRL */
 #define EIP197_PE_EIP96_TOKEN_CTRL_CTX_UPDATES		BIT(16)
-#define EIP197_PE_EIP96_TOKEN_CTRL_REUSE_CTX		BIT(19)
-#define EIP197_PE_EIP96_TOKEN_CTRL_POST_REUSE_CTX	BIT(20)
+#define EIP197_PE_EIP96_TOKEN_CTRL_NO_TOKEN_WAIT	BIT(17)
+#define EIP197_PE_EIP96_TOKEN_CTRL_ENABLE_TIMEOUT	BIT(22)
 
 /* EIP197_PE_EIP96_FUNCTION_EN */
-#define EIP197_FUNCTION_RSVD			(BIT(6) | BIT(15) | BIT(20) | BIT(23))
-#define EIP197_PROTOCOL_HASH_ONLY		BIT(0)
-#define EIP197_PROTOCOL_ENCRYPT_ONLY		BIT(1)
-#define EIP197_PROTOCOL_HASH_ENCRYPT		BIT(2)
-#define EIP197_PROTOCOL_HASH_DECRYPT		BIT(3)
-#define EIP197_PROTOCOL_ENCRYPT_HASH		BIT(4)
-#define EIP197_PROTOCOL_DECRYPT_HASH		BIT(5)
-#define EIP197_ALG_ARC4				BIT(7)
-#define EIP197_ALG_AES_ECB			BIT(8)
-#define EIP197_ALG_AES_CBC			BIT(9)
-#define EIP197_ALG_AES_CTR_ICM			BIT(10)
-#define EIP197_ALG_AES_OFB			BIT(11)
-#define EIP197_ALG_AES_CFB			BIT(12)
-#define EIP197_ALG_DES_ECB			BIT(13)
-#define EIP197_ALG_DES_CBC			BIT(14)
-#define EIP197_ALG_DES_OFB			BIT(16)
-#define EIP197_ALG_DES_CFB			BIT(17)
-#define EIP197_ALG_3DES_ECB			BIT(18)
-#define EIP197_ALG_3DES_CBC			BIT(19)
-#define EIP197_ALG_3DES_OFB			BIT(21)
-#define EIP197_ALG_3DES_CFB			BIT(22)
-#define EIP197_ALG_MD5				BIT(24)
-#define EIP197_ALG_HMAC_MD5			BIT(25)
-#define EIP197_ALG_SHA1				BIT(26)
-#define EIP197_ALG_HMAC_SHA1			BIT(27)
-#define EIP197_ALG_SHA2				BIT(28)
-#define EIP197_ALG_HMAC_SHA2			BIT(29)
-#define EIP197_ALG_AES_XCBC_MAC			BIT(30)
-#define EIP197_ALG_GCM_HASH			BIT(31)
+#define EIP197_FUNCTION_ALL			0xffffffff
 
 /* EIP197_PE_EIP96_CONTEXT_CTRL */
 #define EIP197_CONTEXT_SIZE(n)			(n)
 #define EIP197_ADDRESS_MODE			BIT(8)
 #define EIP197_CONTROL_MODE			BIT(9)
 
+/* EIP197_FLUE_CONFIG */
+#define EIP197_FLUE_CONFIG_MAGIC		0xc7000004
+
 /* Context Control */
 struct safexcel_context_record {
 	u32 control0;
@@ -320,6 +359,7 @@
 #define CONTEXT_CONTROL_CRYPTO_ALG_AES192	(0x6 << 17)
 #define CONTEXT_CONTROL_CRYPTO_ALG_AES256	(0x7 << 17)
 #define CONTEXT_CONTROL_DIGEST_PRECOMPUTED	(0x1 << 21)
+#define CONTEXT_CONTROL_DIGEST_XCM		(0x2 << 21)
 #define CONTEXT_CONTROL_DIGEST_HMAC		(0x3 << 21)
 #define CONTEXT_CONTROL_CRYPTO_ALG_MD5		(0x0 << 23)
 #define CONTEXT_CONTROL_CRYPTO_ALG_SHA1		(0x2 << 23)
@@ -327,12 +367,21 @@
 #define CONTEXT_CONTROL_CRYPTO_ALG_SHA256	(0x3 << 23)
 #define CONTEXT_CONTROL_CRYPTO_ALG_SHA384	(0x6 << 23)
 #define CONTEXT_CONTROL_CRYPTO_ALG_SHA512	(0x5 << 23)
+#define CONTEXT_CONTROL_CRYPTO_ALG_GHASH	(0x4 << 23)
+#define CONTEXT_CONTROL_CRYPTO_ALG_XCBC128	(0x1 << 23)
+#define CONTEXT_CONTROL_CRYPTO_ALG_XCBC192	(0x2 << 23)
+#define CONTEXT_CONTROL_CRYPTO_ALG_XCBC256	(0x3 << 23)
 #define CONTEXT_CONTROL_INV_FR			(0x5 << 24)
 #define CONTEXT_CONTROL_INV_TR			(0x6 << 24)
 
 /* control1 */
 #define CONTEXT_CONTROL_CRYPTO_MODE_ECB		(0 << 0)
 #define CONTEXT_CONTROL_CRYPTO_MODE_CBC		(1 << 0)
+#define CONTEXT_CONTROL_CRYPTO_MODE_OFB		(4 << 0)
+#define CONTEXT_CONTROL_CRYPTO_MODE_CFB		(5 << 0)
+#define CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD	(6 << 0)
+#define CONTEXT_CONTROL_CRYPTO_MODE_XTS		(7 << 0)
+#define CONTEXT_CONTROL_CRYPTO_MODE_XCM		((6 << 0) | BIT(17))
 #define CONTEXT_CONTROL_IV0			BIT(5)
 #define CONTEXT_CONTROL_IV1			BIT(6)
 #define CONTEXT_CONTROL_IV2			BIT(7)
@@ -342,6 +391,9 @@
 #define CONTEXT_CONTROL_CRYPTO_STORE		BIT(12)
 #define CONTEXT_CONTROL_HASH_STORE		BIT(19)
 
+#define EIP197_XCM_MODE_GCM			1
+#define EIP197_XCM_MODE_CCM			2
+
 /* The hash counter given to the engine in the context has a granularity of
  * 64 bits.
  */
@@ -352,6 +404,8 @@
 #define EIP197_TRC_ENABLE_1			BIT(5)
 #define EIP197_TRC_ENABLE_2			BIT(6)
 #define EIP197_TRC_ENABLE_MASK			GENMASK(6, 4)
+#define EIP197_CS_BANKSEL_MASK			GENMASK(14, 12)
+#define EIP197_CS_BANKSEL_OFS			12
 
 /* EIP197_TRC_PARAMS */
 #define EIP197_TRC_PARAMS_SW_RESET		BIT(0)
@@ -369,19 +423,11 @@
 #define EIP197_TRC_PARAMS2_RC_SZ_SMALL(n)	((n) << 18)
 
 /* Cache helpers */
-#define EIP197B_CS_RC_MAX			52
-#define EIP197D_CS_RC_MAX			96
+#define EIP197_CS_TRC_REC_WC			64
 #define EIP197_CS_RC_SIZE			(4 * sizeof(u32))
 #define EIP197_CS_RC_NEXT(x)			(x)
 #define EIP197_CS_RC_PREV(x)			((x) << 10)
 #define EIP197_RC_NULL				0x3ff
-#define EIP197B_CS_TRC_REC_WC			59
-#define EIP197D_CS_TRC_REC_WC			64
-#define EIP197B_CS_TRC_LG_REC_WC		73
-#define EIP197D_CS_TRC_LG_REC_WC		80
-#define EIP197B_CS_HT_WC			64
-#define EIP197D_CS_HT_WC			256
-
 
 /* Result data */
 struct result_data_desc {
@@ -423,6 +469,14 @@
 	struct result_data_desc result_data;
 } __packed;
 
+/*
+ * The EIP(1)97 only needs to fetch the descriptor part of
+ * the result descriptor, not the result token part!
+ */
+#define EIP197_RD64_FETCH_SIZE		((sizeof(struct safexcel_result_desc) -\
+					  sizeof(struct result_data_desc)) /\
+					 sizeof(u32))
+
 struct safexcel_token {
 	u32 packet_length:17;
 	u8 stat:2;
@@ -442,6 +496,7 @@
 #define EIP197_TOKEN_OPCODE_INSERT		0x2
 #define EIP197_TOKEN_OPCODE_NOOP		EIP197_TOKEN_OPCODE_INSERT
 #define EIP197_TOKEN_OPCODE_RETRIEVE		0x4
+#define EIP197_TOKEN_OPCODE_INSERT_REMRES	0xa
 #define EIP197_TOKEN_OPCODE_VERIFY		0xd
 #define EIP197_TOKEN_OPCODE_CTX_ACCESS		0xe
 #define EIP197_TOKEN_OPCODE_BYPASS		GENMASK(3, 0)
@@ -455,10 +510,11 @@
 /* Instructions */
 #define EIP197_TOKEN_INS_INSERT_HASH_DIGEST	0x1c
 #define EIP197_TOKEN_INS_ORIGIN_IV0		0x14
+#define EIP197_TOKEN_INS_ORIGIN_TOKEN		0x1b
 #define EIP197_TOKEN_INS_ORIGIN_LEN(x)		((x) << 5)
 #define EIP197_TOKEN_INS_TYPE_OUTPUT		BIT(5)
 #define EIP197_TOKEN_INS_TYPE_HASH		BIT(6)
-#define EIP197_TOKEN_INS_TYPE_CRYTO		BIT(7)
+#define EIP197_TOKEN_INS_TYPE_CRYPTO		BIT(7)
 #define EIP197_TOKEN_INS_LAST			BIT(8)
 
 /* Processing Engine Control Data  */
@@ -509,6 +565,11 @@
  * Internal structures & functions
  */
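+/* Firmware image download / start-up handshake constants */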
 
+#define EIP197_FW_TERMINAL_NOPS		2
+#define EIP197_FW_START_POLLCNT		16
+#define EIP197_FW_PUE_READY		0x14
+#define EIP197_FW_FPP_READY		0x18
+
 enum eip197_fw {
 	FW_IFPP = 0,
 	FW_IPUE,
@@ -581,10 +642,42 @@
 	struct crypto_async_request *backlog;
 };
 
+/* EIP integration context flags */
 enum safexcel_eip_version {
-	EIP97IES = BIT(0),
-	EIP197B  = BIT(1),
-	EIP197D  = BIT(2),
+	/* Platform (EIP integration context) specifier */
+	EIP97IES_MRVL,
+	EIP197B_MRVL,
+	EIP197D_MRVL,
+	EIP197_DEVBRD
+};
+
+/* Priority we use for advertising our algorithms */
+#define SAFEXCEL_CRA_PRIORITY		300
+
+/* EIP algorithm presence flags */
+enum safexcel_eip_algorithms {
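+	/* Bits correspond to the algorithm bits of the EIP96_OPTIONS register */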
+	SAFEXCEL_ALG_BC0      = BIT(5),
+	SAFEXCEL_ALG_SM4      = BIT(6),
+	SAFEXCEL_ALG_SM3      = BIT(7),
+	SAFEXCEL_ALG_CHACHA20 = BIT(8),
+	SAFEXCEL_ALG_POLY1305 = BIT(9),
+	SAFEXCEL_SEQMASK_256   = BIT(10),
+	SAFEXCEL_SEQMASK_384   = BIT(11),
+	SAFEXCEL_ALG_AES      = BIT(12),
+	SAFEXCEL_ALG_AES_XFB  = BIT(13),
+	SAFEXCEL_ALG_DES      = BIT(15),
+	SAFEXCEL_ALG_DES_XFB  = BIT(16),
+	SAFEXCEL_ALG_ARC4     = BIT(18),
+	SAFEXCEL_ALG_AES_XTS  = BIT(20),
+	SAFEXCEL_ALG_WIRELESS = BIT(21),
+	SAFEXCEL_ALG_MD5      = BIT(22),
+	SAFEXCEL_ALG_SHA1     = BIT(23),
+	SAFEXCEL_ALG_SHA2_256 = BIT(25),
+	SAFEXCEL_ALG_SHA2_512 = BIT(26),
+	SAFEXCEL_ALG_XCBC_MAC = BIT(27),
+	SAFEXCEL_ALG_CBC_MAC_ALL = BIT(29),
+	SAFEXCEL_ALG_GHASH    = BIT(30),
+	SAFEXCEL_ALG_SHA3     = BIT(31),
 };
 
 struct safexcel_register_offsets {
@@ -598,10 +691,22 @@
 	u32 hia_dse_thr;
 	u32 hia_gen_cfg;
 	u32 pe;
+	u32 global;
 };
 
 enum safexcel_flags {
-	EIP197_TRC_CACHE = BIT(0),
+	EIP197_TRC_CACHE	= BIT(0),
+	SAFEXCEL_HW_EIP197	= BIT(1),
+};
+
+struct safexcel_hwconfig {
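+	/* HW capabilities probed at init: versions, data width, CD/RD FIFO sizes */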
+	enum safexcel_eip_algorithms algo_flags;
+	int hwver;
+	int hiaver;
+	int pever;
+	int hwdataw;
+	int hwcfsize;
+	int hwrfsize;
 };
 
 struct safexcel_crypto_priv {
@@ -613,6 +718,7 @@
 
 	enum safexcel_eip_version version;
 	struct safexcel_register_offsets offsets;
+	struct safexcel_hwconfig hwconfig;
 	u32 flags;
 
 	/* context DMA pool */
@@ -637,14 +743,16 @@
 	bool exit_inv;
 };
 
+#define HASH_CACHE_SIZE			SHA512_BLOCK_SIZE
+
 struct safexcel_ahash_export_state {
-	u64 len[2];
-	u64 processed[2];
+	u64 len;
+	u64 processed;
 
 	u32 digest;
 
 	u32 state[SHA512_DIGEST_SIZE / sizeof(u32)];
-	u8 cache[SHA512_BLOCK_SIZE << 1];
+	u8 cache[HASH_CACHE_SIZE];
 };
 
 /*
@@ -655,7 +763,7 @@
 struct safexcel_alg_template {
 	struct safexcel_crypto_priv *priv;
 	enum safexcel_alg_type type;
-	u32 engines;
+	enum safexcel_eip_algorithms algo_mask;
 	union {
 		struct skcipher_alg skcipher;
 		struct aead_alg aead;
@@ -716,6 +824,9 @@
 extern struct safexcel_alg_template safexcel_alg_cbc_des3_ede;
 extern struct safexcel_alg_template safexcel_alg_ecb_aes;
 extern struct safexcel_alg_template safexcel_alg_cbc_aes;
+extern struct safexcel_alg_template safexcel_alg_cfb_aes;
+extern struct safexcel_alg_template safexcel_alg_ofb_aes;
+extern struct safexcel_alg_template safexcel_alg_ctr_aes;
 extern struct safexcel_alg_template safexcel_alg_md5;
 extern struct safexcel_alg_template safexcel_alg_sha1;
 extern struct safexcel_alg_template safexcel_alg_sha224;
@@ -733,5 +844,14 @@
 extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha256_cbc_aes;
 extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha384_cbc_aes;
 extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha512_cbc_aes;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_cbc_des3_ede;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_ctr_aes;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha224_ctr_aes;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha256_ctr_aes;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha384_ctr_aes;
+extern struct safexcel_alg_template safexcel_alg_authenc_hmac_sha512_ctr_aes;
+extern struct safexcel_alg_template safexcel_alg_xts_aes;
+extern struct safexcel_alg_template safexcel_alg_gcm;
+extern struct safexcel_alg_template safexcel_alg_ccm;
 
 #endif
diff --git a/drivers/crypto/inside-secure/safexcel_cipher.c b/drivers/crypto/inside-secure/safexcel_cipher.c
index 8cdbdbe..ef51f8c2 100644
--- a/drivers/crypto/inside-secure/safexcel_cipher.c
+++ b/drivers/crypto/inside-secure/safexcel_cipher.c
@@ -12,8 +12,12 @@
 #include <crypto/aead.h>
 #include <crypto/aes.h>
 #include <crypto/authenc.h>
-#include <crypto/des.h>
+#include <crypto/ctr.h>
+#include <crypto/internal/des.h>
+#include <crypto/gcm.h>
+#include <crypto/ghash.h>
 #include <crypto/sha.h>
+#include <crypto/xts.h>
 #include <crypto/skcipher.h>
 #include <crypto/internal/aead.h>
 #include <crypto/internal/skcipher.h>
@@ -38,15 +42,19 @@
 	u32 mode;
 	enum safexcel_cipher_alg alg;
 	bool aead;
+	int  xcm; /* 0=authenc, 1=GCM, 2=CCM */
 
-	__le32 key[8];
-	unsigned int key_len;
+	__le32 key[16];
+	u32 nonce;
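+	/* xts is 1 for XTS: key[] holds 2 keys, cipher keysize = key_len >> xts */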
+	unsigned int key_len, xts;
 
 	/* All the below is AEAD specific */
 	u32 hash_alg;
 	u32 state_sz;
 	u32 ipad[SHA512_DIGEST_SIZE / sizeof(u32)];
 	u32 opad[SHA512_DIGEST_SIZE / sizeof(u32)];
+
+	struct crypto_cipher *hkaes;
 };
 
 struct safexcel_cipher_req {
@@ -54,16 +62,47 @@
 	/* Number of result descriptors associated to the request */
 	unsigned int rdescs;
 	bool needs_inv;
+	int  nr_src, nr_dst;
 };
 
-static void safexcel_skcipher_token(struct safexcel_cipher_ctx *ctx, u8 *iv,
-				    struct safexcel_command_desc *cdesc,
-				    u32 length)
+static void safexcel_cipher_token(struct safexcel_cipher_ctx *ctx, u8 *iv,
+				  struct safexcel_command_desc *cdesc)
 {
-	struct safexcel_token *token;
-	u32 offset = 0, block_sz = 0;
+	u32 block_sz = 0;
 
-	if (ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CBC) {
+	if (ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD) {
+		cdesc->control_data.options |= EIP197_OPTION_4_TOKEN_IV_CMD;
+
+		/* 32 bit nonce */
+		cdesc->control_data.token[0] = ctx->nonce;
+		/* 64 bit IV part */
+		memcpy(&cdesc->control_data.token[1], iv, 8);
+		/* 32 bit counter, start at 1 (big endian!) */
+		cdesc->control_data.token[3] = cpu_to_be32(1);
+
+		return;
+	} else if (ctx->xcm == EIP197_XCM_MODE_GCM) {
+		cdesc->control_data.options |= EIP197_OPTION_4_TOKEN_IV_CMD;
+
+		/* 96 bit IV part */
+		memcpy(&cdesc->control_data.token[0], iv, 12);
+		/* 32 bit counter, start at 1 (big endian!) */
+		cdesc->control_data.token[3] = cpu_to_be32(1);
+
+		return;
+	} else if (ctx->xcm == EIP197_XCM_MODE_CCM) {
+		cdesc->control_data.options |= EIP197_OPTION_4_TOKEN_IV_CMD;
+
+		/* Variable length IV part */
+		memcpy(&cdesc->control_data.token[0], iv, 15 - iv[0]);
+		/* Start variable length counter at 0 */
+		memset((u8 *)&cdesc->control_data.token[0] + 15 - iv[0],
+		       0, iv[0] + 1);
+
+		return;
+	}
+
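+	/* Other non-ECB modes: copy the block-size IV into the token area */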
+	if (ctx->mode != CONTEXT_CONTROL_CRYPTO_MODE_ECB) {
 		switch (ctx->alg) {
 		case SAFEXCEL_DES:
 			block_sz = DES_BLOCK_SIZE;
@@ -78,39 +117,28 @@
 			cdesc->control_data.options |= EIP197_OPTION_4_TOKEN_IV_CMD;
 			break;
 		}
-
-		offset = block_sz / sizeof(u32);
 		memcpy(cdesc->control_data.token, iv, block_sz);
 	}
+}
 
-	token = (struct safexcel_token *)(cdesc->control_data.token + offset);
+static void safexcel_skcipher_token(struct safexcel_cipher_ctx *ctx, u8 *iv,
+				    struct safexcel_command_desc *cdesc,
+				    u32 length)
+{
+	struct safexcel_token *token;
+
+	safexcel_cipher_token(ctx, iv, cdesc);
+
+	/* skip over worst case IV of 4 dwords, no need to be exact */
+	token = (struct safexcel_token *)(cdesc->control_data.token + 4);
 
 	token[0].opcode = EIP197_TOKEN_OPCODE_DIRECTION;
 	token[0].packet_length = length;
 	token[0].stat = EIP197_TOKEN_STAT_LAST_PACKET |
 			EIP197_TOKEN_STAT_LAST_HASH;
 	token[0].instructions = EIP197_TOKEN_INS_LAST |
-				EIP197_TOKEN_INS_TYPE_CRYTO |
+				EIP197_TOKEN_INS_TYPE_CRYPTO |
 				EIP197_TOKEN_INS_TYPE_OUTPUT;
-
-	if (ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CBC) {
-		u32 last = (EIP197_MAX_TOKENS - 1) - offset;
-
-		token[last].opcode = EIP197_TOKEN_OPCODE_CTX_ACCESS;
-		token[last].packet_length = EIP197_TOKEN_DIRECTION_EXTERNAL |
-					    EIP197_TOKEN_EXEC_IF_SUCCESSFUL|
-					    EIP197_TOKEN_CTX_OFFSET(0x2);
-		token[last].stat = EIP197_TOKEN_STAT_LAST_HASH |
-			EIP197_TOKEN_STAT_LAST_PACKET;
-		token[last].instructions =
-			EIP197_TOKEN_INS_ORIGIN_LEN(block_sz / sizeof(u32)) |
-			EIP197_TOKEN_INS_ORIGIN_IV0;
-
-		/* Store the updated IV values back in the internal context
-		 * registers.
-		 */
-		cdesc->control_data.control1 |= CONTEXT_CONTROL_CRYPTO_STORE;
-	}
 }
 
 static void safexcel_aead_token(struct safexcel_cipher_ctx *ctx, u8 *iv,
@@ -119,53 +147,123 @@
 				u32 cryptlen, u32 assoclen, u32 digestsize)
 {
 	struct safexcel_token *token;
-	unsigned offset = 0;
 
-	if (ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CBC) {
-		offset = AES_BLOCK_SIZE / sizeof(u32);
-		memcpy(cdesc->control_data.token, iv, AES_BLOCK_SIZE);
-
-		cdesc->control_data.options |= EIP197_OPTION_4_TOKEN_IV_CMD;
-	}
-
-	token = (struct safexcel_token *)(cdesc->control_data.token + offset);
-
-	if (direction == SAFEXCEL_DECRYPT)
-		cryptlen -= digestsize;
-
-	token[0].opcode = EIP197_TOKEN_OPCODE_DIRECTION;
-	token[0].packet_length = assoclen;
-	token[0].instructions = EIP197_TOKEN_INS_TYPE_HASH |
-				EIP197_TOKEN_INS_TYPE_OUTPUT;
-
-	token[1].opcode = EIP197_TOKEN_OPCODE_DIRECTION;
-	token[1].packet_length = cryptlen;
-	token[1].stat = EIP197_TOKEN_STAT_LAST_HASH;
-	token[1].instructions = EIP197_TOKEN_INS_LAST |
-				EIP197_TOKEN_INS_TYPE_CRYTO |
-				EIP197_TOKEN_INS_TYPE_HASH |
-				EIP197_TOKEN_INS_TYPE_OUTPUT;
+	safexcel_cipher_token(ctx, iv, cdesc);
 
 	if (direction == SAFEXCEL_ENCRYPT) {
-		token[2].opcode = EIP197_TOKEN_OPCODE_INSERT;
-		token[2].packet_length = digestsize;
-		token[2].stat = EIP197_TOKEN_STAT_LAST_HASH |
-				EIP197_TOKEN_STAT_LAST_PACKET;
-		token[2].instructions = EIP197_TOKEN_INS_TYPE_OUTPUT |
-					EIP197_TOKEN_INS_INSERT_HASH_DIGEST;
-	} else {
-		token[2].opcode = EIP197_TOKEN_OPCODE_RETRIEVE;
-		token[2].packet_length = digestsize;
-		token[2].stat = EIP197_TOKEN_STAT_LAST_HASH |
-				EIP197_TOKEN_STAT_LAST_PACKET;
-		token[2].instructions = EIP197_TOKEN_INS_INSERT_HASH_DIGEST;
+		/* align end of instruction sequence to end of token */
+		token = (struct safexcel_token *)(cdesc->control_data.token +
+			 EIP197_MAX_TOKENS - 13);
 
-		token[3].opcode = EIP197_TOKEN_OPCODE_VERIFY;
-		token[3].packet_length = digestsize |
-					 EIP197_TOKEN_HASH_RESULT_VERIFY;
-		token[3].stat = EIP197_TOKEN_STAT_LAST_HASH |
-				EIP197_TOKEN_STAT_LAST_PACKET;
-		token[3].instructions = EIP197_TOKEN_INS_TYPE_OUTPUT;
+		token[12].opcode = EIP197_TOKEN_OPCODE_INSERT;
+		token[12].packet_length = digestsize;
+		token[12].stat = EIP197_TOKEN_STAT_LAST_HASH |
+				 EIP197_TOKEN_STAT_LAST_PACKET;
+		token[12].instructions = EIP197_TOKEN_INS_TYPE_OUTPUT |
+					 EIP197_TOKEN_INS_INSERT_HASH_DIGEST;
+	} else {
+		cryptlen -= digestsize;
+
+		/* align end of instruction sequence to end of token */
+		token = (struct safexcel_token *)(cdesc->control_data.token +
+			 EIP197_MAX_TOKENS - 14);
+
+		token[12].opcode = EIP197_TOKEN_OPCODE_RETRIEVE;
+		token[12].packet_length = digestsize;
+		token[12].stat = EIP197_TOKEN_STAT_LAST_HASH |
+				 EIP197_TOKEN_STAT_LAST_PACKET;
+		token[12].instructions = EIP197_TOKEN_INS_INSERT_HASH_DIGEST;
+
+		token[13].opcode = EIP197_TOKEN_OPCODE_VERIFY;
+		token[13].packet_length = digestsize |
+					  EIP197_TOKEN_HASH_RESULT_VERIFY;
+		token[13].stat = EIP197_TOKEN_STAT_LAST_HASH |
+				 EIP197_TOKEN_STAT_LAST_PACKET;
+		token[13].instructions = EIP197_TOKEN_INS_TYPE_OUTPUT;
+	}
+
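+	/* token[6] covers the associated data, token[10] (below) the crypto payload */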
+	token[6].opcode = EIP197_TOKEN_OPCODE_DIRECTION;
+	token[6].packet_length = assoclen;
+
+	if (likely(cryptlen)) {
+		token[6].instructions = EIP197_TOKEN_INS_TYPE_HASH;
+
+		token[10].opcode = EIP197_TOKEN_OPCODE_DIRECTION;
+		token[10].packet_length = cryptlen;
+		token[10].stat = EIP197_TOKEN_STAT_LAST_HASH;
+		token[10].instructions = EIP197_TOKEN_INS_LAST |
+					 EIP197_TOKEN_INS_TYPE_CRYPTO |
+					 EIP197_TOKEN_INS_TYPE_HASH |
+					 EIP197_TOKEN_INS_TYPE_OUTPUT;
+	} else if (ctx->xcm != EIP197_XCM_MODE_CCM) {
+		token[6].stat = EIP197_TOKEN_STAT_LAST_HASH;
+		token[6].instructions = EIP197_TOKEN_INS_LAST |
+					EIP197_TOKEN_INS_TYPE_HASH;
+	}
+
+	if (!ctx->xcm)
+		return;
+
+	token[8].opcode = EIP197_TOKEN_OPCODE_INSERT_REMRES;
+	token[8].packet_length = 0;
+	token[8].instructions = AES_BLOCK_SIZE;
+
+	token[9].opcode = EIP197_TOKEN_OPCODE_INSERT;
+	token[9].packet_length = AES_BLOCK_SIZE;
+	token[9].instructions = EIP197_TOKEN_INS_TYPE_OUTPUT |
+				EIP197_TOKEN_INS_TYPE_CRYPTO;
+
+	if (ctx->xcm == EIP197_XCM_MODE_GCM) {
+		token[6].instructions = EIP197_TOKEN_INS_LAST |
+					EIP197_TOKEN_INS_TYPE_HASH;
+	} else {
+		u8 *cbcmaciv = (u8 *)&token[1];
+		u32 *aadlen = (u32 *)&token[5];
+
+		/* Construct IV block B0 for the CBC-MAC */
+		token[0].opcode = EIP197_TOKEN_OPCODE_INSERT;
+		token[0].packet_length = AES_BLOCK_SIZE +
+					 ((assoclen > 0) << 1);
+		token[0].instructions = EIP197_TOKEN_INS_ORIGIN_TOKEN |
+					EIP197_TOKEN_INS_TYPE_HASH;
+		/* Variable length IV part */
+		memcpy(cbcmaciv, iv, 15 - iv[0]);
+		/* fixup flags byte */
+		cbcmaciv[0] |= ((assoclen > 0) << 6) | ((digestsize - 2) << 2);
+		/* Clear upper bytes of variable message length to 0 */
+		memset(cbcmaciv + 15 - iv[0], 0, iv[0] - 1);
+		/* insert lower 2 bytes of message length */
+		cbcmaciv[14] = cryptlen >> 8;
+		cbcmaciv[15] = cryptlen & 255;
+
+		if (assoclen) {
+			*aadlen = cpu_to_le32(cpu_to_be16(assoclen));
+			assoclen += 2;
+		}
+
+		token[6].instructions = EIP197_TOKEN_INS_TYPE_HASH;
+
+		/* Align AAD data towards hash engine */
+		token[7].opcode = EIP197_TOKEN_OPCODE_INSERT;
+		assoclen &= 15;
+		token[7].packet_length = assoclen ? 16 - assoclen : 0;
+
+		if (likely(cryptlen)) {
+			token[7].instructions = EIP197_TOKEN_INS_TYPE_HASH;
+
+			/* Align crypto data towards hash engine */
+			token[10].stat = 0;
+
+			token[11].opcode = EIP197_TOKEN_OPCODE_INSERT;
+			cryptlen &= 15;
+			token[11].packet_length = cryptlen ? 16 - cryptlen : 0;
+			token[11].stat = EIP197_TOKEN_STAT_LAST_HASH;
+			token[11].instructions = EIP197_TOKEN_INS_TYPE_HASH;
+		} else {
+			token[7].stat = EIP197_TOKEN_STAT_LAST_HASH;
+			token[7].instructions = EIP197_TOKEN_INS_LAST |
+						EIP197_TOKEN_INS_TYPE_HASH;
+		}
 	}
 }
 
@@ -178,7 +276,7 @@
 	struct crypto_aes_ctx aes;
 	int ret, i;
 
-	ret = crypto_aes_expand_key(&aes, key, len);
+	ret = aes_expandkey(&aes, key, len);
 	if (ret) {
 		crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return ret;
@@ -202,22 +300,49 @@
 	return 0;
 }
 
-static int safexcel_aead_aes_setkey(struct crypto_aead *ctfm, const u8 *key,
-				    unsigned int len)
+static int safexcel_aead_setkey(struct crypto_aead *ctfm, const u8 *key,
+				unsigned int len)
 {
 	struct crypto_tfm *tfm = crypto_aead_tfm(ctfm);
 	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
 	struct safexcel_ahash_export_state istate, ostate;
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	struct crypto_authenc_keys keys;
+	struct crypto_aes_ctx aes;
+	int err = -EINVAL;
 
 	if (crypto_authenc_extractkeys(&keys, key, len) != 0)
 		goto badkey;
 
-	if (keys.enckeylen > sizeof(ctx->key))
-		goto badkey;
+	if (ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD) {
+		/* Minimum keysize is minimum AES key size + nonce size */
+		if (keys.enckeylen < (AES_MIN_KEY_SIZE +
+				      CTR_RFC3686_NONCE_SIZE))
+			goto badkey;
+		/* last 4 bytes of key are the nonce! */
+		ctx->nonce = *(u32 *)(keys.enckey + keys.enckeylen -
+				      CTR_RFC3686_NONCE_SIZE);
+		/* exclude the nonce here */
+		keys.enckeylen -= CTR_RFC3686_NONCE_SIZE;
+	}
 
 	/* Encryption key */
+	switch (ctx->alg) {
+	case SAFEXCEL_3DES:
+		err = verify_aead_des3_key(ctfm, keys.enckey, keys.enckeylen);
+		if (unlikely(err))
+			goto badkey_expflags;
+		break;
+	case SAFEXCEL_AES:
+		err = aes_expandkey(&aes, keys.enckey, keys.enckeylen);
+		if (unlikely(err))
+			goto badkey;
+		break;
+	default:
+		dev_err(priv->dev, "aead: unsupported cipher algorithm\n");
+		goto badkey;
+	}
+
 	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma &&
 	    memcmp(ctx->key, keys.enckey, keys.enckeylen))
 		ctx->base.needs_inv = true;
@@ -274,8 +399,9 @@
 
 badkey:
 	crypto_aead_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+badkey_expflags:
 	memzero_explicit(&keys, sizeof(keys));
-	return -EINVAL;
+	return err;
 }
 
 static int safexcel_context_control(struct safexcel_cipher_ctx *ctx,
@@ -284,59 +410,78 @@
 				    struct safexcel_command_desc *cdesc)
 {
 	struct safexcel_crypto_priv *priv = ctx->priv;
-	int ctrl_size;
+	int ctrl_size = ctx->key_len / sizeof(u32);
+
+	cdesc->control_data.control1 = ctx->mode;
 
 	if (ctx->aead) {
+		/* Take into account the ipad+opad digests */
+		if (ctx->xcm) {
+			ctrl_size += ctx->state_sz / sizeof(u32);
+			cdesc->control_data.control0 =
+				CONTEXT_CONTROL_KEY_EN |
+				CONTEXT_CONTROL_DIGEST_XCM |
+				ctx->hash_alg |
+				CONTEXT_CONTROL_SIZE(ctrl_size);
+		} else {
+			ctrl_size += ctx->state_sz / sizeof(u32) * 2;
+			cdesc->control_data.control0 =
+				CONTEXT_CONTROL_KEY_EN |
+				CONTEXT_CONTROL_DIGEST_HMAC |
+				ctx->hash_alg |
+				CONTEXT_CONTROL_SIZE(ctrl_size);
+		}
 		if (sreq->direction == SAFEXCEL_ENCRYPT)
-			cdesc->control_data.control0 |= CONTEXT_CONTROL_TYPE_ENCRYPT_HASH_OUT;
-		else
-			cdesc->control_data.control0 |= CONTEXT_CONTROL_TYPE_HASH_DECRYPT_IN;
-	} else {
-		cdesc->control_data.control0 |= CONTEXT_CONTROL_TYPE_CRYPTO_OUT;
+			cdesc->control_data.control0 |=
+				(ctx->xcm == EIP197_XCM_MODE_CCM) ?
+					CONTEXT_CONTROL_TYPE_HASH_ENCRYPT_OUT :
+					CONTEXT_CONTROL_TYPE_ENCRYPT_HASH_OUT;
 
-		/* The decryption control type is a combination of the
-		 * encryption type and CONTEXT_CONTROL_TYPE_NULL_IN, for all
-		 * types.
-		 */
-		if (sreq->direction == SAFEXCEL_DECRYPT)
-			cdesc->control_data.control0 |= CONTEXT_CONTROL_TYPE_NULL_IN;
+		else
+			cdesc->control_data.control0 |=
+				(ctx->xcm == EIP197_XCM_MODE_CCM) ?
+					CONTEXT_CONTROL_TYPE_DECRYPT_HASH_IN :
+					CONTEXT_CONTROL_TYPE_HASH_DECRYPT_IN;
+	} else {
+		if (sreq->direction == SAFEXCEL_ENCRYPT)
+			cdesc->control_data.control0 =
+				CONTEXT_CONTROL_TYPE_CRYPTO_OUT |
+				CONTEXT_CONTROL_KEY_EN |
+				CONTEXT_CONTROL_SIZE(ctrl_size);
+		else
+			cdesc->control_data.control0 =
+				CONTEXT_CONTROL_TYPE_CRYPTO_IN |
+				CONTEXT_CONTROL_KEY_EN |
+				CONTEXT_CONTROL_SIZE(ctrl_size);
 	}
 
-	cdesc->control_data.control0 |= CONTEXT_CONTROL_KEY_EN;
-	cdesc->control_data.control1 |= ctx->mode;
-
-	if (ctx->aead)
-		cdesc->control_data.control0 |= CONTEXT_CONTROL_DIGEST_HMAC |
-						ctx->hash_alg;
-
 	if (ctx->alg == SAFEXCEL_DES) {
-		cdesc->control_data.control0 |= CONTEXT_CONTROL_CRYPTO_ALG_DES;
+		cdesc->control_data.control0 |=
+			CONTEXT_CONTROL_CRYPTO_ALG_DES;
 	} else if (ctx->alg == SAFEXCEL_3DES) {
-		cdesc->control_data.control0 |= CONTEXT_CONTROL_CRYPTO_ALG_3DES;
+		cdesc->control_data.control0 |=
+			CONTEXT_CONTROL_CRYPTO_ALG_3DES;
 	} else if (ctx->alg == SAFEXCEL_AES) {
-		switch (ctx->key_len) {
+		switch (ctx->key_len >> ctx->xts) {
 		case AES_KEYSIZE_128:
-			cdesc->control_data.control0 |= CONTEXT_CONTROL_CRYPTO_ALG_AES128;
+			cdesc->control_data.control0 |=
+				CONTEXT_CONTROL_CRYPTO_ALG_AES128;
 			break;
 		case AES_KEYSIZE_192:
-			cdesc->control_data.control0 |= CONTEXT_CONTROL_CRYPTO_ALG_AES192;
+			cdesc->control_data.control0 |=
+				CONTEXT_CONTROL_CRYPTO_ALG_AES192;
 			break;
 		case AES_KEYSIZE_256:
-			cdesc->control_data.control0 |= CONTEXT_CONTROL_CRYPTO_ALG_AES256;
+			cdesc->control_data.control0 |=
+				CONTEXT_CONTROL_CRYPTO_ALG_AES256;
 			break;
 		default:
 			dev_err(priv->dev, "aes keysize not supported: %u\n",
-				ctx->key_len);
+				ctx->key_len >> ctx->xts);
 			return -EINVAL;
 		}
 	}
 
-	ctrl_size = ctx->key_len / sizeof(u32);
-	if (ctx->aead)
-		/* Take in account the ipad+opad digests */
-		ctrl_size += ctx->state_sz / sizeof(u32) * 2;
-	cdesc->control_data.control0 |= CONTEXT_CONTROL_SIZE(ctrl_size);
-
 	return 0;
 }
 
@@ -348,6 +493,9 @@
 				      struct safexcel_cipher_req *sreq,
 				      bool *should_complete, int *ret)
 {
+	struct skcipher_request *areq = skcipher_request_cast(async);
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(areq);
+	struct safexcel_cipher_ctx *ctx = crypto_skcipher_ctx(skcipher);
 	struct safexcel_result_desc *rdesc;
 	int ndesc = 0;
 
@@ -374,10 +522,22 @@
 	safexcel_complete(priv, ring);
 
 	if (src == dst) {
-		dma_unmap_sg(priv->dev, src, sg_nents(src), DMA_BIDIRECTIONAL);
+		dma_unmap_sg(priv->dev, src, sreq->nr_src, DMA_BIDIRECTIONAL);
 	} else {
-		dma_unmap_sg(priv->dev, src, sg_nents(src), DMA_TO_DEVICE);
-		dma_unmap_sg(priv->dev, dst, sg_nents(dst), DMA_FROM_DEVICE);
+		dma_unmap_sg(priv->dev, src, sreq->nr_src, DMA_TO_DEVICE);
+		dma_unmap_sg(priv->dev, dst, sreq->nr_dst, DMA_FROM_DEVICE);
+	}
+
+	/*
+	 * Update IV in req from last crypto output word for CBC modes
+	 */
+	if ((!ctx->aead) && (ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CBC) &&
+	    (sreq->direction == SAFEXCEL_ENCRYPT)) {
+		/* For encrypt take the last output word */
+		sg_pcopy_to_buffer(dst, sreq->nr_dst, areq->iv,
+				   crypto_skcipher_ivsize(skcipher),
+				   (cryptlen -
+				    crypto_skcipher_ivsize(skcipher)));
 	}
 
 	*should_complete = true;
@@ -392,53 +552,105 @@
 			     unsigned int digestsize, u8 *iv, int *commands,
 			     int *results)
 {
+	struct skcipher_request *areq = skcipher_request_cast(base);
+	struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(areq);
 	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(base->tfm);
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	struct safexcel_command_desc *cdesc;
+	struct safexcel_command_desc *first_cdesc = NULL;
 	struct safexcel_result_desc *rdesc, *first_rdesc = NULL;
 	struct scatterlist *sg;
-	unsigned int totlen = cryptlen + assoclen;
-	int nr_src, nr_dst, n_cdesc = 0, n_rdesc = 0, queued = totlen;
-	int i, ret = 0;
+	unsigned int totlen;
+	unsigned int totlen_src = cryptlen + assoclen;
+	unsigned int totlen_dst = totlen_src;
+	int n_cdesc = 0, n_rdesc = 0;
+	int queued, i, ret = 0;
+	bool first = true;
+
+	sreq->nr_src = sg_nents_for_len(src, totlen_src);
+
+	if (ctx->aead) {
+		/*
+		 * AEAD has auth tag appended to output for encrypt and
+		 * removed from the output for decrypt!
+		 */
+		if (sreq->direction == SAFEXCEL_DECRYPT)
+			totlen_dst -= digestsize;
+		else
+			totlen_dst += digestsize;
+
+		memcpy(ctx->base.ctxr->data + ctx->key_len / sizeof(u32),
+		       ctx->ipad, ctx->state_sz);
+		if (!ctx->xcm)
+			memcpy(ctx->base.ctxr->data + (ctx->key_len +
+			       ctx->state_sz) / sizeof(u32), ctx->opad,
+			       ctx->state_sz);
+	} else if ((ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CBC) &&
+		   (sreq->direction == SAFEXCEL_DECRYPT)) {
+		/*
+		 * Save IV from last crypto input word for CBC modes in decrypt
+		 * direction. Need to do this first in case of inplace operation
+		 * as it will be overwritten.
+		 */
+		sg_pcopy_to_buffer(src, sreq->nr_src, areq->iv,
+				   crypto_skcipher_ivsize(skcipher),
+				   (totlen_src -
+				    crypto_skcipher_ivsize(skcipher)));
+	}
+
+	sreq->nr_dst = sg_nents_for_len(dst, totlen_dst);
+
+	/*
+	 * Remember actual input length, source buffer length may be
+	 * updated in case of inline operation below.
+	 */
+	totlen = totlen_src;
+	queued = totlen_src;
 
 	if (src == dst) {
-		nr_src = dma_map_sg(priv->dev, src, sg_nents(src),
-				    DMA_BIDIRECTIONAL);
-		nr_dst = nr_src;
-		if (!nr_src)
-			return -EINVAL;
-	} else {
-		nr_src = dma_map_sg(priv->dev, src, sg_nents(src),
-				    DMA_TO_DEVICE);
-		if (!nr_src)
-			return -EINVAL;
-
-		nr_dst = dma_map_sg(priv->dev, dst, sg_nents(dst),
-				    DMA_FROM_DEVICE);
-		if (!nr_dst) {
-			dma_unmap_sg(priv->dev, src, nr_src, DMA_TO_DEVICE);
+		sreq->nr_src = max(sreq->nr_src, sreq->nr_dst);
+		sreq->nr_dst = sreq->nr_src;
+		if (unlikely((totlen_src || totlen_dst) &&
+		    (sreq->nr_src <= 0))) {
+			dev_err(priv->dev, "In-place buffer not large enough (need %d bytes)!\n",
+				max(totlen_src, totlen_dst));
 			return -EINVAL;
 		}
+		dma_map_sg(priv->dev, src, sreq->nr_src, DMA_BIDIRECTIONAL);
+	} else {
+		if (unlikely(totlen_src && (sreq->nr_src <= 0))) {
+			dev_err(priv->dev, "Source buffer not large enough (need %d bytes)!",
+				totlen_src);
+			return -EINVAL;
+		}
+		dma_map_sg(priv->dev, src, sreq->nr_src, DMA_TO_DEVICE);
+
+		if (unlikely(totlen_dst && (sreq->nr_dst <= 0))) {
+			dev_err(priv->dev, "Dest buffer not large enough (need %d bytes)!",
+				totlen_dst);
+			dma_unmap_sg(priv->dev, src, sreq->nr_src,
+				     DMA_TO_DEVICE);
+			return -EINVAL;
+		}
+		dma_map_sg(priv->dev, dst, sreq->nr_dst, DMA_FROM_DEVICE);
 	}
 
 	memcpy(ctx->base.ctxr->data, ctx->key, ctx->key_len);
 
-	if (ctx->aead) {
-		memcpy(ctx->base.ctxr->data + ctx->key_len / sizeof(u32),
-		       ctx->ipad, ctx->state_sz);
-		memcpy(ctx->base.ctxr->data + (ctx->key_len + ctx->state_sz) / sizeof(u32),
-		       ctx->opad, ctx->state_sz);
-	}
+	/* The EIP cannot deal with zero length input packets! */
+	if (totlen == 0)
+		totlen = 1;
 
 	/* command descriptors */
-	for_each_sg(src, sg, nr_src, i) {
+	for_each_sg(src, sg, sreq->nr_src, i) {
 		int len = sg_dma_len(sg);
 
 		/* Do not overflow the request */
 		if (queued - len < 0)
 			len = queued;
 
-		cdesc = safexcel_add_cdesc(priv, ring, !n_cdesc, !(queued - len),
+		cdesc = safexcel_add_cdesc(priv, ring, !n_cdesc,
+					   !(queued - len),
 					   sg_dma_address(sg), len, totlen,
 					   ctx->base.ctxr_dma);
 		if (IS_ERR(cdesc)) {
@@ -449,14 +661,7 @@
 		n_cdesc++;
 
 		if (n_cdesc == 1) {
-			safexcel_context_control(ctx, base, sreq, cdesc);
-			if (ctx->aead)
-				safexcel_aead_token(ctx, iv, cdesc,
-						    sreq->direction, cryptlen,
-						    assoclen, digestsize);
-			else
-				safexcel_skcipher_token(ctx, iv, cdesc,
-							cryptlen);
+			first_cdesc = cdesc;
 		}
 
 		queued -= len;
@@ -464,23 +669,83 @@
 			break;
 	}
 
+	if (unlikely(!n_cdesc)) {
+		/*
+		 * Special case: zero length input buffer.
+		 * The engine always needs the 1st command descriptor, however!
+		 */
+		first_cdesc = safexcel_add_cdesc(priv, ring, 1, 1, 0, 0, totlen,
+						 ctx->base.ctxr_dma);
+		n_cdesc = 1;
+	}
+
+	/* Add context control words and token to first command descriptor */
+	safexcel_context_control(ctx, base, sreq, first_cdesc);
+	if (ctx->aead)
+		safexcel_aead_token(ctx, iv, first_cdesc,
+				    sreq->direction, cryptlen,
+				    assoclen, digestsize);
+	else
+		safexcel_skcipher_token(ctx, iv, first_cdesc,
+					cryptlen);
+
 	/* result descriptors */
-	for_each_sg(dst, sg, nr_dst, i) {
-		bool first = !i, last = sg_is_last(sg);
+	for_each_sg(dst, sg, sreq->nr_dst, i) {
+		bool last = (i == sreq->nr_dst - 1);
 		u32 len = sg_dma_len(sg);
 
-		rdesc = safexcel_add_rdesc(priv, ring, first, last,
-					   sg_dma_address(sg), len);
+		/* only allow the part of the buffer we know we need */
+		if (len > totlen_dst)
+			len = totlen_dst;
+		if (unlikely(!len))
+			break;
+		totlen_dst -= len;
+
+		/* skip over AAD space in buffer - not written */
+		if (assoclen) {
+			if (assoclen >= len) {
+				assoclen -= len;
+				continue;
+			}
+			rdesc = safexcel_add_rdesc(priv, ring, first, last,
+						   sg_dma_address(sg) +
+						   assoclen,
+						   len - assoclen);
+			assoclen = 0;
+		} else {
+			rdesc = safexcel_add_rdesc(priv, ring, first, last,
+						   sg_dma_address(sg),
+						   len);
+		}
 		if (IS_ERR(rdesc)) {
 			/* No space left in the result descriptor ring */
 			ret = PTR_ERR(rdesc);
 			goto rdesc_rollback;
 		}
-		if (first)
+		if (first) {
 			first_rdesc = rdesc;
+			first = false;
+		}
 		n_rdesc++;
 	}
 
+	if (unlikely(first)) {
+		/*
+		 * Special case: AEAD decrypt with only AAD data.
+		 * In this case there is NO output data from the engine,
+		 * but the engine still needs a result descriptor!
+		 * Create a dummy one just for catching the result token.
+		 */
+		rdesc = safexcel_add_rdesc(priv, ring, true, true, 0, 0);
+		if (IS_ERR(rdesc)) {
+			/* No space left in the result descriptor ring */
+			ret = PTR_ERR(rdesc);
+			goto rdesc_rollback;
+		}
+		first_rdesc = rdesc;
+		n_rdesc = 1;
+	}
+
 	safexcel_rdr_req_set(priv, ring, first_rdesc, base);
 
 	*commands = n_cdesc;
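
The result-descriptor loop above has to account for the fact that the engine never writes the AAD back to the destination, so the first assoclen bytes of dst are skipped before DMA addresses are handed to the ring, and a request that is nothing but AAD still gets one dummy descriptor to catch the result token. A stand-alone sketch of the skip logic only, with descriptor creation replaced by a hypothetical emit() callback:

	#include <linux/scatterlist.h>
	#include <linux/types.h>

	/* Walk dst and emit (address, length) pairs with the first 'skip' bytes dropped */
	static void emit_after_aad(struct scatterlist *dst, int nents, unsigned int skip,
				   void (*emit)(dma_addr_t addr, u32 len))
	{
		struct scatterlist *sg;
		int i;

		for_each_sg(dst, sg, nents, i) {
			u32 len = sg_dma_len(sg);

			if (skip >= len) {	/* this entry is AAD only, skip it whole */
				skip -= len;
				continue;
			}
			emit(sg_dma_address(sg) + skip, len - skip);
			skip = 0;
		}
	}
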
@@ -495,10 +760,10 @@
 		safexcel_ring_rollback_wptr(priv, &priv->ring[ring].cdr);
 
 	if (src == dst) {
-		dma_unmap_sg(priv->dev, src, nr_src, DMA_BIDIRECTIONAL);
+		dma_unmap_sg(priv->dev, src, sreq->nr_src, DMA_BIDIRECTIONAL);
 	} else {
-		dma_unmap_sg(priv->dev, src, nr_src, DMA_TO_DEVICE);
-		dma_unmap_sg(priv->dev, dst, nr_dst, DMA_FROM_DEVICE);
+		dma_unmap_sg(priv->dev, src, sreq->nr_src, DMA_TO_DEVICE);
+		dma_unmap_sg(priv->dev, dst, sreq->nr_dst, DMA_FROM_DEVICE);
 	}
 
 	return ret;
@@ -570,7 +835,6 @@
 {
 	struct skcipher_request *req = skcipher_request_cast(async);
 	struct safexcel_cipher_req *sreq = skcipher_request_ctx(req);
-	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(async->tfm);
 	int err;
 
 	if (sreq->needs_inv) {
@@ -581,24 +845,6 @@
 		err = safexcel_handle_req_result(priv, ring, async, req->src,
 						 req->dst, req->cryptlen, sreq,
 						 should_complete, ret);
-
-		if (ctx->mode == CONTEXT_CONTROL_CRYPTO_MODE_CBC) {
-			u32 block_sz = 0;
-
-			switch (ctx->alg) {
-			case SAFEXCEL_DES:
-				block_sz = DES_BLOCK_SIZE;
-				break;
-			case SAFEXCEL_3DES:
-				block_sz = DES3_EDE_BLOCK_SIZE;
-				break;
-			case SAFEXCEL_AES:
-				block_sz = AES_BLOCK_SIZE;
-				break;
-			}
-
-			memcpy(req->iv, ctx->base.ctxr->data, block_sz);
-		}
 	}
 
 	return err;
@@ -656,12 +902,22 @@
 
 	BUG_ON(!(priv->flags & EIP197_TRC_CACHE) && sreq->needs_inv);
 
-	if (sreq->needs_inv)
+	if (sreq->needs_inv) {
 		ret = safexcel_cipher_send_inv(async, ring, commands, results);
-	else
+	} else {
+		struct crypto_skcipher *skcipher = crypto_skcipher_reqtfm(req);
+		u8 input_iv[AES_BLOCK_SIZE];
+
+		/*
+		 * Save input IV in case of CBC decrypt mode
+		 * Will be overwritten with output IV prior to use!
+		 */
+		memcpy(input_iv, req->iv, crypto_skcipher_ivsize(skcipher));
+
 		ret = safexcel_send_req(async, ring, sreq, req->src,
-					req->dst, req->cryptlen, 0, 0, req->iv,
+					req->dst, req->cryptlen, 0, 0, input_iv,
 					commands, results);
+	}
 
 	sreq->rdescs = *results;
 	return ret;
@@ -756,8 +1012,7 @@
 
 static int safexcel_queue_req(struct crypto_async_request *base,
 			struct safexcel_cipher_req *sreq,
-			enum safexcel_cipher_direction dir, u32 mode,
-			enum safexcel_cipher_alg alg)
+			enum safexcel_cipher_direction dir)
 {
 	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(base->tfm);
 	struct safexcel_crypto_priv *priv = ctx->priv;
@@ -765,8 +1020,6 @@
 
 	sreq->needs_inv = false;
 	sreq->direction = dir;
-	ctx->alg = alg;
-	ctx->mode = mode;
 
 	if (ctx->base.ctxr) {
 		if (priv->flags & EIP197_TRC_CACHE && ctx->base.needs_inv) {
@@ -794,18 +1047,16 @@
 	return ret;
 }
 
-static int safexcel_ecb_aes_encrypt(struct skcipher_request *req)
+static int safexcel_encrypt(struct skcipher_request *req)
 {
 	return safexcel_queue_req(&req->base, skcipher_request_ctx(req),
-			SAFEXCEL_ENCRYPT, CONTEXT_CONTROL_CRYPTO_MODE_ECB,
-			SAFEXCEL_AES);
+			SAFEXCEL_ENCRYPT);
 }
 
-static int safexcel_ecb_aes_decrypt(struct skcipher_request *req)
+static int safexcel_decrypt(struct skcipher_request *req)
 {
 	return safexcel_queue_req(&req->base, skcipher_request_ctx(req),
-			SAFEXCEL_DECRYPT, CONTEXT_CONTROL_CRYPTO_MODE_ECB,
-			SAFEXCEL_AES);
+			SAFEXCEL_DECRYPT);
 }
 
 static int safexcel_skcipher_cra_init(struct crypto_tfm *tfm)
@@ -879,103 +1130,233 @@
 	}
 }
 
+static int safexcel_skcipher_aes_ecb_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_skcipher_cra_init(tfm);
+	ctx->alg  = SAFEXCEL_AES;
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_ECB;
+	return 0;
+}
+
 struct safexcel_alg_template safexcel_alg_ecb_aes = {
 	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_AES,
 	.alg.skcipher = {
 		.setkey = safexcel_skcipher_aes_setkey,
-		.encrypt = safexcel_ecb_aes_encrypt,
-		.decrypt = safexcel_ecb_aes_decrypt,
+		.encrypt = safexcel_encrypt,
+		.decrypt = safexcel_decrypt,
 		.min_keysize = AES_MIN_KEY_SIZE,
 		.max_keysize = AES_MAX_KEY_SIZE,
 		.base = {
 			.cra_name = "ecb(aes)",
 			.cra_driver_name = "safexcel-ecb-aes",
-			.cra_priority = 300,
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
 			.cra_flags = CRYPTO_ALG_ASYNC |
 				     CRYPTO_ALG_KERN_DRIVER_ONLY,
 			.cra_blocksize = AES_BLOCK_SIZE,
 			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
 			.cra_alignmask = 0,
-			.cra_init = safexcel_skcipher_cra_init,
+			.cra_init = safexcel_skcipher_aes_ecb_cra_init,
 			.cra_exit = safexcel_skcipher_cra_exit,
 			.cra_module = THIS_MODULE,
 		},
 	},
 };
 
-static int safexcel_cbc_aes_encrypt(struct skcipher_request *req)
+static int safexcel_skcipher_aes_cbc_cra_init(struct crypto_tfm *tfm)
 {
-	return safexcel_queue_req(&req->base, skcipher_request_ctx(req),
-			SAFEXCEL_ENCRYPT, CONTEXT_CONTROL_CRYPTO_MODE_CBC,
-			SAFEXCEL_AES);
-}
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
 
-static int safexcel_cbc_aes_decrypt(struct skcipher_request *req)
-{
-	return safexcel_queue_req(&req->base, skcipher_request_ctx(req),
-			SAFEXCEL_DECRYPT, CONTEXT_CONTROL_CRYPTO_MODE_CBC,
-			SAFEXCEL_AES);
+	safexcel_skcipher_cra_init(tfm);
+	ctx->alg  = SAFEXCEL_AES;
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CBC;
+	return 0;
 }
 
 struct safexcel_alg_template safexcel_alg_cbc_aes = {
 	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_AES,
 	.alg.skcipher = {
 		.setkey = safexcel_skcipher_aes_setkey,
-		.encrypt = safexcel_cbc_aes_encrypt,
-		.decrypt = safexcel_cbc_aes_decrypt,
+		.encrypt = safexcel_encrypt,
+		.decrypt = safexcel_decrypt,
 		.min_keysize = AES_MIN_KEY_SIZE,
 		.max_keysize = AES_MAX_KEY_SIZE,
 		.ivsize = AES_BLOCK_SIZE,
 		.base = {
 			.cra_name = "cbc(aes)",
 			.cra_driver_name = "safexcel-cbc-aes",
-			.cra_priority = 300,
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
 			.cra_flags = CRYPTO_ALG_ASYNC |
 				     CRYPTO_ALG_KERN_DRIVER_ONLY,
 			.cra_blocksize = AES_BLOCK_SIZE,
 			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
 			.cra_alignmask = 0,
-			.cra_init = safexcel_skcipher_cra_init,
+			.cra_init = safexcel_skcipher_aes_cbc_cra_init,
 			.cra_exit = safexcel_skcipher_cra_exit,
 			.cra_module = THIS_MODULE,
 		},
 	},
 };
 
-static int safexcel_cbc_des_encrypt(struct skcipher_request *req)
+static int safexcel_skcipher_aes_cfb_cra_init(struct crypto_tfm *tfm)
 {
-	return safexcel_queue_req(&req->base, skcipher_request_ctx(req),
-			SAFEXCEL_ENCRYPT, CONTEXT_CONTROL_CRYPTO_MODE_CBC,
-			SAFEXCEL_DES);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_skcipher_cra_init(tfm);
+	ctx->alg  = SAFEXCEL_AES;
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CFB;
+	return 0;
 }
 
-static int safexcel_cbc_des_decrypt(struct skcipher_request *req)
+struct safexcel_alg_template safexcel_alg_cfb_aes = {
+	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_AES_XFB,
+	.alg.skcipher = {
+		.setkey = safexcel_skcipher_aes_setkey,
+		.encrypt = safexcel_encrypt,
+		.decrypt = safexcel_decrypt,
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize = AES_BLOCK_SIZE,
+		.base = {
+			.cra_name = "cfb(aes)",
+			.cra_driver_name = "safexcel-cfb-aes",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_skcipher_aes_cfb_cra_init,
+			.cra_exit = safexcel_skcipher_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_skcipher_aes_ofb_cra_init(struct crypto_tfm *tfm)
 {
-	return safexcel_queue_req(&req->base, skcipher_request_ctx(req),
-			SAFEXCEL_DECRYPT, CONTEXT_CONTROL_CRYPTO_MODE_CBC,
-			SAFEXCEL_DES);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_skcipher_cra_init(tfm);
+	ctx->alg  = SAFEXCEL_AES;
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_OFB;
+	return 0;
 }
 
+struct safexcel_alg_template safexcel_alg_ofb_aes = {
+	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_AES_XFB,
+	.alg.skcipher = {
+		.setkey = safexcel_skcipher_aes_setkey,
+		.encrypt = safexcel_encrypt,
+		.decrypt = safexcel_decrypt,
+		.min_keysize = AES_MIN_KEY_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE,
+		.ivsize = AES_BLOCK_SIZE,
+		.base = {
+			.cra_name = "ofb(aes)",
+			.cra_driver_name = "safexcel-ofb-aes",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_skcipher_aes_ofb_cra_init,
+			.cra_exit = safexcel_skcipher_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_skcipher_aesctr_setkey(struct crypto_skcipher *ctfm,
+					   const u8 *key, unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(ctfm);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	struct crypto_aes_ctx aes;
+	int ret, i;
+	unsigned int keylen;
+
+	/* last 4 bytes of key are the nonce! */
+	ctx->nonce = *(u32 *)(key + len - CTR_RFC3686_NONCE_SIZE);
+	/* exclude the nonce here */
+	keylen = len - CTR_RFC3686_NONCE_SIZE;
+	ret = aes_expandkey(&aes, key, keylen);
+	if (ret) {
+		crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return ret;
+	}
+
+	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma) {
+		for (i = 0; i < keylen / sizeof(u32); i++) {
+			if (ctx->key[i] != cpu_to_le32(aes.key_enc[i])) {
+				ctx->base.needs_inv = true;
+				break;
+			}
+		}
+	}
+
+	for (i = 0; i < keylen / sizeof(u32); i++)
+		ctx->key[i] = cpu_to_le32(aes.key_enc[i]);
+
+	ctx->key_len = keylen;
+
+	memzero_explicit(&aes, sizeof(aes));
+	return 0;
+}
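
For rfc3686(ctr(aes)) the key passed to setkey is the raw AES key with the 4-byte per-tfm nonce appended, which is why min/max_keysize in the template below grow by CTR_RFC3686_NONCE_SIZE and the function above peels the nonce off the tail before expanding the remaining bytes as an ordinary AES key. A small sketch of that split, using made-up names (struct ctr_key, split_rfc3686_key):

	#include <linux/string.h>
	#include <linux/types.h>
	#include <crypto/ctr.h>		/* CTR_RFC3686_NONCE_SIZE (4) */

	struct ctr_key {
		u32 nonce;		/* trailing 4 bytes of the supplied key */
		unsigned int keylen;	/* length of the real AES key */
		const u8 *aes_key;	/* start of the supplied key material */
	};

	static void split_rfc3686_key(struct ctr_key *out, const u8 *key,
				      unsigned int len)
	{
		out->keylen = len - CTR_RFC3686_NONCE_SIZE;
		out->aes_key = key;
		memcpy(&out->nonce, key + out->keylen, CTR_RFC3686_NONCE_SIZE);
	}
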
+
+static int safexcel_skcipher_aes_ctr_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_skcipher_cra_init(tfm);
+	ctx->alg  = SAFEXCEL_AES;
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD;
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_ctr_aes = {
+	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
+	.algo_mask = SAFEXCEL_ALG_AES,
+	.alg.skcipher = {
+		.setkey = safexcel_skcipher_aesctr_setkey,
+		.encrypt = safexcel_encrypt,
+		.decrypt = safexcel_decrypt,
+		/* Add nonce size */
+		.min_keysize = AES_MIN_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+		.max_keysize = AES_MAX_KEY_SIZE + CTR_RFC3686_NONCE_SIZE,
+		.ivsize = CTR_RFC3686_IV_SIZE,
+		.base = {
+			.cra_name = "rfc3686(ctr(aes))",
+			.cra_driver_name = "safexcel-ctr-aes",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_skcipher_aes_ctr_cra_init,
+			.cra_exit = safexcel_skcipher_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
 static int safexcel_des_setkey(struct crypto_skcipher *ctfm, const u8 *key,
 			       unsigned int len)
 {
-	struct crypto_tfm *tfm = crypto_skcipher_tfm(ctfm);
-	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 tmp[DES_EXPKEY_WORDS];
+	struct safexcel_cipher_ctx *ctx = crypto_skcipher_ctx(ctfm);
 	int ret;
 
-	if (len != DES_KEY_SIZE) {
-		crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	ret = des_ekey(tmp, key);
-	if (!ret && (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
-		return -EINVAL;
-	}
+	ret = verify_skcipher_des_key(ctfm, key);
+	if (ret)
+		return ret;
 
 	/* if context exists and key changed, need to invalidate it */
 	if (ctx->base.ctxr_dma)
@@ -988,94 +1369,85 @@
 	return 0;
 }
 
+static int safexcel_skcipher_des_cbc_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_skcipher_cra_init(tfm);
+	ctx->alg  = SAFEXCEL_DES;
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CBC;
+	return 0;
+}
+
 struct safexcel_alg_template safexcel_alg_cbc_des = {
 	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_DES,
 	.alg.skcipher = {
 		.setkey = safexcel_des_setkey,
-		.encrypt = safexcel_cbc_des_encrypt,
-		.decrypt = safexcel_cbc_des_decrypt,
+		.encrypt = safexcel_encrypt,
+		.decrypt = safexcel_decrypt,
 		.min_keysize = DES_KEY_SIZE,
 		.max_keysize = DES_KEY_SIZE,
 		.ivsize = DES_BLOCK_SIZE,
 		.base = {
 			.cra_name = "cbc(des)",
 			.cra_driver_name = "safexcel-cbc-des",
-			.cra_priority = 300,
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
 			.cra_flags = CRYPTO_ALG_ASYNC |
 				     CRYPTO_ALG_KERN_DRIVER_ONLY,
 			.cra_blocksize = DES_BLOCK_SIZE,
 			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
 			.cra_alignmask = 0,
-			.cra_init = safexcel_skcipher_cra_init,
+			.cra_init = safexcel_skcipher_des_cbc_cra_init,
 			.cra_exit = safexcel_skcipher_cra_exit,
 			.cra_module = THIS_MODULE,
 		},
 	},
 };
 
-static int safexcel_ecb_des_encrypt(struct skcipher_request *req)
+static int safexcel_skcipher_des_ecb_cra_init(struct crypto_tfm *tfm)
 {
-	return safexcel_queue_req(&req->base, skcipher_request_ctx(req),
-			SAFEXCEL_ENCRYPT, CONTEXT_CONTROL_CRYPTO_MODE_ECB,
-			SAFEXCEL_DES);
-}
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
 
-static int safexcel_ecb_des_decrypt(struct skcipher_request *req)
-{
-	return safexcel_queue_req(&req->base, skcipher_request_ctx(req),
-			SAFEXCEL_DECRYPT, CONTEXT_CONTROL_CRYPTO_MODE_ECB,
-			SAFEXCEL_DES);
+	safexcel_skcipher_cra_init(tfm);
+	ctx->alg  = SAFEXCEL_DES;
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_ECB;
+	return 0;
 }
 
 struct safexcel_alg_template safexcel_alg_ecb_des = {
 	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_DES,
 	.alg.skcipher = {
 		.setkey = safexcel_des_setkey,
-		.encrypt = safexcel_ecb_des_encrypt,
-		.decrypt = safexcel_ecb_des_decrypt,
+		.encrypt = safexcel_encrypt,
+		.decrypt = safexcel_decrypt,
 		.min_keysize = DES_KEY_SIZE,
 		.max_keysize = DES_KEY_SIZE,
-		.ivsize = DES_BLOCK_SIZE,
 		.base = {
 			.cra_name = "ecb(des)",
 			.cra_driver_name = "safexcel-ecb-des",
-			.cra_priority = 300,
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
 			.cra_flags = CRYPTO_ALG_ASYNC |
 				     CRYPTO_ALG_KERN_DRIVER_ONLY,
 			.cra_blocksize = DES_BLOCK_SIZE,
 			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
 			.cra_alignmask = 0,
-			.cra_init = safexcel_skcipher_cra_init,
+			.cra_init = safexcel_skcipher_des_ecb_cra_init,
 			.cra_exit = safexcel_skcipher_cra_exit,
 			.cra_module = THIS_MODULE,
 		},
 	},
 };
 
-static int safexcel_cbc_des3_ede_encrypt(struct skcipher_request *req)
-{
-	return safexcel_queue_req(&req->base, skcipher_request_ctx(req),
-			SAFEXCEL_ENCRYPT, CONTEXT_CONTROL_CRYPTO_MODE_CBC,
-			SAFEXCEL_3DES);
-}
-
-static int safexcel_cbc_des3_ede_decrypt(struct skcipher_request *req)
-{
-	return safexcel_queue_req(&req->base, skcipher_request_ctx(req),
-			SAFEXCEL_DECRYPT, CONTEXT_CONTROL_CRYPTO_MODE_CBC,
-			SAFEXCEL_3DES);
-}
-
 static int safexcel_des3_ede_setkey(struct crypto_skcipher *ctfm,
 				   const u8 *key, unsigned int len)
 {
 	struct safexcel_cipher_ctx *ctx = crypto_skcipher_ctx(ctfm);
 	int err;
 
-	err = des3_verify_key(ctfm, key);
-	if (unlikely(err))
+	err = verify_skcipher_des3_key(ctfm, key);
+	if (err)
 		return err;
 
 	/* if context exists and key changed, need to invalidate it */
@@ -1091,66 +1463,71 @@
 	return 0;
 }
 
+static int safexcel_skcipher_des3_cbc_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_skcipher_cra_init(tfm);
+	ctx->alg  = SAFEXCEL_3DES;
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CBC;
+	return 0;
+}
+
 struct safexcel_alg_template safexcel_alg_cbc_des3_ede = {
 	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_DES,
 	.alg.skcipher = {
 		.setkey = safexcel_des3_ede_setkey,
-		.encrypt = safexcel_cbc_des3_ede_encrypt,
-		.decrypt = safexcel_cbc_des3_ede_decrypt,
+		.encrypt = safexcel_encrypt,
+		.decrypt = safexcel_decrypt,
 		.min_keysize = DES3_EDE_KEY_SIZE,
 		.max_keysize = DES3_EDE_KEY_SIZE,
 		.ivsize = DES3_EDE_BLOCK_SIZE,
 		.base = {
 			.cra_name = "cbc(des3_ede)",
 			.cra_driver_name = "safexcel-cbc-des3_ede",
-			.cra_priority = 300,
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
 			.cra_flags = CRYPTO_ALG_ASYNC |
 				     CRYPTO_ALG_KERN_DRIVER_ONLY,
 			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
 			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
 			.cra_alignmask = 0,
-			.cra_init = safexcel_skcipher_cra_init,
+			.cra_init = safexcel_skcipher_des3_cbc_cra_init,
 			.cra_exit = safexcel_skcipher_cra_exit,
 			.cra_module = THIS_MODULE,
 		},
 	},
 };
 
-static int safexcel_ecb_des3_ede_encrypt(struct skcipher_request *req)
+static int safexcel_skcipher_des3_ecb_cra_init(struct crypto_tfm *tfm)
 {
-	return safexcel_queue_req(&req->base, skcipher_request_ctx(req),
-			SAFEXCEL_ENCRYPT, CONTEXT_CONTROL_CRYPTO_MODE_ECB,
-			SAFEXCEL_3DES);
-}
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
 
-static int safexcel_ecb_des3_ede_decrypt(struct skcipher_request *req)
-{
-	return safexcel_queue_req(&req->base, skcipher_request_ctx(req),
-			SAFEXCEL_DECRYPT, CONTEXT_CONTROL_CRYPTO_MODE_ECB,
-			SAFEXCEL_3DES);
+	safexcel_skcipher_cra_init(tfm);
+	ctx->alg  = SAFEXCEL_3DES;
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_ECB;
+	return 0;
 }
 
 struct safexcel_alg_template safexcel_alg_ecb_des3_ede = {
 	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_DES,
 	.alg.skcipher = {
 		.setkey = safexcel_des3_ede_setkey,
-		.encrypt = safexcel_ecb_des3_ede_encrypt,
-		.decrypt = safexcel_ecb_des3_ede_decrypt,
+		.encrypt = safexcel_encrypt,
+		.decrypt = safexcel_decrypt,
 		.min_keysize = DES3_EDE_KEY_SIZE,
 		.max_keysize = DES3_EDE_KEY_SIZE,
-		.ivsize = DES3_EDE_BLOCK_SIZE,
 		.base = {
 			.cra_name = "ecb(des3_ede)",
 			.cra_driver_name = "safexcel-ecb-des3_ede",
-			.cra_priority = 300,
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
 			.cra_flags = CRYPTO_ALG_ASYNC |
 				     CRYPTO_ALG_KERN_DRIVER_ONLY,
 			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
 			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
 			.cra_alignmask = 0,
-			.cra_init = safexcel_skcipher_cra_init,
+			.cra_init = safexcel_skcipher_des3_ecb_cra_init,
 			.cra_exit = safexcel_skcipher_cra_exit,
 			.cra_module = THIS_MODULE,
 		},
@@ -1161,16 +1538,14 @@
 {
 	struct safexcel_cipher_req *creq = aead_request_ctx(req);
 
-	return safexcel_queue_req(&req->base, creq, SAFEXCEL_ENCRYPT,
-			CONTEXT_CONTROL_CRYPTO_MODE_CBC, SAFEXCEL_AES);
+	return safexcel_queue_req(&req->base, creq, SAFEXCEL_ENCRYPT);
 }
 
 static int safexcel_aead_decrypt(struct aead_request *req)
 {
 	struct safexcel_cipher_req *creq = aead_request_ctx(req);
 
-	return safexcel_queue_req(&req->base, creq, SAFEXCEL_DECRYPT,
-			CONTEXT_CONTROL_CRYPTO_MODE_CBC, SAFEXCEL_AES);
+	return safexcel_queue_req(&req->base, creq, SAFEXCEL_DECRYPT);
 }
 
 static int safexcel_aead_cra_init(struct crypto_tfm *tfm)
@@ -1185,6 +1560,8 @@
 
 	ctx->priv = tmpl->priv;
 
+	ctx->alg  = SAFEXCEL_AES; /* default */
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CBC; /* default */
 	ctx->aead = true;
 	ctx->base.send = safexcel_aead_send;
 	ctx->base.handle_result = safexcel_aead_handle_result;
@@ -1203,9 +1580,9 @@
 
 struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_cbc_aes = {
 	.type = SAFEXCEL_ALG_TYPE_AEAD,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_SHA1,
 	.alg.aead = {
-		.setkey = safexcel_aead_aes_setkey,
+		.setkey = safexcel_aead_setkey,
 		.encrypt = safexcel_aead_encrypt,
 		.decrypt = safexcel_aead_decrypt,
 		.ivsize = AES_BLOCK_SIZE,
@@ -1213,7 +1590,7 @@
 		.base = {
 			.cra_name = "authenc(hmac(sha1),cbc(aes))",
 			.cra_driver_name = "safexcel-authenc-hmac-sha1-cbc-aes",
-			.cra_priority = 300,
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
 			.cra_flags = CRYPTO_ALG_ASYNC |
 				     CRYPTO_ALG_KERN_DRIVER_ONLY,
 			.cra_blocksize = AES_BLOCK_SIZE,
@@ -1238,9 +1615,9 @@
 
 struct safexcel_alg_template safexcel_alg_authenc_hmac_sha256_cbc_aes = {
 	.type = SAFEXCEL_ALG_TYPE_AEAD,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_SHA2_256,
 	.alg.aead = {
-		.setkey = safexcel_aead_aes_setkey,
+		.setkey = safexcel_aead_setkey,
 		.encrypt = safexcel_aead_encrypt,
 		.decrypt = safexcel_aead_decrypt,
 		.ivsize = AES_BLOCK_SIZE,
@@ -1248,7 +1625,7 @@
 		.base = {
 			.cra_name = "authenc(hmac(sha256),cbc(aes))",
 			.cra_driver_name = "safexcel-authenc-hmac-sha256-cbc-aes",
-			.cra_priority = 300,
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
 			.cra_flags = CRYPTO_ALG_ASYNC |
 				     CRYPTO_ALG_KERN_DRIVER_ONLY,
 			.cra_blocksize = AES_BLOCK_SIZE,
@@ -1273,9 +1650,9 @@
 
 struct safexcel_alg_template safexcel_alg_authenc_hmac_sha224_cbc_aes = {
 	.type = SAFEXCEL_ALG_TYPE_AEAD,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_SHA2_256,
 	.alg.aead = {
-		.setkey = safexcel_aead_aes_setkey,
+		.setkey = safexcel_aead_setkey,
 		.encrypt = safexcel_aead_encrypt,
 		.decrypt = safexcel_aead_decrypt,
 		.ivsize = AES_BLOCK_SIZE,
@@ -1283,7 +1660,7 @@
 		.base = {
 			.cra_name = "authenc(hmac(sha224),cbc(aes))",
 			.cra_driver_name = "safexcel-authenc-hmac-sha224-cbc-aes",
-			.cra_priority = 300,
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
 			.cra_flags = CRYPTO_ALG_ASYNC |
 				     CRYPTO_ALG_KERN_DRIVER_ONLY,
 			.cra_blocksize = AES_BLOCK_SIZE,
@@ -1308,9 +1685,9 @@
 
 struct safexcel_alg_template safexcel_alg_authenc_hmac_sha512_cbc_aes = {
 	.type = SAFEXCEL_ALG_TYPE_AEAD,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_SHA2_512,
 	.alg.aead = {
-		.setkey = safexcel_aead_aes_setkey,
+		.setkey = safexcel_aead_setkey,
 		.encrypt = safexcel_aead_encrypt,
 		.decrypt = safexcel_aead_decrypt,
 		.ivsize = AES_BLOCK_SIZE,
@@ -1318,7 +1695,7 @@
 		.base = {
 			.cra_name = "authenc(hmac(sha512),cbc(aes))",
 			.cra_driver_name = "safexcel-authenc-hmac-sha512-cbc-aes",
-			.cra_priority = 300,
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
 			.cra_flags = CRYPTO_ALG_ASYNC |
 				     CRYPTO_ALG_KERN_DRIVER_ONLY,
 			.cra_blocksize = AES_BLOCK_SIZE,
@@ -1343,9 +1720,9 @@
 
 struct safexcel_alg_template safexcel_alg_authenc_hmac_sha384_cbc_aes = {
 	.type = SAFEXCEL_ALG_TYPE_AEAD,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_SHA2_512,
 	.alg.aead = {
-		.setkey = safexcel_aead_aes_setkey,
+		.setkey = safexcel_aead_setkey,
 		.encrypt = safexcel_aead_encrypt,
 		.decrypt = safexcel_aead_decrypt,
 		.ivsize = AES_BLOCK_SIZE,
@@ -1353,7 +1730,7 @@
 		.base = {
 			.cra_name = "authenc(hmac(sha384),cbc(aes))",
 			.cra_driver_name = "safexcel-authenc-hmac-sha384-cbc-aes",
-			.cra_priority = 300,
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
 			.cra_flags = CRYPTO_ALG_ASYNC |
 				     CRYPTO_ALG_KERN_DRIVER_ONLY,
 			.cra_blocksize = AES_BLOCK_SIZE,
@@ -1365,3 +1742,564 @@
 		},
 	},
 };
+
+static int safexcel_aead_sha1_des3_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_sha1_cra_init(tfm);
+	ctx->alg = SAFEXCEL_3DES; /* override default */
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_cbc_des3_ede = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_DES | SAFEXCEL_ALG_SHA1,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = DES3_EDE_BLOCK_SIZE,
+		.maxauthsize = SHA1_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha1),cbc(des3_ede))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha1-cbc-des3_ede",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = DES3_EDE_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha1_des3_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sha1_ctr_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_sha1_cra_init(tfm);
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD; /* override default */
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha1_ctr_aes = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_SHA1,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = CTR_RFC3686_IV_SIZE,
+		.maxauthsize = SHA1_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha1),rfc3686(ctr(aes)))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha1-ctr-aes",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha1_ctr_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sha256_ctr_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_sha256_cra_init(tfm);
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD; /* override default */
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha256_ctr_aes = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_SHA2_256,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = CTR_RFC3686_IV_SIZE,
+		.maxauthsize = SHA256_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha256),rfc3686(ctr(aes)))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha256-ctr-aes",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha256_ctr_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sha224_ctr_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_sha224_cra_init(tfm);
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD; /* override default */
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha224_ctr_aes = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_SHA2_256,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = CTR_RFC3686_IV_SIZE,
+		.maxauthsize = SHA224_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha224),rfc3686(ctr(aes)))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha224-ctr-aes",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha224_ctr_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sha512_ctr_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_sha512_cra_init(tfm);
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD; /* override default */
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha512_ctr_aes = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_SHA2_512,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = CTR_RFC3686_IV_SIZE,
+		.maxauthsize = SHA512_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha512),rfc3686(ctr(aes)))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha512-ctr-aes",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha512_ctr_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_sha384_ctr_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_sha384_cra_init(tfm);
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_CTR_LOAD; /* override default */
+	return 0;
+}
+
+struct safexcel_alg_template safexcel_alg_authenc_hmac_sha384_ctr_aes = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_SHA2_512,
+	.alg.aead = {
+		.setkey = safexcel_aead_setkey,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = CTR_RFC3686_IV_SIZE,
+		.maxauthsize = SHA384_DIGEST_SIZE,
+		.base = {
+			.cra_name = "authenc(hmac(sha384),rfc3686(ctr(aes)))",
+			.cra_driver_name = "safexcel-authenc-hmac-sha384-ctr-aes",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_sha384_ctr_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_skcipher_aesxts_setkey(struct crypto_skcipher *ctfm,
+					   const u8 *key, unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_skcipher_tfm(ctfm);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	struct crypto_aes_ctx aes;
+	int ret, i;
+	unsigned int keylen;
+
+	/* Check for illegal XTS keys */
+	ret = xts_verify_key(ctfm, key, len);
+	if (ret)
+		return ret;
+
+	/* Only half of the key data is cipher key */
+	keylen = (len >> 1);
+	ret = aes_expandkey(&aes, key, keylen);
+	if (ret) {
+		crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return ret;
+	}
+
+	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma) {
+		for (i = 0; i < keylen / sizeof(u32); i++) {
+			if (ctx->key[i] != cpu_to_le32(aes.key_enc[i])) {
+				ctx->base.needs_inv = true;
+				break;
+			}
+		}
+	}
+
+	for (i = 0; i < keylen / sizeof(u32); i++)
+		ctx->key[i] = cpu_to_le32(aes.key_enc[i]);
+
+	/* The other half is the tweak key */
+	ret = aes_expandkey(&aes, (u8 *)(key + keylen), keylen);
+	if (ret) {
+		crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return ret;
+	}
+
+	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma) {
+		for (i = 0; i < keylen / sizeof(u32); i++) {
+			if (ctx->key[i + keylen / sizeof(u32)] !=
+			    cpu_to_le32(aes.key_enc[i])) {
+				ctx->base.needs_inv = true;
+				break;
+			}
+		}
+	}
+
+	for (i = 0; i < keylen / sizeof(u32); i++)
+		ctx->key[i + keylen / sizeof(u32)] =
+			cpu_to_le32(aes.key_enc[i]);
+
+	ctx->key_len = keylen << 1;
+
+	memzero_explicit(&aes, sizeof(aes));
+	return 0;
+}
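
An xts(aes) key is really two AES keys of equal size glued together: the first half is the data key and the second half the tweak key, hence the doubled min/max_keysize in the template below and the two aes_expandkey() rounds above. xts_verify_key() does the up-front sanity checking (legal total length and, under FIPS rules, distinct halves). A sketch of the split, with a hypothetical helper name:

	#include <crypto/xts.h>
	#include <crypto/skcipher.h>
	#include <linux/types.h>

	static int xts_split_key(struct crypto_skcipher *tfm, const u8 *key,
				 unsigned int len, const u8 **data_key,
				 const u8 **tweak_key, unsigned int *half)
	{
		int ret = xts_verify_key(tfm, key, len);	/* length + FIPS checks */

		if (ret)
			return ret;

		*half = len / 2;	/* e.g. 32 bytes each for a 64-byte AES-256 XTS key */
		*data_key = key;
		*tweak_key = key + *half;
		return 0;
	}
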
+
+static int safexcel_skcipher_aes_xts_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_skcipher_cra_init(tfm);
+	ctx->alg  = SAFEXCEL_AES;
+	ctx->xts  = 1;
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_XTS;
+	return 0;
+}
+
+static int safexcel_encrypt_xts(struct skcipher_request *req)
+{
+	if (req->cryptlen < XTS_BLOCK_SIZE)
+		return -EINVAL;
+	return safexcel_queue_req(&req->base, skcipher_request_ctx(req),
+				  SAFEXCEL_ENCRYPT);
+}
+
+static int safexcel_decrypt_xts(struct skcipher_request *req)
+{
+	if (req->cryptlen < XTS_BLOCK_SIZE)
+		return -EINVAL;
+	return safexcel_queue_req(&req->base, skcipher_request_ctx(req),
+				  SAFEXCEL_DECRYPT);
+}
+
+struct safexcel_alg_template safexcel_alg_xts_aes = {
+	.type = SAFEXCEL_ALG_TYPE_SKCIPHER,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_AES_XTS,
+	.alg.skcipher = {
+		.setkey = safexcel_skcipher_aesxts_setkey,
+		.encrypt = safexcel_encrypt_xts,
+		.decrypt = safexcel_decrypt_xts,
+		/* XTS actually uses 2 AES keys glued together */
+		.min_keysize = AES_MIN_KEY_SIZE * 2,
+		.max_keysize = AES_MAX_KEY_SIZE * 2,
+		.ivsize = XTS_BLOCK_SIZE,
+		.base = {
+			.cra_name = "xts(aes)",
+			.cra_driver_name = "safexcel-xts-aes",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = XTS_BLOCK_SIZE,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_skcipher_aes_xts_cra_init,
+			.cra_exit = safexcel_skcipher_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_gcm_setkey(struct crypto_aead *ctfm, const u8 *key,
+				    unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_aead_tfm(ctfm);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	struct crypto_aes_ctx aes;
+	u32 hashkey[AES_BLOCK_SIZE >> 2];
+	int ret, i;
+
+	ret = aes_expandkey(&aes, key, len);
+	if (ret) {
+		crypto_aead_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		memzero_explicit(&aes, sizeof(aes));
+		return ret;
+	}
+
+	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma) {
+		for (i = 0; i < len / sizeof(u32); i++) {
+			if (ctx->key[i] != cpu_to_le32(aes.key_enc[i])) {
+				ctx->base.needs_inv = true;
+				break;
+			}
+		}
+	}
+
+	for (i = 0; i < len / sizeof(u32); i++)
+		ctx->key[i] = cpu_to_le32(aes.key_enc[i]);
+
+	ctx->key_len = len;
+
+	/* Compute hash key by encrypting zeroes with cipher key */
+	crypto_cipher_clear_flags(ctx->hkaes, CRYPTO_TFM_REQ_MASK);
+	crypto_cipher_set_flags(ctx->hkaes, crypto_aead_get_flags(ctfm) &
+				CRYPTO_TFM_REQ_MASK);
+	ret = crypto_cipher_setkey(ctx->hkaes, key, len);
+	crypto_aead_set_flags(ctfm, crypto_cipher_get_flags(ctx->hkaes) &
+			      CRYPTO_TFM_RES_MASK);
+	if (ret)
+		return ret;
+
+	memset(hashkey, 0, AES_BLOCK_SIZE);
+	crypto_cipher_encrypt_one(ctx->hkaes, (u8 *)hashkey, (u8 *)hashkey);
+
+	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma) {
+		for (i = 0; i < AES_BLOCK_SIZE / sizeof(u32); i++) {
+			if (ctx->ipad[i] != cpu_to_be32(hashkey[i])) {
+				ctx->base.needs_inv = true;
+				break;
+			}
+		}
+	}
+
+	for (i = 0; i < AES_BLOCK_SIZE / sizeof(u32); i++)
+		ctx->ipad[i] = cpu_to_be32(hashkey[i]);
+
+	memzero_explicit(hashkey, AES_BLOCK_SIZE);
+	memzero_explicit(&aes, sizeof(aes));
+	return 0;
+}
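
The only non-obvious part of GCM key setup is the GHASH subkey H, defined as the encryption of an all-zero block under the AES data key; the code above computes it with the plain software "aes" cipher kept in ctx->hkaes and then stores it in the ipad words of the context. A self-contained sketch of the derivation (hypothetical helper, error handling reduced to the essentials):

	#include <linux/crypto.h>
	#include <linux/err.h>
	#include <linux/string.h>
	#include <crypto/aes.h>

	/* H = AES_K(0^128), computed with a temporary software cipher */
	static int compute_ghash_subkey(const u8 *key, unsigned int keylen,
					u8 hashkey[AES_BLOCK_SIZE])
	{
		struct crypto_cipher *aes = crypto_alloc_cipher("aes", 0, 0);
		int ret;

		if (IS_ERR(aes))
			return PTR_ERR(aes);

		ret = crypto_cipher_setkey(aes, key, keylen);
		if (!ret) {
			memset(hashkey, 0, AES_BLOCK_SIZE);
			crypto_cipher_encrypt_one(aes, hashkey, hashkey);
		}

		crypto_free_cipher(aes);
		return ret;
	}
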
+
+static int safexcel_aead_gcm_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_cra_init(tfm);
+	ctx->hash_alg = CONTEXT_CONTROL_CRYPTO_ALG_GHASH;
+	ctx->state_sz = GHASH_BLOCK_SIZE;
+	ctx->xcm = EIP197_XCM_MODE_GCM;
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_XCM; /* override default */
+
+	ctx->hkaes = crypto_alloc_cipher("aes", 0, 0);
+	if (IS_ERR(ctx->hkaes))
+		return PTR_ERR(ctx->hkaes);
+
+	return 0;
+}
+
+static void safexcel_aead_gcm_cra_exit(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	crypto_free_cipher(ctx->hkaes);
+	safexcel_aead_cra_exit(tfm);
+}
+
+static int safexcel_aead_gcm_setauthsize(struct crypto_aead *tfm,
+					 unsigned int authsize)
+{
+	return crypto_gcm_check_authsize(authsize);
+}
+
+struct safexcel_alg_template safexcel_alg_gcm = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_GHASH,
+	.alg.aead = {
+		.setkey = safexcel_aead_gcm_setkey,
+		.setauthsize = safexcel_aead_gcm_setauthsize,
+		.encrypt = safexcel_aead_encrypt,
+		.decrypt = safexcel_aead_decrypt,
+		.ivsize = GCM_AES_IV_SIZE,
+		.maxauthsize = GHASH_DIGEST_SIZE,
+		.base = {
+			.cra_name = "gcm(aes)",
+			.cra_driver_name = "safexcel-gcm-aes",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_gcm_cra_init,
+			.cra_exit = safexcel_aead_gcm_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
+
+static int safexcel_aead_ccm_setkey(struct crypto_aead *ctfm, const u8 *key,
+				    unsigned int len)
+{
+	struct crypto_tfm *tfm = crypto_aead_tfm(ctfm);
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+	struct safexcel_crypto_priv *priv = ctx->priv;
+	struct crypto_aes_ctx aes;
+	int ret, i;
+
+	ret = aes_expandkey(&aes, key, len);
+	if (ret) {
+		crypto_aead_set_flags(ctfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		memzero_explicit(&aes, sizeof(aes));
+		return ret;
+	}
+
+	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr_dma) {
+		for (i = 0; i < len / sizeof(u32); i++) {
+			if (ctx->key[i] != cpu_to_le32(aes.key_enc[i])) {
+				ctx->base.needs_inv = true;
+				break;
+			}
+		}
+	}
+
+	for (i = 0; i < len / sizeof(u32); i++) {
+		ctx->key[i] = cpu_to_le32(aes.key_enc[i]);
+		ctx->ipad[i + 2 * AES_BLOCK_SIZE / sizeof(u32)] =
+			cpu_to_be32(aes.key_enc[i]);
+	}
+
+	ctx->key_len = len;
+	ctx->state_sz = 2 * AES_BLOCK_SIZE + len;
+
+	if (len == AES_KEYSIZE_192)
+		ctx->hash_alg = CONTEXT_CONTROL_CRYPTO_ALG_XCBC192;
+	else if (len == AES_KEYSIZE_256)
+		ctx->hash_alg = CONTEXT_CONTROL_CRYPTO_ALG_XCBC256;
+	else
+		ctx->hash_alg = CONTEXT_CONTROL_CRYPTO_ALG_XCBC128;
+
+	memzero_explicit(&aes, sizeof(aes));
+	return 0;
+}
+
+static int safexcel_aead_ccm_cra_init(struct crypto_tfm *tfm)
+{
+	struct safexcel_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
+
+	safexcel_aead_cra_init(tfm);
+	ctx->hash_alg = CONTEXT_CONTROL_CRYPTO_ALG_XCBC128;
+	ctx->state_sz = 3 * AES_BLOCK_SIZE;
+	ctx->xcm = EIP197_XCM_MODE_CCM;
+	ctx->mode = CONTEXT_CONTROL_CRYPTO_MODE_XCM; /* override default */
+	return 0;
+}
+
+static int safexcel_aead_ccm_setauthsize(struct crypto_aead *tfm,
+					 unsigned int authsize)
+{
+	/* Borrowed from crypto/ccm.c */
+	switch (authsize) {
+	case 4:
+	case 6:
+	case 8:
+	case 10:
+	case 12:
+	case 14:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int safexcel_ccm_encrypt(struct aead_request *req)
+{
+	struct safexcel_cipher_req *creq = aead_request_ctx(req);
+
+	if (req->iv[0] < 1 || req->iv[0] > 7)
+		return -EINVAL;
+
+	return safexcel_queue_req(&req->base, creq, SAFEXCEL_ENCRYPT);
+}
+
+static int safexcel_ccm_decrypt(struct aead_request *req)
+{
+	struct safexcel_cipher_req *creq = aead_request_ctx(req);
+
+	if (req->iv[0] < 1 || req->iv[0] > 7)
+		return -EINVAL;
+
+	return safexcel_queue_req(&req->base, creq, SAFEXCEL_DECRYPT);
+}
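
CCM encodes the size of its length field, L, in the first IV byte as L - 1, and RFC 3610 only allows 2 <= L <= 8; that is why both entry points above reject iv[0] outside 1..7, while safexcel_aead_ccm_setauthsize() accepts the usual even tag sizes 4..16. The same checks as one stand-alone sketch:

	#include <linux/errno.h>
	#include <linux/types.h>

	/* Reject CCM parameters neither the spec nor the engine can handle */
	static int ccm_check_params(const u8 *iv, unsigned int authsize)
	{
		unsigned int l = iv[0] + 1;	/* size of the length field in bytes */

		if (l < 2 || l > 8)		/* RFC 3610: 2 <= L <= 8 */
			return -EINVAL;
		if (authsize < 4 || authsize > 16 || (authsize & 1))
			return -EINVAL;		/* tag must be an even size, 4..16 */
		return 0;
	}
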
+
+struct safexcel_alg_template safexcel_alg_ccm = {
+	.type = SAFEXCEL_ALG_TYPE_AEAD,
+	.algo_mask = SAFEXCEL_ALG_AES | SAFEXCEL_ALG_CBC_MAC_ALL,
+	.alg.aead = {
+		.setkey = safexcel_aead_ccm_setkey,
+		.setauthsize = safexcel_aead_ccm_setauthsize,
+		.encrypt = safexcel_ccm_encrypt,
+		.decrypt = safexcel_ccm_decrypt,
+		.ivsize = AES_BLOCK_SIZE,
+		.maxauthsize = AES_BLOCK_SIZE,
+		.base = {
+			.cra_name = "ccm(aes)",
+			.cra_driver_name = "safexcel-ccm-aes",
+			.cra_priority = SAFEXCEL_CRA_PRIORITY,
+			.cra_flags = CRYPTO_ALG_ASYNC |
+				     CRYPTO_ALG_KERN_DRIVER_ONLY,
+			.cra_blocksize = 1,
+			.cra_ctxsize = sizeof(struct safexcel_cipher_ctx),
+			.cra_alignmask = 0,
+			.cra_init = safexcel_aead_ccm_cra_init,
+			.cra_exit = safexcel_aead_cra_exit,
+			.cra_module = THIS_MODULE,
+		},
+	},
+};
diff --git a/drivers/crypto/inside-secure/safexcel_hash.c b/drivers/crypto/inside-secure/safexcel_hash.c
index a80a5e7..2effb6d 100644
--- a/drivers/crypto/inside-secure/safexcel_hash.c
+++ b/drivers/crypto/inside-secure/safexcel_hash.c
@@ -29,33 +29,31 @@
 	bool finish;
 	bool hmac;
 	bool needs_inv;
+	bool hmac_zlen;
+	bool len_is_le;
 
 	int nents;
 	dma_addr_t result_dma;
 
 	u32 digest;
 
-	u8 state_sz;    /* expected sate size, only set once */
+	u8 state_sz;    /* expected state size, only set once */
+	u8 block_sz;    /* block size, only set once */
 	u32 state[SHA512_DIGEST_SIZE / sizeof(u32)] __aligned(sizeof(u32));
 
-	u64 len[2];
-	u64 processed[2];
+	u64 len;
+	u64 processed;
 
-	u8 cache[SHA512_BLOCK_SIZE << 1] __aligned(sizeof(u32));
+	u8 cache[HASH_CACHE_SIZE] __aligned(sizeof(u32));
 	dma_addr_t cache_dma;
 	unsigned int cache_sz;
 
-	u8 cache_next[SHA512_BLOCK_SIZE << 1] __aligned(sizeof(u32));
+	u8 cache_next[HASH_CACHE_SIZE] __aligned(sizeof(u32));
 };
 
 static inline u64 safexcel_queued_len(struct safexcel_ahash_req *req)
 {
-	u64 len, processed;
-
-	len = (0xffffffff * req->len[1]) + req->len[0];
-	processed = (0xffffffff * req->processed[1]) + req->processed[0];
-
-	return len - processed;
+	return req->len - req->processed;
 }
 
 static void safexcel_hash_token(struct safexcel_command_desc *cdesc,
@@ -79,75 +77,104 @@
 
 static void safexcel_context_control(struct safexcel_ahash_ctx *ctx,
 				     struct safexcel_ahash_req *req,
-				     struct safexcel_command_desc *cdesc,
-				     unsigned int digestsize)
+				     struct safexcel_command_desc *cdesc)
 {
 	struct safexcel_crypto_priv *priv = ctx->priv;
-	int i;
+	u64 count = 0;
 
-	cdesc->control_data.control0 |= CONTEXT_CONTROL_TYPE_HASH_OUT;
 	cdesc->control_data.control0 |= ctx->alg;
-	cdesc->control_data.control0 |= req->digest;
 
-	if (!req->finish)
-		cdesc->control_data.control0 |= CONTEXT_CONTROL_NO_FINISH_HASH;
-
-	if (req->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED) {
-		if (req->processed[0] || req->processed[1]) {
-			if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_MD5)
-				cdesc->control_data.control0 |= CONTEXT_CONTROL_SIZE(5);
-			else if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA1)
-				cdesc->control_data.control0 |= CONTEXT_CONTROL_SIZE(6);
-			else if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA224 ||
-				 ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA256)
-				cdesc->control_data.control0 |= CONTEXT_CONTROL_SIZE(9);
-			else if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA384 ||
-				 ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_SHA512)
-				cdesc->control_data.control0 |= CONTEXT_CONTROL_SIZE(17);
-
-			cdesc->control_data.control1 |= CONTEXT_CONTROL_DIGEST_CNT;
+	/*
+	 * Copy the input digest if needed, and setup the context
+	 * fields. Do this now as we need it to setup the first command
+	 * descriptor.
+	 */
+	if (!req->processed) {
+		/* First - and possibly only - block of basic hash only */
+		if (req->finish) {
+			cdesc->control_data.control0 |=
+				CONTEXT_CONTROL_TYPE_HASH_OUT |
+				CONTEXT_CONTROL_RESTART_HASH  |
+				/* ensure its not 0! */
+				CONTEXT_CONTROL_SIZE(1);
 		} else {
-			cdesc->control_data.control0 |= CONTEXT_CONTROL_RESTART_HASH;
+			cdesc->control_data.control0 |=
+				CONTEXT_CONTROL_TYPE_HASH_OUT  |
+				CONTEXT_CONTROL_RESTART_HASH   |
+				CONTEXT_CONTROL_NO_FINISH_HASH |
+				/* ensure it's not 0! */
+				CONTEXT_CONTROL_SIZE(1);
 		}
+		return;
+	}
 
-		/*
-		 * Copy the input digest if needed, and setup the context
-		 * fields. Do this now as we need it to setup the first command
-		 * descriptor.
-		 */
-		if (req->processed[0] || req->processed[1]) {
-			for (i = 0; i < digestsize / sizeof(u32); i++)
-				ctx->base.ctxr->data[i] = cpu_to_le32(req->state[i]);
+	/* Hash continuation or HMAC, setup (inner) digest from state */
+	memcpy(ctx->base.ctxr->data, req->state, req->state_sz);
 
-			if (req->finish) {
-				u64 count = req->processed[0] / EIP197_COUNTER_BLOCK_SIZE;
-				count += ((0xffffffff / EIP197_COUNTER_BLOCK_SIZE) *
-					  req->processed[1]);
+	if (req->finish) {
+		/* Compute digest count for hash/HMAC finish operations */
+		if ((req->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED) ||
+		    req->hmac_zlen || (req->processed != req->block_sz)) {
+			count = req->processed / EIP197_COUNTER_BLOCK_SIZE;
 
-				/* This is a haredware limitation, as the
-				 * counter must fit into an u32. This represents
-				 * a farily big amount of input data, so we
-				 * shouldn't see this.
-				 */
-				if (unlikely(count & 0xffff0000)) {
-					dev_warn(priv->dev,
-						 "Input data is too big\n");
-					return;
-				}
-
-				ctx->base.ctxr->data[i] = cpu_to_le32(count);
+			/* This is a hardware limitation, as the
+			 * counter must fit into a u32. This represents
+			 * a fairly big amount of input data, so we
+			 * shouldn't see this.
+			 */
+			if (unlikely(count & 0xffffffff00000000ULL)) {
+				dev_warn(priv->dev,
+					 "Input data is too big\n");
+				return;
 			}
 		}
-	} else if (req->digest == CONTEXT_CONTROL_DIGEST_HMAC) {
-		cdesc->control_data.control0 |= CONTEXT_CONTROL_SIZE(2 * req->state_sz / sizeof(u32));
 
-		memcpy(ctx->base.ctxr->data, ctx->ipad, req->state_sz);
-		memcpy(ctx->base.ctxr->data + req->state_sz / sizeof(u32),
-		       ctx->opad, req->state_sz);
+		if ((req->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED) ||
+		    /* Special case: zero length HMAC */
+		    req->hmac_zlen ||
+		    /* PE HW < 4.4 cannot do HMAC continue, fake using hash */
+		    (req->processed != req->block_sz)) {
+			/* Basic hash continue operation, need digest + cnt */
+			cdesc->control_data.control0 |=
+				CONTEXT_CONTROL_SIZE((req->state_sz >> 2) + 1) |
+				CONTEXT_CONTROL_TYPE_HASH_OUT |
+				CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+			/* For zero-len HMAC, don't finalize, already padded! */
+			if (req->hmac_zlen)
+				cdesc->control_data.control0 |=
+					CONTEXT_CONTROL_NO_FINISH_HASH;
+			cdesc->control_data.control1 |=
+				CONTEXT_CONTROL_DIGEST_CNT;
+			ctx->base.ctxr->data[req->state_sz >> 2] =
+				cpu_to_le32(count);
+			req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+
+			/* Clear zero-length HMAC flag for next operation! */
+			req->hmac_zlen = false;
+		} else { /* HMAC */
+			/* Need outer digest for HMAC finalization */
+			memcpy(ctx->base.ctxr->data + (req->state_sz >> 2),
+			       ctx->opad, req->state_sz);
+
+			/* Single pass HMAC - no digest count */
+			cdesc->control_data.control0 |=
+				CONTEXT_CONTROL_SIZE(req->state_sz >> 1) |
+				CONTEXT_CONTROL_TYPE_HASH_OUT |
+				CONTEXT_CONTROL_DIGEST_HMAC;
+		}
+	} else { /* Hash continuation, do not finish yet */
+		cdesc->control_data.control0 |=
+			CONTEXT_CONTROL_SIZE(req->state_sz >> 2) |
+			CONTEXT_CONTROL_DIGEST_PRECOMPUTED |
+			CONTEXT_CONTROL_TYPE_HASH_OUT |
+			CONTEXT_CONTROL_NO_FINISH_HASH;
 	}
 }
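
The digest count stored in the context is a 32-bit number of EIP197_COUNTER_BLOCK_SIZE (64-byte) blocks, so a single hash stream is limited to a bit under 2^38 bytes; that is the reason for the warn-and-bail check on count above rather than any attempt at handling the overflow. A tiny sketch of the same limit (the constant is an assumption mirroring EIP197_COUNTER_BLOCK_SIZE):

	#include <linux/types.h>

	#define COUNTER_BLOCK_SIZE	64	/* assumed value of EIP197_COUNTER_BLOCK_SIZE */

	/* true while the processed byte count still fits the 32-bit block counter */
	static bool hash_count_fits(u64 processed)
	{
		u64 count = processed / COUNTER_BLOCK_SIZE;

		return !(count & 0xffffffff00000000ULL);
	}
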
 
-static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv, int ring,
+static int safexcel_ahash_enqueue(struct ahash_request *areq);
+
+static int safexcel_handle_req_result(struct safexcel_crypto_priv *priv,
+				      int ring,
 				      struct crypto_async_request *async,
 				      bool *should_complete, int *ret)
 {
@@ -155,6 +182,7 @@
 	struct ahash_request *areq = ahash_request_cast(async);
 	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
 	struct safexcel_ahash_req *sreq = ahash_request_ctx(areq);
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(ahash);
 	u64 cache_len;
 
 	*ret = 0;
@@ -188,9 +216,31 @@
 		sreq->cache_sz = 0;
 	}
 
-	if (sreq->finish)
+	if (sreq->finish) {
+		if (sreq->hmac &&
+		    (sreq->digest != CONTEXT_CONTROL_DIGEST_HMAC)) {
+			/* Faking HMAC using hash - need to do outer hash */
+			memcpy(sreq->cache, sreq->state,
+			       crypto_ahash_digestsize(ahash));
+
+			memcpy(sreq->state, ctx->opad, sreq->state_sz);
+
+			sreq->len = sreq->block_sz +
+				    crypto_ahash_digestsize(ahash);
+			sreq->processed = sreq->block_sz;
+			sreq->hmac = 0;
+
+			ctx->base.needs_inv = true;
+			areq->nbytes = 0;
+			safexcel_ahash_enqueue(areq);
+
+			*should_complete = false; /* Not done yet */
+			return 1;
+		}
+
 		memcpy(areq->result, sreq->state,
 		       crypto_ahash_digestsize(ahash));
+	}
 
 	cache_len = safexcel_queued_len(sreq);
 	if (cache_len)
@@ -205,7 +255,6 @@
 				   int *commands, int *results)
 {
 	struct ahash_request *areq = ahash_request_cast(async);
-	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
 	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
 	struct safexcel_crypto_priv *priv = ctx->priv;
@@ -213,33 +262,25 @@
 	struct safexcel_result_desc *rdesc;
 	struct scatterlist *sg;
 	int i, extra = 0, n_cdesc = 0, ret = 0;
-	u64 queued, len, cache_len, cache_max;
-
-	cache_max = crypto_ahash_blocksize(ahash);
-	if (req->digest == CONTEXT_CONTROL_DIGEST_HMAC)
-		cache_max <<= 1;
+	u64 queued, len, cache_len;
 
 	queued = len = safexcel_queued_len(req);
-	if (queued <= cache_max)
+	if (queued <= HASH_CACHE_SIZE)
 		cache_len = queued;
 	else
 		cache_len = queued - areq->nbytes;
 
-	if (!req->last_req) {
+	if (!req->finish && !req->last_req) {
 		/* If this is not the last request and the queued data does not
-		 * fit into full blocks, cache it for the next send() call.
+		 * fit into full cache blocks, cache it for the next send call.
 		 */
-		extra = queued & (crypto_ahash_blocksize(ahash) - 1);
-
-		if (req->digest == CONTEXT_CONTROL_DIGEST_HMAC &&
-		    extra < crypto_ahash_blocksize(ahash))
-			extra += crypto_ahash_blocksize(ahash);
+		extra = queued & (HASH_CACHE_SIZE - 1);
 
 		/* If this is not the last request and the queued data
 		 * is a multiple of a block, cache the last one for now.
 		 */
 		if (!extra)
-			extra = crypto_ahash_blocksize(ahash);
+			extra = HASH_CACHE_SIZE;
 
 		sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
 				   req->cache_next, extra,
@@ -247,6 +288,12 @@
 
 		queued -= extra;
 		len -= extra;
+
+		if (!queued) {
+			*commands = 0;
+			*results = 0;
+			return 0;
+		}
 	}
 
 	/* Add a command descriptor for the cached data, if any */
@@ -272,8 +319,14 @@
 			goto send_command;
 	}
 
+	/* Skip descriptor generation for zero-length requests */
+	if (!areq->nbytes)
+		goto send_command;
+
 	/* Now handle the current ahash request buffer(s) */
-	req->nents = dma_map_sg(priv->dev, areq->src, sg_nents(areq->src),
+	req->nents = dma_map_sg(priv->dev, areq->src,
+				sg_nents_for_len(areq->src,
+						 areq->nbytes),
 				DMA_TO_DEVICE);
 	if (!req->nents) {
 		ret = -ENOMEM;
@@ -288,7 +341,8 @@
 			sglen = queued;
 
 		cdesc = safexcel_add_cdesc(priv, ring, !n_cdesc,
-					   !(queued - sglen), sg_dma_address(sg),
+					   !(queued - sglen),
+					   sg_dma_address(sg),
 					   sglen, len, ctx->base.ctxr_dma);
 		if (IS_ERR(cdesc)) {
 			ret = PTR_ERR(cdesc);
@@ -306,7 +360,7 @@
 
 send_command:
 	/* Setup the context options */
-	safexcel_context_control(ctx, req, first_cdesc, req->state_sz);
+	safexcel_context_control(ctx, req, first_cdesc);
 
 	/* Add the token */
 	safexcel_hash_token(first_cdesc, len, req->state_sz);
@@ -328,9 +382,7 @@
 
 	safexcel_rdr_req_set(priv, ring, rdesc, &areq->base);
 
-	req->processed[0] += len;
-	if (req->processed[0] < len)
-		req->processed[1]++;
+	req->processed += len;
 
 	*commands = n_cdesc;
 	*results = 1;
@@ -355,27 +407,6 @@
 	return ret;
 }
 
-static inline bool safexcel_ahash_needs_inv_get(struct ahash_request *areq)
-{
-	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
-	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
-	unsigned int state_w_sz = req->state_sz / sizeof(u32);
-	u64 processed;
-	int i;
-
-	processed = req->processed[0] / EIP197_COUNTER_BLOCK_SIZE;
-	processed += (0xffffffff / EIP197_COUNTER_BLOCK_SIZE) * req->processed[1];
-
-	for (i = 0; i < state_w_sz; i++)
-		if (ctx->base.ctxr->data[i] != cpu_to_le32(req->state[i]))
-			return true;
-
-	if (ctx->base.ctxr->data[state_w_sz] != cpu_to_le32(processed))
-		return true;
-
-	return false;
-}
-
 static int safexcel_handle_inv_result(struct safexcel_crypto_priv *priv,
 				      int ring,
 				      struct crypto_async_request *async,
@@ -523,30 +554,25 @@
 /* safexcel_ahash_cache: cache data until at least one request can be sent to
  * the engine, aka. when there is at least 1 block size in the pipe.
  */
-static int safexcel_ahash_cache(struct ahash_request *areq, u32 cache_max)
+static int safexcel_ahash_cache(struct ahash_request *areq)
 {
 	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
-	u64 queued, cache_len;
+	u64 cache_len;
 
-	/* queued: everything accepted by the driver which will be handled by
-	 * the next send() calls.
-	 * tot sz handled by update() - tot sz handled by send()
-	 */
-	queued = safexcel_queued_len(req);
 	/* cache_len: everything accepted by the driver but not sent yet,
 	 * tot sz handled by update() - last req sz - tot sz handled by send()
 	 */
-	cache_len = queued - areq->nbytes;
+	cache_len = safexcel_queued_len(req);
 
 	/*
 	 * In case there aren't enough bytes to proceed (less than a
 	 * block size), cache the data until we have enough.
 	 */
-	if (cache_len + areq->nbytes <= cache_max) {
+	if (cache_len + areq->nbytes <= HASH_CACHE_SIZE) {
 		sg_pcopy_to_buffer(areq->src, sg_nents(areq->src),
 				   req->cache + cache_len,
 				   areq->nbytes, 0);
-		return areq->nbytes;
+		return 0;
 	}
 
 	/* We couldn't cache all the data */
@@ -564,14 +590,25 @@
 
 	if (ctx->base.ctxr) {
 		if (priv->flags & EIP197_TRC_CACHE && !ctx->base.needs_inv &&
-		    (req->processed[0] || req->processed[1]) &&
-		    req->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED)
-			/* We're still setting needs_inv here, even though it is
+		    req->processed &&
+		    (/* invalidate for basic hash continuation finish */
+		     (req->finish &&
+		      (req->digest == CONTEXT_CONTROL_DIGEST_PRECOMPUTED)) ||
+		     /* invalidate if (i)digest changed */
+		     memcmp(ctx->base.ctxr->data, req->state, req->state_sz) ||
+		     /* invalidate for HMAC continuation finish */
+		     (req->finish && (req->processed != req->block_sz)) ||
+		     /* invalidate for HMAC finish with odigest changed */
+		     (req->finish &&
+		      memcmp(ctx->base.ctxr->data + (req->state_sz>>2),
+			     ctx->opad, req->state_sz))))
+			/*
+			 * We're still setting needs_inv here, even though it is
 			 * cleared right away, because the needs_inv flag can be
 			 * set in other functions and we want to keep the same
 			 * logic.
 			 */
-			ctx->base.needs_inv = safexcel_ahash_needs_inv_get(areq);
+			ctx->base.needs_inv = true;
 
 		if (ctx->base.needs_inv) {
 			ctx->base.needs_inv = false;
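#include <stdbool.h>

/*
 * Illustrative restatement of the invalidation test above (standalone,
 * simplified; the bool parameters stand in for the inline comparisons):
 * a cached context record is invalidated only if it already processed
 * data and either its digest no longer matches or a finish is requested
 * that the cached record cannot serve.
 */
static bool ctxr_needs_invalidation(bool processed_any, bool finish,
				    bool digest_precomputed,
				    bool idigest_changed,
				    bool only_key_block_processed,
				    bool opad_changed)
{
	if (!processed_any)
		return false;

	return (finish && digest_precomputed) ||	/* hash continuation finish */
	       idigest_changed ||			/* (i)digest changed */
	       (finish && !only_key_block_processed) ||	/* HMAC continuation finish */
	       (finish && opad_changed);		/* HMAC odigest changed */
}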
@@ -601,35 +638,23 @@
 static int safexcel_ahash_update(struct ahash_request *areq)
 {
 	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
-	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
-	u32 cache_max;
+	int ret;
 
 	/* If the request is 0 length, do nothing */
 	if (!areq->nbytes)
 		return 0;
 
-	req->len[0] += areq->nbytes;
-	if (req->len[0] < areq->nbytes)
-		req->len[1]++;
+	/* Add request to the cache if it fits */
+	ret = safexcel_ahash_cache(areq);
 
-	cache_max = crypto_ahash_blocksize(ahash);
-	if (req->digest == CONTEXT_CONTROL_DIGEST_HMAC)
-		cache_max <<= 1;
+	/* Update total request length */
+	req->len += areq->nbytes;
 
-	safexcel_ahash_cache(areq, cache_max);
-
-	/*
-	 * We're not doing partial updates when performing an hmac request.
-	 * Everything will be handled by the final() call.
+	/* If not all data could fit into the cache, go process the excess.
+	 * Also go process immediately for an HMAC IV precompute, which
+	 * will never be finished at all, but needs to be processed anyway.
 	 */
-	if (req->digest == CONTEXT_CONTROL_DIGEST_HMAC)
-		return 0;
-
-	if (req->hmac)
-		return safexcel_ahash_enqueue(areq);
-
-	if (!req->last_req &&
-	    safexcel_queued_len(req) > cache_max)
+	if ((ret && !req->finish) || req->last_req)
 		return safexcel_ahash_enqueue(areq);
 
 	return 0;
@@ -640,11 +665,14 @@
 	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
 	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
 
-	req->last_req = true;
 	req->finish = true;
 
-	/* If we have an overall 0 length request */
-	if (!req->len[0] && !req->len[1] && !areq->nbytes) {
+	if (unlikely(!req->len && !areq->nbytes)) {
+		/*
+		 * If we have an overall 0 length *hash* request:
+		 * The HW cannot do 0 length hash, so we provide the correct
+		 * result directly here.
+		 */
 		if (ctx->alg == CONTEXT_CONTROL_CRYPTO_ALG_MD5)
 			memcpy(areq->result, md5_zero_message_hash,
 			       MD5_DIGEST_SIZE);
@@ -665,6 +693,43 @@
 			       SHA512_DIGEST_SIZE);
 
 		return 0;
+	} else if (unlikely(req->hmac &&
+			    (req->len == req->block_sz) &&
+			    !areq->nbytes)) {
+		/*
+		 * If we have an overall 0 length *HMAC* request:
+		 * For HMAC, we need to finalize the inner digest
+		 * and then perform the outer hash.
+		 */
+
+		/* generate pad block in the cache */
+		/* start with a hash block of all zeroes */
+		memset(req->cache, 0, req->block_sz);
+		/* set the first byte to 0x80 to 'append a 1 bit' */
+		req->cache[0] = 0x80;
+		/* add the length in bits in the last 2 bytes */
+		if (req->len_is_le) {
+			/* Little endian length word (e.g. MD5) */
+			req->cache[req->block_sz-8] = (req->block_sz << 3) &
+						      255;
+			req->cache[req->block_sz-7] = (req->block_sz >> 5);
+		} else {
+			/* Big endian length word (e.g. any SHA) */
+			req->cache[req->block_sz-2] = (req->block_sz >> 5);
+			req->cache[req->block_sz-1] = (req->block_sz << 3) &
+						      255;
+		}
+
+		req->len += req->block_sz; /* plus 1 hash block */
+
+		/* Set special zero-length HMAC flag */
+		req->hmac_zlen = true;
+
+		/* Finalize HMAC */
+		req->digest = CONTEXT_CONTROL_DIGEST_HMAC;
+	} else if (req->hmac) {
+		/* Finalize HMAC */
+		req->digest = CONTEXT_CONTROL_DIGEST_HMAC;
 	}
 
 	return safexcel_ahash_enqueue(areq);
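/*
 * Standalone sketch of the pad block built above for the zero-length HMAC
 * case: one hash block of zeroes, a 0x80 'append a 1 bit' marker, and the
 * bit length of the already-hashed key^ipad block encoded at the end. For
 * a 64-byte block that length is 512 bits, i.e. the bytes 0x02 0x00 in the
 * big endian case, or 0x00 0x02 at the start of the little endian length
 * field.
 */
#include <stdint.h>
#include <string.h>

static void build_zero_len_hmac_pad(uint8_t *cache, unsigned int block_sz,
				    int len_is_le)
{
	memset(cache, 0, block_sz);
	cache[0] = 0x80;
	if (len_is_le) {			/* MD5: little endian length */
		cache[block_sz - 8] = (block_sz << 3) & 255;
		cache[block_sz - 7] = (block_sz >> 5);
	} else {				/* SHA family: big endian length */
		cache[block_sz - 2] = (block_sz >> 5);
		cache[block_sz - 1] = (block_sz << 3) & 255;
	}
}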
@@ -674,7 +739,6 @@
 {
 	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
 
-	req->last_req = true;
 	req->finish = true;
 
 	safexcel_ahash_update(areq);
@@ -683,52 +747,36 @@
 
 static int safexcel_ahash_export(struct ahash_request *areq, void *out)
 {
-	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
 	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
 	struct safexcel_ahash_export_state *export = out;
-	u32 cache_sz;
 
-	cache_sz = crypto_ahash_blocksize(ahash);
-	if (req->digest == CONTEXT_CONTROL_DIGEST_HMAC)
-		cache_sz <<= 1;
-
-	export->len[0] = req->len[0];
-	export->len[1] = req->len[1];
-	export->processed[0] = req->processed[0];
-	export->processed[1] = req->processed[1];
+	export->len = req->len;
+	export->processed = req->processed;
 
 	export->digest = req->digest;
 
 	memcpy(export->state, req->state, req->state_sz);
-	memcpy(export->cache, req->cache, cache_sz);
+	memcpy(export->cache, req->cache, HASH_CACHE_SIZE);
 
 	return 0;
 }
 
 static int safexcel_ahash_import(struct ahash_request *areq, const void *in)
 {
-	struct crypto_ahash *ahash = crypto_ahash_reqtfm(areq);
 	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
 	const struct safexcel_ahash_export_state *export = in;
-	u32 cache_sz;
 	int ret;
 
 	ret = crypto_ahash_init(areq);
 	if (ret)
 		return ret;
 
-	cache_sz = crypto_ahash_blocksize(ahash);
-	if (req->digest == CONTEXT_CONTROL_DIGEST_HMAC)
-		cache_sz <<= 1;
-
-	req->len[0] = export->len[0];
-	req->len[1] = export->len[1];
-	req->processed[0] = export->processed[0];
-	req->processed[1] = export->processed[1];
+	req->len = export->len;
+	req->processed = export->processed;
 
 	req->digest = export->digest;
 
-	memcpy(req->cache, export->cache, cache_sz);
+	memcpy(req->cache, export->cache, HASH_CACHE_SIZE);
 	memcpy(req->state, export->state, req->state_sz);
 
 	return 0;
@@ -757,15 +805,10 @@
 
 	memset(req, 0, sizeof(*req));
 
-	req->state[0] = SHA1_H0;
-	req->state[1] = SHA1_H1;
-	req->state[2] = SHA1_H2;
-	req->state[3] = SHA1_H3;
-	req->state[4] = SHA1_H4;
-
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA1;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA1_DIGEST_SIZE;
+	req->block_sz = SHA1_BLOCK_SIZE;
 
 	return 0;
 }
@@ -802,7 +845,7 @@
 
 struct safexcel_alg_template safexcel_alg_sha1 = {
 	.type = SAFEXCEL_ALG_TYPE_AHASH,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_SHA1,
 	.alg.ahash = {
 		.init = safexcel_sha1_init,
 		.update = safexcel_ahash_update,
@@ -817,7 +860,7 @@
 			.base = {
 				.cra_name = "sha1",
 				.cra_driver_name = "safexcel-sha1",
-				.cra_priority = 300,
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
 				.cra_flags = CRYPTO_ALG_ASYNC |
 					     CRYPTO_ALG_KERN_DRIVER_ONLY,
 				.cra_blocksize = SHA1_BLOCK_SIZE,
@@ -832,10 +875,23 @@
 
 static int safexcel_hmac_sha1_init(struct ahash_request *areq)
 {
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
 	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
 
-	safexcel_sha1_init(areq);
-	req->digest = CONTEXT_CONTROL_DIGEST_HMAC;
+	memset(req, 0, sizeof(*req));
+
+	/* Start from ipad precompute */
+	memcpy(req->state, ctx->ipad, SHA1_DIGEST_SIZE);
+	/* Already processed the key^ipad part now! */
+	req->len	= SHA1_BLOCK_SIZE;
+	req->processed	= SHA1_BLOCK_SIZE;
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA1;
+	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+	req->state_sz = SHA1_DIGEST_SIZE;
+	req->block_sz = SHA1_BLOCK_SIZE;
+	req->hmac = true;
+
 	return 0;
 }
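/*
 * For reference (construction only, not driver code): HMAC over hash H is
 *
 *   HMAC(K, M) = H((K ^ opad) || H((K ^ ipad) || M))
 *
 * ctx->ipad holds the precomputed compression of the single (K ^ ipad)
 * block, so the engine resumes the inner hash with exactly one block
 * already accounted for; hence len = processed = SHA1_BLOCK_SIZE above.
 */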
 
@@ -1004,21 +1060,16 @@
 	struct safexcel_ahash_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm));
 	struct safexcel_crypto_priv *priv = ctx->priv;
 	struct safexcel_ahash_export_state istate, ostate;
-	int ret, i;
+	int ret;
 
 	ret = safexcel_hmac_setkey(alg, key, keylen, &istate, &ostate);
 	if (ret)
 		return ret;
 
-	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr) {
-		for (i = 0; i < state_sz / sizeof(u32); i++) {
-			if (ctx->ipad[i] != le32_to_cpu(istate.state[i]) ||
-			    ctx->opad[i] != le32_to_cpu(ostate.state[i])) {
-				ctx->base.needs_inv = true;
-				break;
-			}
-		}
-	}
+	if (priv->flags & EIP197_TRC_CACHE && ctx->base.ctxr &&
+	    (memcmp(ctx->ipad, istate.state, state_sz) ||
+	     memcmp(ctx->opad, ostate.state, state_sz)))
+		ctx->base.needs_inv = true;
 
 	memcpy(ctx->ipad, &istate.state, state_sz);
 	memcpy(ctx->opad, &ostate.state, state_sz);
@@ -1035,7 +1086,7 @@
 
 struct safexcel_alg_template safexcel_alg_hmac_sha1 = {
 	.type = SAFEXCEL_ALG_TYPE_AHASH,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_SHA1,
 	.alg.ahash = {
 		.init = safexcel_hmac_sha1_init,
 		.update = safexcel_ahash_update,
@@ -1051,7 +1102,7 @@
 			.base = {
 				.cra_name = "hmac(sha1)",
 				.cra_driver_name = "safexcel-hmac-sha1",
-				.cra_priority = 300,
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
 				.cra_flags = CRYPTO_ALG_ASYNC |
 					     CRYPTO_ALG_KERN_DRIVER_ONLY,
 				.cra_blocksize = SHA1_BLOCK_SIZE,
@@ -1071,18 +1122,10 @@
 
 	memset(req, 0, sizeof(*req));
 
-	req->state[0] = SHA256_H0;
-	req->state[1] = SHA256_H1;
-	req->state[2] = SHA256_H2;
-	req->state[3] = SHA256_H3;
-	req->state[4] = SHA256_H4;
-	req->state[5] = SHA256_H5;
-	req->state[6] = SHA256_H6;
-	req->state[7] = SHA256_H7;
-
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA256;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA256_DIGEST_SIZE;
+	req->block_sz = SHA256_BLOCK_SIZE;
 
 	return 0;
 }
@@ -1099,7 +1142,7 @@
 
 struct safexcel_alg_template safexcel_alg_sha256 = {
 	.type = SAFEXCEL_ALG_TYPE_AHASH,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_SHA2_256,
 	.alg.ahash = {
 		.init = safexcel_sha256_init,
 		.update = safexcel_ahash_update,
@@ -1114,7 +1157,7 @@
 			.base = {
 				.cra_name = "sha256",
 				.cra_driver_name = "safexcel-sha256",
-				.cra_priority = 300,
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
 				.cra_flags = CRYPTO_ALG_ASYNC |
 					     CRYPTO_ALG_KERN_DRIVER_ONLY,
 				.cra_blocksize = SHA256_BLOCK_SIZE,
@@ -1134,18 +1177,10 @@
 
 	memset(req, 0, sizeof(*req));
 
-	req->state[0] = SHA224_H0;
-	req->state[1] = SHA224_H1;
-	req->state[2] = SHA224_H2;
-	req->state[3] = SHA224_H3;
-	req->state[4] = SHA224_H4;
-	req->state[5] = SHA224_H5;
-	req->state[6] = SHA224_H6;
-	req->state[7] = SHA224_H7;
-
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA224;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA256_DIGEST_SIZE;
+	req->block_sz = SHA256_BLOCK_SIZE;
 
 	return 0;
 }
@@ -1162,7 +1197,7 @@
 
 struct safexcel_alg_template safexcel_alg_sha224 = {
 	.type = SAFEXCEL_ALG_TYPE_AHASH,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_SHA2_256,
 	.alg.ahash = {
 		.init = safexcel_sha224_init,
 		.update = safexcel_ahash_update,
@@ -1177,7 +1212,7 @@
 			.base = {
 				.cra_name = "sha224",
 				.cra_driver_name = "safexcel-sha224",
-				.cra_priority = 300,
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
 				.cra_flags = CRYPTO_ALG_ASYNC |
 					     CRYPTO_ALG_KERN_DRIVER_ONLY,
 				.cra_blocksize = SHA224_BLOCK_SIZE,
@@ -1199,10 +1234,23 @@
 
 static int safexcel_hmac_sha224_init(struct ahash_request *areq)
 {
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
 	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
 
-	safexcel_sha224_init(areq);
-	req->digest = CONTEXT_CONTROL_DIGEST_HMAC;
+	memset(req, 0, sizeof(*req));
+
+	/* Start from ipad precompute */
+	memcpy(req->state, ctx->ipad, SHA256_DIGEST_SIZE);
+	/* Already processed the key^ipad part now! */
+	req->len	= SHA256_BLOCK_SIZE;
+	req->processed	= SHA256_BLOCK_SIZE;
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA224;
+	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+	req->state_sz = SHA256_DIGEST_SIZE;
+	req->block_sz = SHA256_BLOCK_SIZE;
+	req->hmac = true;
+
 	return 0;
 }
 
@@ -1218,7 +1266,7 @@
 
 struct safexcel_alg_template safexcel_alg_hmac_sha224 = {
 	.type = SAFEXCEL_ALG_TYPE_AHASH,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_SHA2_256,
 	.alg.ahash = {
 		.init = safexcel_hmac_sha224_init,
 		.update = safexcel_ahash_update,
@@ -1234,7 +1282,7 @@
 			.base = {
 				.cra_name = "hmac(sha224)",
 				.cra_driver_name = "safexcel-hmac-sha224",
-				.cra_priority = 300,
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
 				.cra_flags = CRYPTO_ALG_ASYNC |
 					     CRYPTO_ALG_KERN_DRIVER_ONLY,
 				.cra_blocksize = SHA224_BLOCK_SIZE,
@@ -1256,10 +1304,23 @@
 
 static int safexcel_hmac_sha256_init(struct ahash_request *areq)
 {
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
 	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
 
-	safexcel_sha256_init(areq);
-	req->digest = CONTEXT_CONTROL_DIGEST_HMAC;
+	memset(req, 0, sizeof(*req));
+
+	/* Start from ipad precompute */
+	memcpy(req->state, ctx->ipad, SHA256_DIGEST_SIZE);
+	/* Already processed the key^ipad part now! */
+	req->len	= SHA256_BLOCK_SIZE;
+	req->processed	= SHA256_BLOCK_SIZE;
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA256;
+	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+	req->state_sz = SHA256_DIGEST_SIZE;
+	req->block_sz = SHA256_BLOCK_SIZE;
+	req->hmac = true;
+
 	return 0;
 }
 
@@ -1275,7 +1336,7 @@
 
 struct safexcel_alg_template safexcel_alg_hmac_sha256 = {
 	.type = SAFEXCEL_ALG_TYPE_AHASH,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_SHA2_256,
 	.alg.ahash = {
 		.init = safexcel_hmac_sha256_init,
 		.update = safexcel_ahash_update,
@@ -1291,7 +1352,7 @@
 			.base = {
 				.cra_name = "hmac(sha256)",
 				.cra_driver_name = "safexcel-hmac-sha256",
-				.cra_priority = 300,
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
 				.cra_flags = CRYPTO_ALG_ASYNC |
 					     CRYPTO_ALG_KERN_DRIVER_ONLY,
 				.cra_blocksize = SHA256_BLOCK_SIZE,
@@ -1311,26 +1372,10 @@
 
 	memset(req, 0, sizeof(*req));
 
-	req->state[0] = lower_32_bits(SHA512_H0);
-	req->state[1] = upper_32_bits(SHA512_H0);
-	req->state[2] = lower_32_bits(SHA512_H1);
-	req->state[3] = upper_32_bits(SHA512_H1);
-	req->state[4] = lower_32_bits(SHA512_H2);
-	req->state[5] = upper_32_bits(SHA512_H2);
-	req->state[6] = lower_32_bits(SHA512_H3);
-	req->state[7] = upper_32_bits(SHA512_H3);
-	req->state[8] = lower_32_bits(SHA512_H4);
-	req->state[9] = upper_32_bits(SHA512_H4);
-	req->state[10] = lower_32_bits(SHA512_H5);
-	req->state[11] = upper_32_bits(SHA512_H5);
-	req->state[12] = lower_32_bits(SHA512_H6);
-	req->state[13] = upper_32_bits(SHA512_H6);
-	req->state[14] = lower_32_bits(SHA512_H7);
-	req->state[15] = upper_32_bits(SHA512_H7);
-
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA512;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA512_DIGEST_SIZE;
+	req->block_sz = SHA512_BLOCK_SIZE;
 
 	return 0;
 }
@@ -1347,7 +1392,7 @@
 
 struct safexcel_alg_template safexcel_alg_sha512 = {
 	.type = SAFEXCEL_ALG_TYPE_AHASH,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_SHA2_512,
 	.alg.ahash = {
 		.init = safexcel_sha512_init,
 		.update = safexcel_ahash_update,
@@ -1362,7 +1407,7 @@
 			.base = {
 				.cra_name = "sha512",
 				.cra_driver_name = "safexcel-sha512",
-				.cra_priority = 300,
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
 				.cra_flags = CRYPTO_ALG_ASYNC |
 					     CRYPTO_ALG_KERN_DRIVER_ONLY,
 				.cra_blocksize = SHA512_BLOCK_SIZE,
@@ -1382,26 +1427,10 @@
 
 	memset(req, 0, sizeof(*req));
 
-	req->state[0] = lower_32_bits(SHA384_H0);
-	req->state[1] = upper_32_bits(SHA384_H0);
-	req->state[2] = lower_32_bits(SHA384_H1);
-	req->state[3] = upper_32_bits(SHA384_H1);
-	req->state[4] = lower_32_bits(SHA384_H2);
-	req->state[5] = upper_32_bits(SHA384_H2);
-	req->state[6] = lower_32_bits(SHA384_H3);
-	req->state[7] = upper_32_bits(SHA384_H3);
-	req->state[8] = lower_32_bits(SHA384_H4);
-	req->state[9] = upper_32_bits(SHA384_H4);
-	req->state[10] = lower_32_bits(SHA384_H5);
-	req->state[11] = upper_32_bits(SHA384_H5);
-	req->state[12] = lower_32_bits(SHA384_H6);
-	req->state[13] = upper_32_bits(SHA384_H6);
-	req->state[14] = lower_32_bits(SHA384_H7);
-	req->state[15] = upper_32_bits(SHA384_H7);
-
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA384;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = SHA512_DIGEST_SIZE;
+	req->block_sz = SHA512_BLOCK_SIZE;
 
 	return 0;
 }
@@ -1418,7 +1447,7 @@
 
 struct safexcel_alg_template safexcel_alg_sha384 = {
 	.type = SAFEXCEL_ALG_TYPE_AHASH,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_SHA2_512,
 	.alg.ahash = {
 		.init = safexcel_sha384_init,
 		.update = safexcel_ahash_update,
@@ -1433,7 +1462,7 @@
 			.base = {
 				.cra_name = "sha384",
 				.cra_driver_name = "safexcel-sha384",
-				.cra_priority = 300,
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
 				.cra_flags = CRYPTO_ALG_ASYNC |
 					     CRYPTO_ALG_KERN_DRIVER_ONLY,
 				.cra_blocksize = SHA384_BLOCK_SIZE,
@@ -1455,10 +1484,23 @@
 
 static int safexcel_hmac_sha512_init(struct ahash_request *areq)
 {
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
 	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
 
-	safexcel_sha512_init(areq);
-	req->digest = CONTEXT_CONTROL_DIGEST_HMAC;
+	memset(req, 0, sizeof(*req));
+
+	/* Start from ipad precompute */
+	memcpy(req->state, ctx->ipad, SHA512_DIGEST_SIZE);
+	/* Already processed the key^ipad part now! */
+	req->len	= SHA512_BLOCK_SIZE;
+	req->processed	= SHA512_BLOCK_SIZE;
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA512;
+	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+	req->state_sz = SHA512_DIGEST_SIZE;
+	req->block_sz = SHA512_BLOCK_SIZE;
+	req->hmac = true;
+
 	return 0;
 }
 
@@ -1474,7 +1516,7 @@
 
 struct safexcel_alg_template safexcel_alg_hmac_sha512 = {
 	.type = SAFEXCEL_ALG_TYPE_AHASH,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_SHA2_512,
 	.alg.ahash = {
 		.init = safexcel_hmac_sha512_init,
 		.update = safexcel_ahash_update,
@@ -1490,7 +1532,7 @@
 			.base = {
 				.cra_name = "hmac(sha512)",
 				.cra_driver_name = "safexcel-hmac-sha512",
-				.cra_priority = 300,
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
 				.cra_flags = CRYPTO_ALG_ASYNC |
 					     CRYPTO_ALG_KERN_DRIVER_ONLY,
 				.cra_blocksize = SHA512_BLOCK_SIZE,
@@ -1512,10 +1554,23 @@
 
 static int safexcel_hmac_sha384_init(struct ahash_request *areq)
 {
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
 	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
 
-	safexcel_sha384_init(areq);
-	req->digest = CONTEXT_CONTROL_DIGEST_HMAC;
+	memset(req, 0, sizeof(*req));
+
+	/* Start from ipad precompute */
+	memcpy(req->state, ctx->ipad, SHA512_DIGEST_SIZE);
+	/* Already processed the key^ipad part now! */
+	req->len	= SHA512_BLOCK_SIZE;
+	req->processed	= SHA512_BLOCK_SIZE;
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_SHA384;
+	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+	req->state_sz = SHA512_DIGEST_SIZE;
+	req->block_sz = SHA512_BLOCK_SIZE;
+	req->hmac = true;
+
 	return 0;
 }
 
@@ -1531,7 +1586,7 @@
 
 struct safexcel_alg_template safexcel_alg_hmac_sha384 = {
 	.type = SAFEXCEL_ALG_TYPE_AHASH,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_SHA2_512,
 	.alg.ahash = {
 		.init = safexcel_hmac_sha384_init,
 		.update = safexcel_ahash_update,
@@ -1547,7 +1602,7 @@
 			.base = {
 				.cra_name = "hmac(sha384)",
 				.cra_driver_name = "safexcel-hmac-sha384",
-				.cra_priority = 300,
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
 				.cra_flags = CRYPTO_ALG_ASYNC |
 					     CRYPTO_ALG_KERN_DRIVER_ONLY,
 				.cra_blocksize = SHA384_BLOCK_SIZE,
@@ -1567,14 +1622,10 @@
 
 	memset(req, 0, sizeof(*req));
 
-	req->state[0] = MD5_H0;
-	req->state[1] = MD5_H1;
-	req->state[2] = MD5_H2;
-	req->state[3] = MD5_H3;
-
 	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_MD5;
 	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
 	req->state_sz = MD5_DIGEST_SIZE;
+	req->block_sz = MD5_HMAC_BLOCK_SIZE;
 
 	return 0;
 }
@@ -1591,7 +1642,7 @@
 
 struct safexcel_alg_template safexcel_alg_md5 = {
 	.type = SAFEXCEL_ALG_TYPE_AHASH,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_MD5,
 	.alg.ahash = {
 		.init = safexcel_md5_init,
 		.update = safexcel_ahash_update,
@@ -1606,7 +1657,7 @@
 			.base = {
 				.cra_name = "md5",
 				.cra_driver_name = "safexcel-md5",
-				.cra_priority = 300,
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
 				.cra_flags = CRYPTO_ALG_ASYNC |
 					     CRYPTO_ALG_KERN_DRIVER_ONLY,
 				.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
@@ -1621,10 +1672,24 @@
 
 static int safexcel_hmac_md5_init(struct ahash_request *areq)
 {
+	struct safexcel_ahash_ctx *ctx = crypto_ahash_ctx(crypto_ahash_reqtfm(areq));
 	struct safexcel_ahash_req *req = ahash_request_ctx(areq);
 
-	safexcel_md5_init(areq);
-	req->digest = CONTEXT_CONTROL_DIGEST_HMAC;
+	memset(req, 0, sizeof(*req));
+
+	/* Start from ipad precompute */
+	memcpy(req->state, ctx->ipad, MD5_DIGEST_SIZE);
+	/* Already processed the key^ipad part now! */
+	req->len	= MD5_HMAC_BLOCK_SIZE;
+	req->processed	= MD5_HMAC_BLOCK_SIZE;
+
+	ctx->alg = CONTEXT_CONTROL_CRYPTO_ALG_MD5;
+	req->digest = CONTEXT_CONTROL_DIGEST_PRECOMPUTED;
+	req->state_sz = MD5_DIGEST_SIZE;
+	req->block_sz = MD5_HMAC_BLOCK_SIZE;
+	req->len_is_le = true; /* MD5 is little endian! ... */
+	req->hmac = true;
+
 	return 0;
 }
 
@@ -1647,7 +1712,7 @@
 
 struct safexcel_alg_template safexcel_alg_hmac_md5 = {
 	.type = SAFEXCEL_ALG_TYPE_AHASH,
-	.engines = EIP97IES | EIP197B | EIP197D,
+	.algo_mask = SAFEXCEL_ALG_MD5,
 	.alg.ahash = {
 		.init = safexcel_hmac_md5_init,
 		.update = safexcel_ahash_update,
@@ -1663,7 +1728,7 @@
 			.base = {
 				.cra_name = "hmac(md5)",
 				.cra_driver_name = "safexcel-hmac-md5",
-				.cra_priority = 300,
+				.cra_priority = SAFEXCEL_CRA_PRIORITY,
 				.cra_flags = CRYPTO_ALG_ASYNC |
 					     CRYPTO_ALG_KERN_DRIVER_ONLY,
 				.cra_blocksize = MD5_HMAC_BLOCK_SIZE,
diff --git a/drivers/crypto/inside-secure/safexcel_ring.c b/drivers/crypto/inside-secure/safexcel_ring.c
index 142bc3f..0f269b8 100644
--- a/drivers/crypto/inside-secure/safexcel_ring.c
+++ b/drivers/crypto/inside-secure/safexcel_ring.c
@@ -137,7 +137,13 @@
 		struct safexcel_token *token =
 			(struct safexcel_token *)cdesc->control_data.token;
 
-		cdesc->control_data.packet_length = full_data_len;
+		/*
+		 * Note that the length here MUST be >0 or else the EIP(1)97
+		 * may hang. Newer EIP197 firmware actually incorporates this
+		 * fix already, but that doesn't help the EIP97 and we may
+		 * also be running older firmware.
+		 */
+		cdesc->control_data.packet_length = full_data_len ?: 1;
 		cdesc->control_data.options = EIP197_OPTION_MAGIC_VALUE |
 					      EIP197_OPTION_64BIT_CTX |
 					      EIP197_OPTION_CTX_CTRL_IN_CMD;
@@ -145,7 +151,8 @@
 			(lower_32_bits(context) & GENMASK(31, 2)) >> 2;
 		cdesc->control_data.context_hi = upper_32_bits(context);
 
-		if (priv->version == EIP197B || priv->version == EIP197D)
+		if (priv->version == EIP197B_MRVL ||
+		    priv->version == EIP197D_MRVL)
 			cdesc->control_data.options |= EIP197_OPTION_RC_AUTO;
 
 		/* TODO: large xform HMAC with SHA-384/512 uses refresh = 3 */
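/*
 * Side note on the "full_data_len ?: 1" form above: GCC's conditional
 * with omitted middle operand, "a ?: b", yields a when a is non-zero and
 * b otherwise, evaluating a only once. Spelled out (illustrative only):
 */
static unsigned int nonzero_packet_length(unsigned int full_data_len)
{
	return full_data_len ? full_data_len : 1;
}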
diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index acedafe..9181523 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -17,7 +17,7 @@
 #include <linux/module.h>
 
 #include <crypto/ctr.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/aes.h>
 #include <crypto/hmac.h>
 #include <crypto/sha.h>
@@ -756,10 +756,7 @@
 		}
 		cipher_cfg |= keylen_cfg;
 	} else {
-		u32 tmp[DES_EXPKEY_WORDS];
-		if (des_ekey(tmp, key) == 0) {
-			*flags |= CRYPTO_TFM_RES_WEAK_KEY;
-		}
+		crypto_des_verify_key(tfm, key);
 	}
 	/* write cfg word to cryptinfo */
 	*(u32*)cinfo = cpu_to_be32(cipher_cfg);
@@ -851,14 +848,8 @@
 static int ablk_des3_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 			    unsigned int key_len)
 {
-	u32 flags = crypto_ablkcipher_get_flags(tfm);
-	int err;
-
-	err = __des3_verify_key(&flags, key);
-	if (unlikely(err))
-		crypto_ablkcipher_set_flags(tfm, flags);
-
-	return ablk_setkey(tfm, key, key_len);
+	return verify_ablkcipher_des3_key(tfm, key) ?:
+	       ablk_setkey(tfm, key, key_len);
 }
 
 static int ablk_rfc3686_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
@@ -1181,7 +1172,6 @@
 			    unsigned int keylen)
 {
 	struct ixp_ctx *ctx = crypto_aead_ctx(tfm);
-	u32 flags = CRYPTO_TFM_RES_BAD_KEY_LEN;
 	struct crypto_authenc_keys keys;
 	int err;
 
@@ -1193,12 +1183,8 @@
 	if (keys.authkeylen > sizeof(ctx->authkey))
 		goto badkey;
 
-	if (keys.enckeylen != DES3_EDE_KEY_SIZE)
-		goto badkey;
-
-	flags = crypto_aead_get_flags(tfm);
-	err = __des3_verify_key(&flags, keys.enckey);
-	if (unlikely(err))
+	err = verify_aead_des3_key(tfm, keys.enckey, keys.enckeylen);
+	if (err)
 		goto badkey;
 
 	memcpy(ctx->authkey, keys.authkey, keys.authkeylen);
@@ -1209,7 +1195,6 @@
 	memzero_explicit(&keys, sizeof(keys));
 	return aead_setup(tfm, crypto_aead_authsize(tfm));
 badkey:
-	crypto_aead_set_flags(tfm, flags);
 	memzero_explicit(&keys, sizeof(keys));
 	return err;
 }
diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c
index f4321f3..84ceddf 100644
--- a/drivers/crypto/marvell/cipher.c
+++ b/drivers/crypto/marvell/cipher.c
@@ -10,7 +10,7 @@
  */
 
 #include <crypto/aes.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 
 #include "cesa.h"
 
@@ -254,7 +254,7 @@
 	int ret;
 	int i;
 
-	ret = crypto_aes_expand_key(&ctx->aes, key, len);
+	ret = aes_expandkey(&ctx->aes, key, len);
 	if (ret) {
 		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
 		return ret;
@@ -272,21 +272,12 @@
 static int mv_cesa_des_setkey(struct crypto_skcipher *cipher, const u8 *key,
 			      unsigned int len)
 {
-	struct crypto_tfm *tfm = crypto_skcipher_tfm(cipher);
-	struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 tmp[DES_EXPKEY_WORDS];
-	int ret;
+	struct mv_cesa_des_ctx *ctx = crypto_skcipher_ctx(cipher);
+	int err;
 
-	if (len != DES_KEY_SIZE) {
-		crypto_skcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	ret = des_ekey(tmp, key);
-	if (!ret && (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
-		return -EINVAL;
-	}
+	err = verify_skcipher_des_key(cipher, key);
+	if (err)
+		return err;
 
 	memcpy(ctx->key, key, DES_KEY_SIZE);
 
@@ -299,8 +290,8 @@
 	struct mv_cesa_des_ctx *ctx = crypto_skcipher_ctx(cipher);
 	int err;
 
-	err = des3_verify_key(cipher, key);
-	if (unlikely(err))
+	err = verify_skcipher_des3_key(cipher, key);
+	if (err)
 		return err;
 
 	memcpy(ctx->key, key, DES3_EDE_KEY_SIZE);
diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c
index 0f0ac85..a2b35fb 100644
--- a/drivers/crypto/marvell/hash.c
+++ b/drivers/crypto/marvell/hash.c
@@ -1148,8 +1148,7 @@
 		}
 
 		/* Set the memory region to 0 to avoid any leak. */
-		memset(keydup, 0, keylen);
-		kfree(keydup);
+		kzfree(keydup);
 
 		if (ret)
 			return ret;
diff --git a/drivers/crypto/mediatek/mtk-aes.c b/drivers/crypto/mediatek/mtk-aes.c
index b7477ee..90c9644 100644
--- a/drivers/crypto/mediatek/mtk-aes.c
+++ b/drivers/crypto/mediatek/mtk-aes.c
@@ -23,7 +23,7 @@
 
 #define AES_CT_CTRL_HDR		cpu_to_le32(0x00220000)
 
-/* AES-CBC/ECB/CTR command token */
+/* AES-CBC/ECB/CTR/OFB/CFB command token */
 #define AES_CMD0		cpu_to_le32(0x05000000)
 #define AES_CMD1		cpu_to_le32(0x2d060000)
 #define AES_CMD2		cpu_to_le32(0xe4a63806)
@@ -50,6 +50,8 @@
 /* AES transform information word 1 fields */
 #define AES_TFM_ECB		cpu_to_le32(0x0 << 0)
 #define AES_TFM_CBC		cpu_to_le32(0x1 << 0)
+#define AES_TFM_OFB		cpu_to_le32(0x4 << 0)
+#define AES_TFM_CFB128		cpu_to_le32(0x5 << 0)
 #define AES_TFM_CTR_INIT	cpu_to_le32(0x2 << 0)	/* init counter to 1 */
 #define AES_TFM_CTR_LOAD	cpu_to_le32(0x6 << 0)	/* load/reuse counter */
 #define AES_TFM_3IV		cpu_to_le32(0x7 << 5)	/* using IV 0-2 */
@@ -58,13 +60,15 @@
 #define AES_TFM_ENC_HASH	cpu_to_le32(0x1 << 17)
 
 /* AES flags */
-#define AES_FLAGS_CIPHER_MSK	GENMASK(2, 0)
+#define AES_FLAGS_CIPHER_MSK	GENMASK(4, 0)
 #define AES_FLAGS_ECB		BIT(0)
 #define AES_FLAGS_CBC		BIT(1)
 #define AES_FLAGS_CTR		BIT(2)
-#define AES_FLAGS_GCM		BIT(3)
-#define AES_FLAGS_ENCRYPT	BIT(4)
-#define AES_FLAGS_BUSY		BIT(5)
+#define AES_FLAGS_OFB		BIT(3)
+#define AES_FLAGS_CFB128	BIT(4)
+#define AES_FLAGS_GCM		BIT(5)
+#define AES_FLAGS_ENCRYPT	BIT(6)
+#define AES_FLAGS_BUSY		BIT(7)
 
 #define AES_AUTH_TAG_ERR	cpu_to_le32(BIT(26))
 
@@ -101,6 +105,7 @@
 struct mtk_aes_base_ctx {
 	struct mtk_cryp *cryp;
 	u32 keylen;
+	__le32 key[12];
 	__le32 keymode;
 
 	mtk_aes_fn start;
@@ -405,7 +410,7 @@
 	return mtk_aes_complete(cryp, aes, -EINVAL);
 }
 
-/* Initialize transform information of CBC/ECB/CTR mode */
+/* Initialize transform information of CBC/ECB/CTR/OFB/CFB mode */
 static void mtk_aes_info_init(struct mtk_cryp *cryp, struct mtk_aes_rec *aes,
 			      size_t len)
 {
@@ -434,7 +439,12 @@
 	case AES_FLAGS_CTR:
 		info->tfm[1] = AES_TFM_CTR_LOAD;
 		goto ctr;
-
+	case AES_FLAGS_OFB:
+		info->tfm[1] = AES_TFM_OFB;
+		break;
+	case AES_FLAGS_CFB128:
+		info->tfm[1] = AES_TFM_CFB128;
+		break;
 	default:
 		/* Should not happen... */
 		return;
@@ -525,6 +535,8 @@
 		backlog->complete(backlog, -EINPROGRESS);
 
 	ctx = crypto_tfm_ctx(areq->tfm);
+	/* Write key into state buffer */
+	memcpy(ctx->info.state, ctx->key, sizeof(ctx->key));
 
 	aes->areq = areq;
 	aes->ctx = ctx;
@@ -644,21 +656,26 @@
 	}
 
 	ctx->keylen = SIZE_IN_WORDS(keylen);
-	mtk_aes_write_state_le(ctx->info.state, (const u32 *)key, keylen);
+	mtk_aes_write_state_le(ctx->key, (const u32 *)key, keylen);
 
 	return 0;
 }
 
 static int mtk_aes_crypt(struct ablkcipher_request *req, u64 mode)
 {
-	struct mtk_aes_base_ctx *ctx;
+	struct crypto_ablkcipher *ablkcipher = crypto_ablkcipher_reqtfm(req);
+	struct mtk_aes_base_ctx *ctx = crypto_ablkcipher_ctx(ablkcipher);
 	struct mtk_aes_reqctx *rctx;
+	struct mtk_cryp *cryp;
 
-	ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req));
+	cryp = mtk_aes_find_dev(ctx);
+	if (!cryp)
+		return -ENODEV;
+
 	rctx = ablkcipher_request_ctx(req);
 	rctx->mode = mode;
 
-	return mtk_aes_handle_queue(ctx->cryp, !(mode & AES_FLAGS_ENCRYPT),
+	return mtk_aes_handle_queue(cryp, !(mode & AES_FLAGS_ENCRYPT),
 				    &req->base);
 }
 
@@ -692,16 +709,29 @@
 	return mtk_aes_crypt(req, AES_FLAGS_CTR);
 }
 
+static int mtk_aes_ofb_encrypt(struct ablkcipher_request *req)
+{
+	return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_OFB);
+}
+
+static int mtk_aes_ofb_decrypt(struct ablkcipher_request *req)
+{
+	return mtk_aes_crypt(req, AES_FLAGS_OFB);
+}
+
+static int mtk_aes_cfb_encrypt(struct ablkcipher_request *req)
+{
+	return mtk_aes_crypt(req, AES_FLAGS_ENCRYPT | AES_FLAGS_CFB128);
+}
+
+static int mtk_aes_cfb_decrypt(struct ablkcipher_request *req)
+{
+	return mtk_aes_crypt(req, AES_FLAGS_CFB128);
+}
+
 static int mtk_aes_cra_init(struct crypto_tfm *tfm)
 {
 	struct mtk_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct mtk_cryp *cryp = NULL;
-
-	cryp = mtk_aes_find_dev(&ctx->base);
-	if (!cryp) {
-		pr_err("can't find crypto device\n");
-		return -ENODEV;
-	}
 
 	tfm->crt_ablkcipher.reqsize = sizeof(struct mtk_aes_reqctx);
 	ctx->base.start = mtk_aes_start;
@@ -711,13 +741,6 @@
 static int mtk_aes_ctr_cra_init(struct crypto_tfm *tfm)
 {
 	struct mtk_aes_ctx *ctx = crypto_tfm_ctx(tfm);
-	struct mtk_cryp *cryp = NULL;
-
-	cryp = mtk_aes_find_dev(&ctx->base);
-	if (!cryp) {
-		pr_err("can't find crypto device\n");
-		return -ENODEV;
-	}
 
 	tfm->crt_ablkcipher.reqsize = sizeof(struct mtk_aes_reqctx);
 	ctx->base.start = mtk_aes_ctr_start;
@@ -787,6 +810,48 @@
 		.decrypt	= mtk_aes_ctr_decrypt,
 	}
 },
+{
+	.cra_name		= "ofb(aes)",
+	.cra_driver_name	= "ofb-aes-mtk",
+	.cra_priority		= 400,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_ASYNC,
+	.cra_init		= mtk_aes_cra_init,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct mtk_aes_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u.ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= mtk_aes_setkey,
+		.encrypt	= mtk_aes_ofb_encrypt,
+		.decrypt	= mtk_aes_ofb_decrypt,
+	}
+},
+{
+	.cra_name		= "cfb(aes)",
+	.cra_driver_name	= "cfb-aes-mtk",
+	.cra_priority		= 400,
+	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
+				  CRYPTO_ALG_ASYNC,
+	.cra_init		= mtk_aes_cra_init,
+	.cra_blocksize		= 1,
+	.cra_ctxsize		= sizeof(struct mtk_aes_ctx),
+	.cra_alignmask		= 0xf,
+	.cra_type		= &crypto_ablkcipher_type,
+	.cra_module		= THIS_MODULE,
+	.cra_u.ablkcipher = {
+		.min_keysize	= AES_MIN_KEY_SIZE,
+		.max_keysize	= AES_MAX_KEY_SIZE,
+		.ivsize		= AES_BLOCK_SIZE,
+		.setkey		= mtk_aes_setkey,
+		.encrypt	= mtk_aes_cfb_encrypt,
+		.decrypt	= mtk_aes_cfb_decrypt,
+	}
+},
 };
 
 static inline struct mtk_aes_gcm_ctx *
@@ -905,14 +970,11 @@
 		aes->resume = mtk_aes_transfer_complete;
 		/* Compute total process length. */
 		aes->total = len + gctx->authsize;
-		/* Compute text length. */
-		gctx->textlen = req->cryptlen;
 		/* Hardware will append authenticated tag to output buffer */
 		scatterwalk_map_and_copy(tag, req->dst, len, gctx->authsize, 1);
 	} else {
 		aes->resume = mtk_aes_gcm_tag_verify;
 		aes->total = len;
-		gctx->textlen = req->cryptlen - gctx->authsize;
 	}
 
 	return mtk_aes_gcm_dma(cryp, aes, req->src, req->dst, len);
@@ -923,6 +985,15 @@
 	struct mtk_aes_base_ctx *ctx = crypto_aead_ctx(crypto_aead_reqtfm(req));
 	struct mtk_aes_gcm_ctx *gctx = mtk_aes_gcm_ctx_cast(ctx);
 	struct mtk_aes_reqctx *rctx = aead_request_ctx(req);
+	struct mtk_cryp *cryp;
+	bool enc = !!(mode & AES_FLAGS_ENCRYPT);
+
+	cryp = mtk_aes_find_dev(ctx);
+	if (!cryp)
+		return -ENODEV;
+
+	/* Compute text length. */
+	gctx->textlen = req->cryptlen - (enc ? 0 : gctx->authsize);
 
 	/* Empty messages are not supported yet */
 	if (!gctx->textlen && !req->assoclen)
@@ -930,8 +1001,7 @@
 
 	rctx->mode = AES_FLAGS_GCM | mode;
 
-	return mtk_aes_handle_queue(ctx->cryp, !!(mode & AES_FLAGS_ENCRYPT),
-				    &req->base);
+	return mtk_aes_handle_queue(cryp, enc, &req->base);
 }
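/*
 * Illustrative sketch of the text length computed above (standalone,
 * simplified): for encryption req->cryptlen covers the plaintext only,
 * while for decryption it also includes the authentication tag, which
 * must be subtracted to get the actual ciphertext length.
 */
#include <stdbool.h>

static unsigned int gcm_textlen(unsigned int cryptlen, unsigned int authsize,
				bool encrypt)
{
	return encrypt ? cryptlen : cryptlen - authsize;
}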
 
 /*
@@ -1003,10 +1073,8 @@
 	if (err)
 		goto out;
 
-	/* Write key into state buffer */
-	mtk_aes_write_state_le(ctx->info.state, (const u32 *)key, keylen);
-	/* Write key(H) into state buffer */
-	mtk_aes_write_state_be(ctx->info.state + ctx->keylen, data->hash,
+	mtk_aes_write_state_le(ctx->key, (const u32 *)key, keylen);
+	mtk_aes_write_state_be(ctx->key + ctx->keylen, data->hash,
 			       AES_BLOCK_SIZE);
 out:
 	kzfree(data);
@@ -1046,13 +1114,6 @@
 static int mtk_aes_gcm_init(struct crypto_aead *aead)
 {
 	struct mtk_aes_gcm_ctx *ctx = crypto_aead_ctx(aead);
-	struct mtk_cryp *cryp = NULL;
-
-	cryp = mtk_aes_find_dev(&ctx->base);
-	if (!cryp) {
-		pr_err("can't find crypto device\n");
-		return -ENODEV;
-	}
 
 	ctx->ctr = crypto_alloc_skcipher("ctr(aes)", 0,
 					 CRYPTO_ALG_ASYNC);
diff --git a/drivers/crypto/mediatek/mtk-platform.c b/drivers/crypto/mediatek/mtk-platform.c
index 125318a..7e3ad08 100644
--- a/drivers/crypto/mediatek/mtk-platform.c
+++ b/drivers/crypto/mediatek/mtk-platform.c
@@ -481,7 +481,6 @@
 
 static int mtk_crypto_probe(struct platform_device *pdev)
 {
-	struct resource *res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	struct mtk_cryp *cryp;
 	int i, err;
 
@@ -489,16 +488,14 @@
 	if (!cryp)
 		return -ENOMEM;
 
-	cryp->base = devm_ioremap_resource(&pdev->dev, res);
+	cryp->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(cryp->base))
 		return PTR_ERR(cryp->base);
 
 	for (i = 0; i < MTK_IRQ_NUM; i++) {
 		cryp->irq[i] = platform_get_irq(pdev, i);
-		if (cryp->irq[i] < 0) {
-			dev_err(cryp->dev, "no IRQ:%d resource info\n", i);
+		if (cryp->irq[i] < 0)
 			return cryp->irq[i];
-		}
 	}
 
 	cryp->clk_cryp = devm_clk_get(&pdev->dev, "cryp");
diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c
index f03b0f0..9e9f48b 100644
--- a/drivers/crypto/mediatek/mtk-sha.c
+++ b/drivers/crypto/mediatek/mtk-sha.c
@@ -778,7 +778,9 @@
 	ctx->flags |= SHA_FLAGS_FINUP;
 
 	err1 = mtk_sha_update(req);
-	if (err1 == -EINPROGRESS || err1 == -EBUSY)
+	if (err1 == -EINPROGRESS ||
+	    (err1 == -EBUSY && (ahash_request_flags(req) &
+				CRYPTO_TFM_REQ_MAY_BACKLOG)))
 		return err1;
 	/*
 	 * final() has to be always called to cleanup resources
diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c
index f1fa637..bf8d219 100644
--- a/drivers/crypto/mxs-dcp.c
+++ b/drivers/crypto/mxs-dcp.c
@@ -994,16 +994,12 @@
 	}
 
 	dcp_vmi_irq = platform_get_irq(pdev, 0);
-	if (dcp_vmi_irq < 0) {
-		dev_err(dev, "Failed to get IRQ: (%d)!\n", dcp_vmi_irq);
+	if (dcp_vmi_irq < 0)
 		return dcp_vmi_irq;
-	}
 
 	dcp_irq = platform_get_irq(pdev, 1);
-	if (dcp_irq < 0) {
-		dev_err(dev, "Failed to get IRQ: (%d)!\n", dcp_irq);
+	if (dcp_irq < 0)
 		return dcp_irq;
-	}
 
 	sdcp = devm_kzalloc(dev, sizeof(*sdcp), GFP_KERNEL);
 	if (!sdcp)
diff --git a/drivers/crypto/n2_core.c b/drivers/crypto/n2_core.c
index 760e72a..dc15b06 100644
--- a/drivers/crypto/n2_core.c
+++ b/drivers/crypto/n2_core.c
@@ -17,7 +17,7 @@
 #include <crypto/md5.h>
 #include <crypto/sha.h>
 #include <crypto/aes.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <linux/mutex.h>
 #include <linux/delay.h>
 #include <linux/sched.h>
@@ -760,22 +760,14 @@
 	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
 	struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm);
 	struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm);
-	u32 tmp[DES_EXPKEY_WORDS];
 	int err;
 
+	err = verify_ablkcipher_des_key(cipher, key);
+	if (err)
+		return err;
+
 	ctx->enc_type = n2alg->enc_type;
 
-	if (keylen != DES_KEY_SIZE) {
-		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	err = des_ekey(tmp, key);
-	if (err == 0 && (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
-		return -EINVAL;
-	}
-
 	ctx->key_len = keylen;
 	memcpy(ctx->key.des, key, keylen);
 	return 0;
@@ -787,15 +779,11 @@
 	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
 	struct n2_cipher_context *ctx = crypto_tfm_ctx(tfm);
 	struct n2_cipher_alg *n2alg = n2_cipher_alg(tfm);
-	u32 flags;
 	int err;
 
-	flags = crypto_ablkcipher_get_flags(cipher);
-	err = __des3_verify_key(&flags, key);
-	if (unlikely(err)) {
-		crypto_ablkcipher_set_flags(cipher, flags);
+	err = verify_ablkcipher_des3_key(cipher, key);
+	if (err)
 		return err;
-	}
 
 	ctx->enc_type = n2alg->enc_type;
 
@@ -1295,20 +1283,20 @@
 	u8		hmac_type;
 };
 
-static const u32 md5_init[MD5_HASH_WORDS] = {
+static const u32 n2_md5_init[MD5_HASH_WORDS] = {
 	cpu_to_le32(MD5_H0),
 	cpu_to_le32(MD5_H1),
 	cpu_to_le32(MD5_H2),
 	cpu_to_le32(MD5_H3),
 };
-static const u32 sha1_init[SHA1_DIGEST_SIZE / 4] = {
+static const u32 n2_sha1_init[SHA1_DIGEST_SIZE / 4] = {
 	SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4,
 };
-static const u32 sha256_init[SHA256_DIGEST_SIZE / 4] = {
+static const u32 n2_sha256_init[SHA256_DIGEST_SIZE / 4] = {
 	SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
 	SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7,
 };
-static const u32 sha224_init[SHA256_DIGEST_SIZE / 4] = {
+static const u32 n2_sha224_init[SHA256_DIGEST_SIZE / 4] = {
 	SHA224_H0, SHA224_H1, SHA224_H2, SHA224_H3,
 	SHA224_H4, SHA224_H5, SHA224_H6, SHA224_H7,
 };
@@ -1316,7 +1304,7 @@
 static const struct n2_hash_tmpl hash_tmpls[] = {
 	{ .name		= "md5",
 	  .hash_zero	= md5_zero_message_hash,
-	  .hash_init	= md5_init,
+	  .hash_init	= n2_md5_init,
 	  .auth_type	= AUTH_TYPE_MD5,
 	  .hmac_type	= AUTH_TYPE_HMAC_MD5,
 	  .hw_op_hashsz	= MD5_DIGEST_SIZE,
@@ -1324,7 +1312,7 @@
 	  .block_size	= MD5_HMAC_BLOCK_SIZE },
 	{ .name		= "sha1",
 	  .hash_zero	= sha1_zero_message_hash,
-	  .hash_init	= sha1_init,
+	  .hash_init	= n2_sha1_init,
 	  .auth_type	= AUTH_TYPE_SHA1,
 	  .hmac_type	= AUTH_TYPE_HMAC_SHA1,
 	  .hw_op_hashsz	= SHA1_DIGEST_SIZE,
@@ -1332,7 +1320,7 @@
 	  .block_size	= SHA1_BLOCK_SIZE },
 	{ .name		= "sha256",
 	  .hash_zero	= sha256_zero_message_hash,
-	  .hash_init	= sha256_init,
+	  .hash_init	= n2_sha256_init,
 	  .auth_type	= AUTH_TYPE_SHA256,
 	  .hmac_type	= AUTH_TYPE_HMAC_SHA256,
 	  .hw_op_hashsz	= SHA256_DIGEST_SIZE,
@@ -1340,7 +1328,7 @@
 	  .block_size	= SHA256_BLOCK_SIZE },
 	{ .name		= "sha224",
 	  .hash_zero	= sha224_zero_message_hash,
-	  .hash_init	= sha224_init,
+	  .hash_init	= n2_sha224_init,
 	  .auth_type	= AUTH_TYPE_SHA256,
 	  .hmac_type	= AUTH_TYPE_RESERVED,
 	  .hw_op_hashsz	= SHA256_DIGEST_SIZE,
diff --git a/drivers/crypto/nx/nx-842-powernv.c b/drivers/crypto/nx/nx-842-powernv.c
index e78ff5c..c037a24 100644
--- a/drivers/crypto/nx/nx-842-powernv.c
+++ b/drivers/crypto/nx/nx-842-powernv.c
@@ -1020,6 +1020,7 @@
 		ret = nx842_powernv_probe_vas(dn);
 		if (ret) {
 			nx842_delete_coprocs();
+			of_node_put(dn);
 			return ret;
 		}
 	}
diff --git a/drivers/crypto/nx/nx.h b/drivers/crypto/nx/nx.h
index c6b5a3b..7ecca16 100644
--- a/drivers/crypto/nx/nx.h
+++ b/drivers/crypto/nx/nx.h
@@ -9,9 +9,6 @@
 #define NX_STRING	"IBM Power7+ Nest Accelerator Crypto Driver"
 #define NX_VERSION	"1.0"
 
-static const char nx_driver_string[] = NX_STRING;
-static const char nx_driver_version[] = NX_VERSION;
-
 /* a scatterlist in the format PHYP is expecting */
 struct nx_sg {
 	u64 addr;
diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c
index 45a4647..2f53fbb 100644
--- a/drivers/crypto/omap-aes.c
+++ b/drivers/crypto/omap-aes.c
@@ -1180,7 +1180,6 @@
 
 		irq = platform_get_irq(pdev, 0);
 		if (irq < 0) {
-			dev_err(dev, "can't get IRQ resource\n");
 			err = irq;
 			goto err_irq;
 		}
diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c
index 1ee69a9..b19d7e5d 100644
--- a/drivers/crypto/omap-des.c
+++ b/drivers/crypto/omap-des.c
@@ -33,7 +33,7 @@
 #include <linux/crypto.h>
 #include <linux/interrupt.h>
 #include <crypto/scatterwalk.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/algapi.h>
 #include <crypto/engine.h>
 
@@ -650,20 +650,13 @@
 			   unsigned int keylen)
 {
 	struct omap_des_ctx *ctx = crypto_ablkcipher_ctx(cipher);
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
+	int err;
 
 	pr_debug("enter, keylen: %d\n", keylen);
 
-	/* Do we need to test against weak key? */
-	if (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) {
-		u32 tmp[DES_EXPKEY_WORDS];
-		int ret = des_ekey(tmp, key);
-
-		if (!ret) {
-			tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
-			return -EINVAL;
-		}
-	}
+	err = verify_ablkcipher_des_key(cipher, key);
+	if (err)
+		return err;
 
 	memcpy(ctx->key, key, keylen);
 	ctx->keylen = keylen;
@@ -672,20 +665,16 @@
 }
 
 static int omap_des3_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
-			   unsigned int keylen)
+			    unsigned int keylen)
 {
 	struct omap_des_ctx *ctx = crypto_ablkcipher_ctx(cipher);
-	u32 flags;
 	int err;
 
 	pr_debug("enter, keylen: %d\n", keylen);
 
-	flags = crypto_ablkcipher_get_flags(cipher);
-	err = __des3_verify_key(&flags, key);
-	if (unlikely(err)) {
-		crypto_ablkcipher_set_flags(cipher, flags);
+	err = verify_ablkcipher_des3_key(cipher, key);
+	if (err)
 		return err;
-	}
 
 	memcpy(ctx->key, key, keylen);
 	ctx->keylen = keylen;
@@ -1049,7 +1038,6 @@
 
 		irq = platform_get_irq(pdev, 0);
 		if (irq < 0) {
-			dev_err(dev, "can't get IRQ resource: %d\n", irq);
 			err = irq;
 			goto err_irq;
 		}
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index e8e2907..ac80bc6 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -1989,7 +1989,6 @@
 	/* Get the IRQ */
 	dd->irq = platform_get_irq(pdev, 0);
 	if (dd->irq < 0) {
-		dev_err(dev, "no IRQ resource info\n");
 		err = dd->irq;
 		goto err;
 	}
diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c
index ad02013..8a06612 100644
--- a/drivers/crypto/padlock-aes.c
+++ b/drivers/crypto/padlock-aes.c
@@ -145,7 +145,7 @@
 	ctx->cword.encrypt.keygen = 1;
 	ctx->cword.decrypt.keygen = 1;
 
-	if (crypto_aes_expand_key(&gen_aes, in_key, key_len)) {
+	if (aes_expandkey(&gen_aes, in_key, key_len)) {
 		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
 		return -EINVAL;
 	}
@@ -300,7 +300,7 @@
 	return iv;
 }
 
-static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void padlock_aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct aes_ctx *ctx = aes_ctx(tfm);
 
@@ -309,7 +309,7 @@
 	padlock_store_cword(&ctx->cword.encrypt);
 }
 
-static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
+static void padlock_aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct aes_ctx *ctx = aes_ctx(tfm);
 
@@ -332,8 +332,8 @@
 			.cia_min_keysize	=	AES_MIN_KEY_SIZE,
 			.cia_max_keysize	=	AES_MAX_KEY_SIZE,
 			.cia_setkey	   	= 	aes_set_key,
-			.cia_encrypt	 	=	aes_encrypt,
-			.cia_decrypt	  	=	aes_decrypt,
+			.cia_encrypt	 	=	padlock_aes_encrypt,
+			.cia_decrypt	  	=	padlock_aes_decrypt,
 		}
 	}
 };
diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c
index b985cb85..3cbefb4 100644
--- a/drivers/crypto/picoxcell_crypto.c
+++ b/drivers/crypto/picoxcell_crypto.c
@@ -6,7 +6,7 @@
 #include <crypto/aes.h>
 #include <crypto/algapi.h>
 #include <crypto/authenc.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/md5.h>
 #include <crypto/sha.h>
 #include <crypto/internal/skcipher.h>
@@ -736,16 +736,12 @@
 static int spacc_des_setkey(struct crypto_ablkcipher *cipher, const u8 *key,
 			    unsigned int len)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
-	struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 tmp[DES_EXPKEY_WORDS];
+	struct spacc_ablk_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	int err;
 
-	if (unlikely(!des_ekey(tmp, key)) &&
-	    (crypto_ablkcipher_get_flags(cipher) &
-	     CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
-		return -EINVAL;
-	}
+	err = verify_ablkcipher_des_key(cipher, key);
+	if (err)
+		return err;
 
 	memcpy(ctx->key, key, len);
 	ctx->key_len = len;
@@ -761,15 +757,11 @@
 			     unsigned int len)
 {
 	struct spacc_ablk_ctx *ctx = crypto_ablkcipher_ctx(cipher);
-	u32 flags;
 	int err;
 
-	flags = crypto_ablkcipher_get_flags(cipher);
-	err = __des3_verify_key(&flags, key);
-	if (unlikely(err)) {
-		crypto_ablkcipher_set_flags(cipher, flags);
+	err = verify_ablkcipher_des3_key(cipher, key);
+	if (err)
 		return err;
-	}
 
 	memcpy(ctx->key, key, len);
 	ctx->key_len = len;
@@ -1624,7 +1616,7 @@
 static int spacc_probe(struct platform_device *pdev)
 {
 	int i, err, ret;
-	struct resource *mem, *irq;
+	struct resource *irq;
 	struct device_node *np = pdev->dev.of_node;
 	struct spacc_engine *engine = devm_kzalloc(&pdev->dev, sizeof(*engine),
 						   GFP_KERNEL);
@@ -1653,8 +1645,7 @@
 
 	engine->name = dev_name(&pdev->dev);
 
-	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	engine->regs = devm_ioremap_resource(&pdev->dev, mem);
+	engine->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(engine->regs))
 		return PTR_ERR(engine->regs);
 
diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h
index 5c4c0a2..d78f8d5 100644
--- a/drivers/crypto/qat/qat_common/adf_common_drv.h
+++ b/drivers/crypto/qat/qat_common/adf_common_drv.h
@@ -95,7 +95,7 @@
 
 static inline int get_current_node(void)
 {
-	return topology_physical_package_id(smp_processor_id());
+	return topology_physical_package_id(raw_smp_processor_id());
 }
 
 int adf_service_register(struct service_hndl *service);
diff --git a/drivers/crypto/qce/ablkcipher.c b/drivers/crypto/qce/ablkcipher.c
index a976210..7a98bf5 100644
--- a/drivers/crypto/qce/ablkcipher.c
+++ b/drivers/crypto/qce/ablkcipher.c
@@ -7,7 +7,7 @@
 #include <linux/interrupt.h>
 #include <linux/types.h>
 #include <crypto/aes.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/internal/skcipher.h>
 
 #include "cipher.h"
@@ -154,27 +154,17 @@
 {
 	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(ablk);
 	struct qce_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
-	unsigned long flags = to_cipher_tmpl(tfm)->alg_flags;
 	int ret;
 
 	if (!key || !keylen)
 		return -EINVAL;
 
-	if (IS_AES(flags)) {
-		switch (keylen) {
-		case AES_KEYSIZE_128:
-		case AES_KEYSIZE_256:
-			break;
-		default:
-			goto fallback;
-		}
-	} else if (IS_DES(flags)) {
-		u32 tmp[DES_EXPKEY_WORDS];
-
-		ret = des_ekey(tmp, key);
-		if (!ret && (crypto_ablkcipher_get_flags(ablk) &
-			     CRYPTO_TFM_REQ_FORBID_WEAK_KEYS))
-			goto weakkey;
+	switch (keylen) {
+	case AES_KEYSIZE_128:
+	case AES_KEYSIZE_256:
+		break;
+	default:
+		goto fallback;
 	}
 
 	ctx->enc_keylen = keylen;
@@ -185,24 +175,32 @@
 	if (!ret)
 		ctx->enc_keylen = keylen;
 	return ret;
-weakkey:
-	crypto_ablkcipher_set_flags(ablk, CRYPTO_TFM_RES_WEAK_KEY);
-	return -EINVAL;
+}
+
+static int qce_des_setkey(struct crypto_ablkcipher *ablk, const u8 *key,
+			  unsigned int keylen)
+{
+	struct qce_cipher_ctx *ctx = crypto_ablkcipher_ctx(ablk);
+	int err;
+
+	err = verify_ablkcipher_des_key(ablk, key);
+	if (err)
+		return err;
+
+	ctx->enc_keylen = keylen;
+	memcpy(ctx->enc_key, key, keylen);
+	return 0;
 }
 
 static int qce_des3_setkey(struct crypto_ablkcipher *ablk, const u8 *key,
 			   unsigned int keylen)
 {
 	struct qce_cipher_ctx *ctx = crypto_ablkcipher_ctx(ablk);
-	u32 flags;
 	int err;
 
-	flags = crypto_ablkcipher_get_flags(ablk);
-	err = __des3_verify_key(&flags, key);
-	if (unlikely(err)) {
-		crypto_ablkcipher_set_flags(ablk, flags);
+	err = verify_ablkcipher_des3_key(ablk, key);
+	if (err)
 		return err;
-	}
 
 	ctx->enc_keylen = keylen;
 	memcpy(ctx->enc_key, key, keylen);
@@ -374,8 +372,9 @@
 	alg->cra_ablkcipher.ivsize = def->ivsize;
 	alg->cra_ablkcipher.min_keysize = def->min_keysize;
 	alg->cra_ablkcipher.max_keysize = def->max_keysize;
-	alg->cra_ablkcipher.setkey = IS_3DES(def->flags) ?
-				     qce_des3_setkey : qce_ablkcipher_setkey;
+	alg->cra_ablkcipher.setkey = IS_3DES(def->flags) ? qce_des3_setkey :
+				     IS_DES(def->flags) ? qce_des_setkey :
+				     qce_ablkcipher_setkey;
 	alg->cra_ablkcipher.encrypt = qce_ablkcipher_encrypt;
 	alg->cra_ablkcipher.decrypt = qce_ablkcipher_decrypt;
 
diff --git a/drivers/crypto/qce/core.c b/drivers/crypto/qce/core.c
index ef1d74e..08d4ce3 100644
--- a/drivers/crypto/qce/core.c
+++ b/drivers/crypto/qce/core.c
@@ -167,7 +167,6 @@
 {
 	struct device *dev = &pdev->dev;
 	struct qce_device *qce;
-	struct resource *res;
 	int ret;
 
 	qce = devm_kzalloc(dev, sizeof(*qce), GFP_KERNEL);
@@ -177,8 +176,7 @@
 	qce->dev = dev;
 	platform_set_drvdata(pdev, qce);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	qce->base = devm_ioremap_resource(&pdev->dev, res);
+	qce->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(qce->base))
 		return PTR_ERR(qce->base);
 
diff --git a/drivers/crypto/qcom-rng.c b/drivers/crypto/qcom-rng.c
index e54249c..4730f84 100644
--- a/drivers/crypto/qcom-rng.c
+++ b/drivers/crypto/qcom-rng.c
@@ -153,7 +153,6 @@
 
 static int qcom_rng_probe(struct platform_device *pdev)
 {
-	struct resource *res;
 	struct qcom_rng *rng;
 	int ret;
 
@@ -164,8 +163,7 @@
 	platform_set_drvdata(pdev, rng);
 	mutex_init(&rng->lock);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	rng->base = devm_ioremap_resource(&pdev->dev, res);
+	rng->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(rng->base))
 		return PTR_ERR(rng->base);
 
diff --git a/drivers/crypto/rockchip/rk3288_crypto.c b/drivers/crypto/rockchip/rk3288_crypto.c
index 8d7e254..e5714ef 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.c
+++ b/drivers/crypto/rockchip/rk3288_crypto.c
@@ -311,7 +311,6 @@
 
 static int rk_crypto_probe(struct platform_device *pdev)
 {
-	struct resource *res;
 	struct device *dev = &pdev->dev;
 	struct rk_crypto_info *crypto_info;
 	int err = 0;
@@ -339,8 +338,7 @@
 
 	spin_lock_init(&crypto_info->lock);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	crypto_info->reg = devm_ioremap_resource(&pdev->dev, res);
+	crypto_info->reg = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(crypto_info->reg)) {
 		err = PTR_ERR(crypto_info->reg);
 		goto err_crypto;
diff --git a/drivers/crypto/rockchip/rk3288_crypto.h b/drivers/crypto/rockchip/rk3288_crypto.h
index 54ee5b3..18e2b3f 100644
--- a/drivers/crypto/rockchip/rk3288_crypto.h
+++ b/drivers/crypto/rockchip/rk3288_crypto.h
@@ -3,7 +3,7 @@
 #define __RK3288_CRYPTO_H__
 
 #include <crypto/aes.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/algapi.h>
 #include <linux/interrupt.h>
 #include <linux/delay.h>
diff --git a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
index 96078aa..d0f4b2d 100644
--- a/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
+++ b/drivers/crypto/rockchip/rk3288_crypto_ablkcipher.c
@@ -46,15 +46,12 @@
 static int rk_des_setkey(struct crypto_ablkcipher *cipher,
 			 const u8 *key, unsigned int keylen)
 {
-	struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher);
-	struct rk_cipher_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 tmp[DES_EXPKEY_WORDS];
+	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(cipher);
+	int err;
 
-	if (!des_ekey(tmp, key) &&
-	    (tfm->crt_flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
-		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
-		return -EINVAL;
-	}
+	err = verify_ablkcipher_des_key(cipher, key);
+	if (err)
+		return err;
 
 	ctx->keylen = keylen;
 	memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen);
@@ -65,15 +62,11 @@
 			  const u8 *key, unsigned int keylen)
 {
 	struct rk_cipher_ctx *ctx = crypto_ablkcipher_ctx(cipher);
-	u32 flags;
 	int err;
 
-	flags = crypto_ablkcipher_get_flags(cipher);
-	err = __des3_verify_key(&flags, key);
-	if (unlikely(err)) {
-		crypto_ablkcipher_set_flags(cipher, flags);
+	err = verify_ablkcipher_des3_key(cipher, key);
+	if (err)
 		return err;
-	}
 
 	ctx->keylen = keylen;
 	memcpy_toio(ctx->dev->reg + RK_CRYPTO_TDES_KEY1_0, key, keylen);
diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c
index 9ef2523..010f1bb 100644
--- a/drivers/crypto/s5p-sss.c
+++ b/drivers/crypto/s5p-sss.c
@@ -2056,9 +2056,12 @@
 	struct s5p_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm);
 	struct s5p_aes_dev *dev = ctx->dev;
 
+	if (!req->nbytes)
+		return 0;
+
 	if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE) &&
 			((mode & FLAGS_AES_MODE_MASK) != FLAGS_AES_CTR)) {
-		dev_err(dev->dev, "request size is not exact amount of AES blocks\n");
+		dev_dbg(dev->dev, "request size is not exact amount of AES blocks\n");
 		return -EINVAL;
 	}
 
@@ -2170,7 +2173,7 @@
 		.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER |
 					  CRYPTO_ALG_ASYNC |
 					  CRYPTO_ALG_KERN_DRIVER_ONLY,
-		.cra_blocksize		= AES_BLOCK_SIZE,
+		.cra_blocksize		= 1,
 		.cra_ctxsize		= sizeof(struct s5p_aes_ctx),
 		.cra_alignmask		= 0x0f,
 		.cra_type		= &crypto_ablkcipher_type,
diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c
index b0b8e3d..8ac8ec6 100644
--- a/drivers/crypto/sahara.c
+++ b/drivers/crypto/sahara.c
@@ -1403,10 +1403,8 @@
 
 	/* Get the IRQ */
 	irq = platform_get_irq(pdev,  0);
-	if (irq < 0) {
-		dev_err(&pdev->dev, "failed to get irq resource\n");
+	if (irq < 0)
 		return irq;
-	}
 
 	err = devm_request_irq(&pdev->dev, irq, sahara_irq_handler,
 			       0, dev_name(&pdev->dev), dev);
diff --git a/drivers/crypto/stm32/Kconfig b/drivers/crypto/stm32/Kconfig
index d657628..1aba937 100644
--- a/drivers/crypto/stm32/Kconfig
+++ b/drivers/crypto/stm32/Kconfig
@@ -25,7 +25,7 @@
 	depends on ARCH_STM32
 	select CRYPTO_HASH
 	select CRYPTO_ENGINE
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	help
           This enables support for the CRYP (AES/DES/TDES) hw accelerator which
 	  can be found on STMicroelectronics STM32 SOC.
diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c
index 440c9f1..9e11c34 100644
--- a/drivers/crypto/stm32/stm32-crc32.c
+++ b/drivers/crypto/stm32/stm32-crc32.c
@@ -255,7 +255,6 @@
 {
 	struct device *dev = &pdev->dev;
 	struct stm32_crc *crc;
-	struct resource *res;
 	int ret;
 
 	crc = devm_kzalloc(dev, sizeof(*crc), GFP_KERNEL);
@@ -264,8 +263,7 @@
 
 	crc->dev = dev;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	crc->regs = devm_ioremap_resource(dev, res);
+	crc->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(crc->regs)) {
 		dev_err(dev, "Cannot map CRC IO\n");
 		return PTR_ERR(crc->regs);
diff --git a/drivers/crypto/stm32/stm32-cryp.c b/drivers/crypto/stm32/stm32-cryp.c
index 98ae028..ba5ea64 100644
--- a/drivers/crypto/stm32/stm32-cryp.c
+++ b/drivers/crypto/stm32/stm32-cryp.c
@@ -15,7 +15,7 @@
 #include <linux/reset.h>
 
 #include <crypto/aes.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/engine.h>
 #include <crypto/scatterwalk.h>
 #include <crypto/internal/aead.h>
@@ -767,35 +767,15 @@
 static int stm32_cryp_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 				 unsigned int keylen)
 {
-	u32 tmp[DES_EXPKEY_WORDS];
-
-	if (keylen != DES_KEY_SIZE)
-		return -EINVAL;
-
-	if ((crypto_ablkcipher_get_flags(tfm) &
-	     CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) &&
-	    unlikely(!des_ekey(tmp, key))) {
-		crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
-		return -EINVAL;
-	}
-
-	return stm32_cryp_setkey(tfm, key, keylen);
+	return verify_ablkcipher_des_key(tfm, key) ?:
+	       stm32_cryp_setkey(tfm, key, keylen);
 }
 
 static int stm32_cryp_tdes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
 				  unsigned int keylen)
 {
-	u32 flags;
-	int err;
-
-	flags = crypto_ablkcipher_get_flags(tfm);
-	err = __des3_verify_key(&flags, key);
-	if (unlikely(err)) {
-		crypto_ablkcipher_set_flags(tfm, flags);
-		return err;
-	}
-
-	return stm32_cryp_setkey(tfm, key, keylen);
+	return verify_ablkcipher_des3_key(tfm, key) ?:
+	       stm32_cryp_setkey(tfm, key, keylen);
 }
 
 static int stm32_cryp_aes_aead_setkey(struct crypto_aead *tfm, const u8 *key,
@@ -1955,7 +1935,6 @@
 {
 	struct device *dev = &pdev->dev;
 	struct stm32_cryp *cryp;
-	struct resource *res;
 	struct reset_control *rst;
 	int irq, ret;
 
@@ -1969,16 +1948,13 @@
 
 	cryp->dev = dev;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	cryp->regs = devm_ioremap_resource(dev, res);
+	cryp->regs = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(cryp->regs))
 		return PTR_ERR(cryp->regs);
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(dev, "Cannot get IRQ resource\n");
+	if (irq < 0)
 		return irq;
-	}
 
 	ret = devm_request_threaded_irq(dev, irq, stm32_cryp_irq,
 					stm32_cryp_irq_thread, IRQF_ONESHOT,
diff --git a/drivers/crypto/stm32/stm32-hash.c b/drivers/crypto/stm32/stm32-hash.c
index 2b70d87..cfc8e0e 100644
--- a/drivers/crypto/stm32/stm32-hash.c
+++ b/drivers/crypto/stm32/stm32-hash.c
@@ -1450,10 +1450,8 @@
 		return ret;
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(dev, "Cannot get IRQ resource\n");
+	if (irq < 0)
 		return irq;
-	}
 
 	ret = devm_request_threaded_irq(dev, irq, stm32_hash_irq_handler,
 					stm32_hash_irq_thread, IRQF_ONESHOT,
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
index 6f7cbf6..6536fd4 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
+++ b/drivers/crypto/sunxi-ss/sun4i-ss-cipher.c
@@ -542,25 +542,11 @@
 			unsigned int keylen)
 {
 	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
-	struct sun4i_ss_ctx *ss = op->ss;
-	u32 flags;
-	u32 tmp[DES_EXPKEY_WORDS];
-	int ret;
+	int err;
 
-	if (unlikely(keylen != DES_KEY_SIZE)) {
-		dev_err(ss->dev, "Invalid keylen %u\n", keylen);
-		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
-		return -EINVAL;
-	}
-
-	flags = crypto_skcipher_get_flags(tfm);
-
-	ret = des_ekey(tmp, key);
-	if (unlikely(!ret) && (flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
-		crypto_skcipher_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
-		dev_dbg(ss->dev, "Weak key %u\n", keylen);
-		return -EINVAL;
-	}
+	err = verify_skcipher_des_key(tfm, key);
+	if (err)
+		return err;
 
 	op->keylen = keylen;
 	memcpy(op->key, key, keylen);
@@ -578,8 +564,8 @@
 	struct sun4i_tfm_ctx *op = crypto_skcipher_ctx(tfm);
 	int err;
 
-	err = des3_verify_key(tfm, key);
-	if (unlikely(err))
+	err = verify_skcipher_des3_key(tfm, key);
+	if (err)
 		return err;
 
 	op->keylen = keylen;
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss-core.c b/drivers/crypto/sunxi-ss/sun4i-ss-core.c
index 2e87042..9aa6fe0 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss-core.c
+++ b/drivers/crypto/sunxi-ss/sun4i-ss-core.c
@@ -225,7 +225,6 @@
 
 static int sun4i_ss_probe(struct platform_device *pdev)
 {
-	struct resource *res;
 	u32 v;
 	int err, i;
 	unsigned long cr;
@@ -240,8 +239,7 @@
 	if (!ss)
 		return -ENOMEM;
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	ss->base = devm_ioremap_resource(&pdev->dev, res);
+	ss->base = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(ss->base)) {
 		dev_err(&pdev->dev, "Cannot request MMIO\n");
 		return PTR_ERR(ss->base);
diff --git a/drivers/crypto/sunxi-ss/sun4i-ss.h b/drivers/crypto/sunxi-ss/sun4i-ss.h
index 8654d48..35a27a7 100644
--- a/drivers/crypto/sunxi-ss/sun4i-ss.h
+++ b/drivers/crypto/sunxi-ss/sun4i-ss.h
@@ -29,7 +29,7 @@
 #include <crypto/internal/hash.h>
 #include <crypto/internal/skcipher.h>
 #include <crypto/aes.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/internal/rng.h>
 #include <crypto/rng.h>
 
diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c
index c9d686a..cb6c10b 100644
--- a/drivers/crypto/talitos.c
+++ b/drivers/crypto/talitos.c
@@ -30,7 +30,7 @@
 
 #include <crypto/algapi.h>
 #include <crypto/aes.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/sha.h>
 #include <crypto/md5.h>
 #include <crypto/internal/aead.h>
@@ -925,7 +925,6 @@
 	struct talitos_ctx *ctx = crypto_aead_ctx(authenc);
 	struct device *dev = ctx->dev;
 	struct crypto_authenc_keys keys;
-	u32 flags;
 	int err;
 
 	err = crypto_authenc_extractkeys(&keys, key, keylen);
@@ -936,15 +935,9 @@
 	if (keys.authkeylen + keys.enckeylen > TALITOS_MAX_KEY_SIZE)
 		goto badkey;
 
-	if (keys.enckeylen != DES3_EDE_KEY_SIZE)
-		goto badkey;
-
-	flags = crypto_aead_get_flags(authenc);
-	err = __des3_verify_key(&flags, keys.enckey);
-	if (unlikely(err)) {
-		crypto_aead_set_flags(authenc, flags);
+	err = verify_aead_des3_key(authenc, keys.enckey, keys.enckeylen);
+	if (err)
 		goto out;
-	}
 
 	if (ctx->keylen)
 		dma_unmap_single(dev, ctx->dma_key, ctx->keylen, DMA_TO_DEVICE);
@@ -1517,32 +1510,15 @@
 static int ablkcipher_des_setkey(struct crypto_ablkcipher *cipher,
 				 const u8 *key, unsigned int keylen)
 {
-	u32 tmp[DES_EXPKEY_WORDS];
-
-	if (unlikely(crypto_ablkcipher_get_flags(cipher) &
-		     CRYPTO_TFM_REQ_FORBID_WEAK_KEYS) &&
-	    !des_ekey(tmp, key)) {
-		crypto_ablkcipher_set_flags(cipher, CRYPTO_TFM_RES_WEAK_KEY);
-		return -EINVAL;
-	}
-
-	return ablkcipher_setkey(cipher, key, keylen);
+	return verify_ablkcipher_des_key(cipher, key) ?:
+	       ablkcipher_setkey(cipher, key, keylen);
 }
 
 static int ablkcipher_des3_setkey(struct crypto_ablkcipher *cipher,
 				  const u8 *key, unsigned int keylen)
 {
-	u32 flags;
-	int err;
-
-	flags = crypto_ablkcipher_get_flags(cipher);
-	err = __des3_verify_key(&flags, key);
-	if (unlikely(err)) {
-		crypto_ablkcipher_set_flags(cipher, flags);
-		return err;
-	}
-
-	return ablkcipher_setkey(cipher, key, keylen);
+	return verify_ablkcipher_des3_key(cipher, key) ?:
+	       ablkcipher_setkey(cipher, key, keylen);
 }
 
 static int ablkcipher_aes_setkey(struct crypto_ablkcipher *cipher,
diff --git a/drivers/crypto/ux500/Kconfig b/drivers/crypto/ux500/Kconfig
index 349d34e..b1c6f73 100644
--- a/drivers/crypto/ux500/Kconfig
+++ b/drivers/crypto/ux500/Kconfig
@@ -9,7 +9,7 @@
 	depends on CRYPTO_DEV_UX500
 	select CRYPTO_ALGAPI
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	help
        This selects the crypto driver for the UX500_CRYP hardware. It supports
        AES-ECB, CBC and CTR with key sizes of 128, 192 and 256 bits.
diff --git a/drivers/crypto/ux500/cryp/cryp.h b/drivers/crypto/ux500/cryp/cryp.h
index bd89504..8da7f87 100644
--- a/drivers/crypto/ux500/cryp/cryp.h
+++ b/drivers/crypto/ux500/cryp/cryp.h
@@ -241,12 +241,12 @@
 	struct clk *clk;
 	struct regulator *pwr_regulator;
 	int power_status;
-	struct spinlock ctx_lock;
+	spinlock_t ctx_lock;
 	struct cryp_ctx *current_ctx;
 	struct klist_node list_node;
 	struct cryp_dma dma;
 	bool power_state;
-	struct spinlock power_state_spinlock;
+	spinlock_t power_state_spinlock;
 	bool restore_dev_ctx;
 };
 
diff --git a/drivers/crypto/ux500/cryp/cryp_core.c b/drivers/crypto/ux500/cryp/cryp_core.c
index b4beb54..1628ae7 100644
--- a/drivers/crypto/ux500/cryp/cryp_core.c
+++ b/drivers/crypto/ux500/cryp/cryp_core.c
@@ -29,7 +29,7 @@
 #include <crypto/aes.h>
 #include <crypto/algapi.h>
 #include <crypto/ctr.h>
-#include <crypto/des.h>
+#include <crypto/internal/des.h>
 #include <crypto/scatterwalk.h>
 
 #include <linux/platform_data/crypto-ux500.h>
@@ -528,9 +528,9 @@
 
 	dev_dbg(ctx->device->dev, "[%s]: ", __func__);
 
-	if (unlikely(!IS_ALIGNED((u32)sg, 4))) {
+	if (unlikely(!IS_ALIGNED((unsigned long)sg, 4))) {
 		dev_err(ctx->device->dev, "[%s]: Data in sg list isn't "
-			"aligned! Addr: 0x%08x", __func__, (u32)sg);
+			"aligned! Addr: 0x%08lx", __func__, (unsigned long)sg);
 		return -EFAULT;
 	}
 
@@ -763,9 +763,9 @@
 
 	ctx->outlen = ctx->datalen;
 
-	if (unlikely(!IS_ALIGNED((u32)indata, 4))) {
+	if (unlikely(!IS_ALIGNED((unsigned long)indata, 4))) {
 		pr_debug(DEV_DBG_NAME " [%s]: Data isn't aligned! Addr: "
-			 "0x%08x", __func__, (u32)indata);
+			 "0x%08lx", __func__, (unsigned long)indata);
 		return -EINVAL;
 	}
 
@@ -987,26 +987,13 @@
 				 const u8 *key, unsigned int keylen)
 {
 	struct cryp_ctx *ctx = crypto_ablkcipher_ctx(cipher);
-	u32 *flags = &cipher->base.crt_flags;
-	u32 tmp[DES_EXPKEY_WORDS];
-	int ret;
+	int err;
 
 	pr_debug(DEV_DBG_NAME " [%s]", __func__);
-	if (keylen != DES_KEY_SIZE) {
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		pr_debug(DEV_DBG_NAME " [%s]: CRYPTO_TFM_RES_BAD_KEY_LEN",
-				__func__);
-		return -EINVAL;
-	}
 
-	ret = des_ekey(tmp, key);
-	if (unlikely(ret == 0) &&
-	    (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)) {
-		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
-		pr_debug(DEV_DBG_NAME " [%s]: CRYPTO_TFM_RES_WEAK_KEY",
-			 __func__);
-		return -EINVAL;
-	}
+	err = verify_ablkcipher_des_key(cipher, key);
+	if (err)
+		return err;
 
 	memcpy(ctx->key, key, keylen);
 	ctx->keylen = keylen;
@@ -1019,17 +1006,13 @@
 				  const u8 *key, unsigned int keylen)
 {
 	struct cryp_ctx *ctx = crypto_ablkcipher_ctx(cipher);
-	u32 flags;
 	int err;
 
 	pr_debug(DEV_DBG_NAME " [%s]", __func__);
 
-	flags = crypto_ablkcipher_get_flags(cipher);
-	err = __des3_verify_key(&flags, key);
-	if (unlikely(err)) {
-		crypto_ablkcipher_set_flags(cipher, flags);
+	err = verify_ablkcipher_des3_key(cipher, key);
+	if (err)
 		return err;
-	}
 
 	memcpy(ctx->key, key, keylen);
 	ctx->keylen = keylen;
diff --git a/drivers/crypto/ux500/hash/hash_alg.h b/drivers/crypto/ux500/hash/hash_alg.h
index ab2bd00..7c9bcc1 100644
--- a/drivers/crypto/ux500/hash/hash_alg.h
+++ b/drivers/crypto/ux500/hash/hash_alg.h
@@ -366,10 +366,10 @@
 	phys_addr_t             phybase;
 	struct klist_node	list_node;
 	struct device		*dev;
-	struct spinlock		ctx_lock;
+	spinlock_t		ctx_lock;
 	struct hash_ctx		*current_ctx;
 	bool			power_state;
-	struct spinlock		power_state_lock;
+	spinlock_t		power_state_lock;
 	struct regulator	*regulator;
 	struct clk		*clk;
 	bool			restore_dev_state;
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index f1ebc3d..c172a69 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -806,7 +806,7 @@
 			 * HW peripheral, otherwise we first copy data
 			 * to a local buffer
 			 */
-			if ((0 == (((u32)data_buffer) % 4)) &&
+			if (IS_ALIGNED((unsigned long)data_buffer, 4) &&
 			    (0 == *index))
 				hash_processblock(device_data,
 						  (const u32 *)data_buffer,
@@ -864,7 +864,8 @@
 	if (ret)
 		return ret;
 
-	dev_dbg(device_data->dev, "%s: (ctx=0x%x)!\n", __func__, (u32) ctx);
+	dev_dbg(device_data->dev, "%s: (ctx=0x%lx)!\n", __func__,
+		(unsigned long)ctx);
 
 	if (req_ctx->updated) {
 		ret = hash_resume_state(device_data, &device_data->state);
@@ -969,7 +970,8 @@
 	if (ret)
 		return ret;
 
-	dev_dbg(device_data->dev, "%s: (ctx=0x%x)!\n", __func__, (u32) ctx);
+	dev_dbg(device_data->dev, "%s: (ctx=0x%lx)!\n", __func__,
+		(unsigned long)ctx);
 
 	if (req_ctx->updated) {
 		ret = hash_resume_state(device_data, &device_data->state);
@@ -1272,8 +1274,8 @@
 	else
 		loop_ctr = SHA256_DIGEST_SIZE / sizeof(u32);
 
-	dev_dbg(device_data->dev, "%s: digest array:(0x%x)\n",
-		__func__, (u32) digest);
+	dev_dbg(device_data->dev, "%s: digest array:(0x%lx)\n",
+		__func__, (unsigned long)digest);
 
 	/* Copy result into digest array */
 	for (count = 0; count < loop_ctr; count++) {
diff --git a/drivers/crypto/virtio/virtio_crypto_algs.c b/drivers/crypto/virtio/virtio_crypto_algs.c
index 10f266d..42d1920 100644
--- a/drivers/crypto/virtio/virtio_crypto_algs.c
+++ b/drivers/crypto/virtio/virtio_crypto_algs.c
@@ -129,13 +129,11 @@
 	 * Avoid to do DMA from the stack, switch to using
 	 * dynamically-allocated for the key
 	 */
-	uint8_t *cipher_key = kmalloc(keylen, GFP_ATOMIC);
+	uint8_t *cipher_key = kmemdup(key, keylen, GFP_ATOMIC);
 
 	if (!cipher_key)
 		return -ENOMEM;
 
-	memcpy(cipher_key, key, keylen);
-
 	spin_lock(&vcrypto->ctrl_lock);
 	/* Pad ctrl header */
 	vcrypto->ctrl.header.opcode =
diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c
index 49f7258..d59e736 100644
--- a/drivers/crypto/vmx/aes_xts.c
+++ b/drivers/crypto/vmx/aes_xts.c
@@ -84,7 +84,7 @@
 	u8 tweak[AES_BLOCK_SIZE];
 	int ret;
 
-	if (!crypto_simd_usable()) {
+	if (!crypto_simd_usable() || (req->cryptlen % XTS_BLOCK_SIZE) != 0) {
 		struct skcipher_request *subreq = skcipher_request_ctx(req);
 
 		*subreq = *req;
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index b16219e..350bc30 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -16,7 +16,7 @@
 	select CRYPTO_GCM
 	select CRYPTO_ECB
 	select CRYPTO_AES
-	select CRYPTO_DES
+	select CRYPTO_LIB_DES
 	select KEYS
 	help
 	  This is the client VFS module for the SMB3 family of NAS protocols,
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 3289b56..4e2f748 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -1601,7 +1601,6 @@
 	("VFS to access SMB3 servers e.g. Samba, Macs, Azure and Windows (and "
 	"also older servers complying with the SNIA CIFS Specification)");
 MODULE_VERSION(CIFS_VERSION);
-MODULE_SOFTDEP("pre: des");
 MODULE_SOFTDEP("pre: ecb");
 MODULE_SOFTDEP("pre: hmac");
 MODULE_SOFTDEP("pre: md4");
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index 2b6d87b..39a9384 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -11,13 +11,14 @@
 
 */
 
-#include <linux/crypto.h>
 #include <linux/module.h>
 #include <linux/slab.h>
+#include <linux/fips.h>
 #include <linux/fs.h>
 #include <linux/string.h>
 #include <linux/kernel.h>
 #include <linux/random.h>
+#include <crypto/des.h>
 #include "cifs_fs_sb.h"
 #include "cifs_unicode.h"
 #include "cifspdu.h"
@@ -58,19 +59,18 @@
 smbhash(unsigned char *out, const unsigned char *in, unsigned char *key)
 {
 	unsigned char key2[8];
-	struct crypto_cipher *tfm_des;
+	struct des_ctx ctx;
 
 	str_to_key(key, key2);
 
-	tfm_des = crypto_alloc_cipher("des", 0, 0);
-	if (IS_ERR(tfm_des)) {
-		cifs_dbg(VFS, "could not allocate des crypto API\n");
-		return PTR_ERR(tfm_des);
+	if (fips_enabled) {
+		cifs_dbg(VFS, "FIPS compliance enabled: DES not permitted\n");
+		return -ENOENT;
 	}
 
-	crypto_cipher_setkey(tfm_des, key2, 8);
-	crypto_cipher_encrypt_one(tfm_des, out, in);
-	crypto_free_cipher(tfm_des);
+	des_expand_key(&ctx, key2, DES_KEY_SIZE);
+	des_encrypt(&ctx, out, in);
+	memzero_explicit(&ctx, sizeof(ctx));
 
 	return 0;
 }
diff --git a/include/asm-generic/Kbuild b/include/asm-generic/Kbuild
index 6f4536d..adff14f 100644
--- a/include/asm-generic/Kbuild
+++ b/include/asm-generic/Kbuild
@@ -3,3 +3,5 @@
 # asm headers that all architectures except um should have
 # (This file is not included when SRCARCH=um since UML borrows several
 # asm headers from the host architecture.)
+
+mandatory-y += simd.h
diff --git a/include/crypto/aes.h b/include/crypto/aes.h
index 0fdb542..2090729 100644
--- a/include/crypto/aes.h
+++ b/include/crypto/aes.h
@@ -29,12 +29,62 @@
 };
 
 extern const u32 crypto_ft_tab[4][256] ____cacheline_aligned;
-extern const u32 crypto_fl_tab[4][256] ____cacheline_aligned;
 extern const u32 crypto_it_tab[4][256] ____cacheline_aligned;
-extern const u32 crypto_il_tab[4][256] ____cacheline_aligned;
+
+/*
+ * validate key length for AES algorithms
+ */
+static inline int aes_check_keylen(unsigned int keylen)
+{
+	switch (keylen) {
+	case AES_KEYSIZE_128:
+	case AES_KEYSIZE_192:
+	case AES_KEYSIZE_256:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
 
 int crypto_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 		unsigned int key_len);
-int crypto_aes_expand_key(struct crypto_aes_ctx *ctx, const u8 *in_key,
-		unsigned int key_len);
+
+/**
+ * aes_expandkey - Expands the AES key as described in FIPS-197
+ * @ctx:	The location where the computed key will be stored.
+ * @in_key:	The supplied key.
+ * @key_len:	The length of the supplied key.
+ *
+ * Returns 0 on success. The function fails only if an invalid key size (or
+ * pointer) is supplied.
+ * The expanded key size is 240 bytes (max of 14 rounds with a unique 16 bytes
+ * key schedule plus a 16 bytes key which is used before the first round).
+ * The decryption key is prepared for the "Equivalent Inverse Cipher" as
+ * described in FIPS-197. The first slot (16 bytes) of each key (enc or dec) is
+ * for the initial combination, the second slot for the first round and so on.
+ */
+int aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
+		  unsigned int key_len);
+
+/**
+ * aes_encrypt - Encrypt a single AES block
+ * @ctx:	Context struct containing the key schedule
+ * @out:	Buffer to store the ciphertext
+ * @in:		Buffer containing the plaintext
+ */
+void aes_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
+
+/**
+ * aes_decrypt - Decrypt a single AES block
+ * @ctx:	Context struct containing the key schedule
+ * @out:	Buffer to store the plaintext
+ * @in:		Buffer containing the ciphertext
+ */
+void aes_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in);
+
+extern const u8 crypto_aes_sbox[];
+extern const u8 crypto_aes_inv_sbox[];
+
 #endif
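
The new single-block AES library interface declared above (aes_check_keylen, aes_expandkey, aes_encrypt) is aimed at callers that do not want the full crypto API. A minimal sketch of such a caller follows; the function name is hypothetical and not part of this patch:

/* Hypothetical single-block user of the AES library helpers declared above. */
static int example_aes_encrypt_block(const u8 *key, unsigned int keylen,
				     const u8 in[AES_BLOCK_SIZE],
				     u8 out[AES_BLOCK_SIZE])
{
	struct crypto_aes_ctx ctx;
	int err;

	err = aes_check_keylen(keylen);
	if (err)
		return err;

	err = aes_expandkey(&ctx, key, keylen);
	if (err)
		return err;

	aes_encrypt(&ctx, out, in);
	memzero_explicit(&ctx, sizeof(ctx));	/* key schedule holds key material */
	return 0;
}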
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index dc1106a..e5bd302 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -409,10 +409,8 @@
 
 static inline void crypto_yield(u32 flags)
 {
-#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PREEMPT_VOLUNTARY)
 	if (flags & CRYPTO_TFM_REQ_MAY_SLEEP)
 		cond_resched();
-#endif
 }
 
 int crypto_register_notifier(struct notifier_block *nb);
diff --git a/include/crypto/ctr.h b/include/crypto/ctr.h
index 06984a26..a1c66d1 100644
--- a/include/crypto/ctr.h
+++ b/include/crypto/ctr.h
@@ -8,8 +8,58 @@
 #ifndef _CRYPTO_CTR_H
 #define _CRYPTO_CTR_H
 
+#include <crypto/algapi.h>
+#include <crypto/internal/skcipher.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
 #define CTR_RFC3686_NONCE_SIZE 4
 #define CTR_RFC3686_IV_SIZE 8
 #define CTR_RFC3686_BLOCK_SIZE 16
 
+static inline int crypto_ctr_encrypt_walk(struct skcipher_request *req,
+					  void (*fn)(struct crypto_skcipher *,
+						     const u8 *, u8 *))
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+	int blocksize = crypto_skcipher_chunksize(tfm);
+	u8 buf[MAX_CIPHER_BLOCKSIZE];
+	struct skcipher_walk walk;
+	int err;
+
+	/* avoid integer division due to variable blocksize parameter */
+	if (WARN_ON_ONCE(!is_power_of_2(blocksize)))
+		return -EINVAL;
+
+	err = skcipher_walk_virt(&walk, req, false);
+
+	while (walk.nbytes > 0) {
+		u8 *dst = walk.dst.virt.addr;
+		u8 *src = walk.src.virt.addr;
+		int nbytes = walk.nbytes;
+		int tail = 0;
+
+		if (nbytes < walk.total) {
+			tail = walk.nbytes & (blocksize - 1);
+			nbytes -= tail;
+		}
+
+		do {
+			int bsize = min(nbytes, blocksize);
+
+			fn(tfm, walk.iv, buf);
+
+			crypto_xor_cpy(dst, src, buf, bsize);
+			crypto_inc(walk.iv, blocksize);
+
+			dst += bsize;
+			src += bsize;
+			nbytes -= bsize;
+		} while (nbytes > 0);
+
+		err = skcipher_walk_done(&walk, tail);
+	}
+	return err;
+}
+
 #endif  /* _CRYPTO_CTR_H */
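
crypto_ctr_encrypt_walk() above turns a single-block cipher callback into a complete CTR walk. A rough sketch of a caller, assuming an skcipher registered with chunksize AES_BLOCK_SIZE; the context type and function names are made up for illustration:

/* Hypothetical per-tfm context carrying an expanded AES key. */
struct example_ctr_ctx {
	struct crypto_aes_ctx aes;
};

/* Encrypts one counter block; matches the fn() signature used above. */
static void example_ctr_block(struct crypto_skcipher *tfm,
			      const u8 *src, u8 *dst)
{
	const struct example_ctr_ctx *ctx = crypto_skcipher_ctx(tfm);

	aes_encrypt(&ctx->aes, dst, src);
}

static int example_ctr_encrypt(struct skcipher_request *req)
{
	return crypto_ctr_encrypt_walk(req, example_ctr_block);
}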
diff --git a/include/crypto/des.h b/include/crypto/des.h
index 72c7c8e..7812b43 100644
--- a/include/crypto/des.h
+++ b/include/crypto/des.h
@@ -6,10 +6,7 @@
 #ifndef __CRYPTO_DES_H
 #define __CRYPTO_DES_H
 
-#include <crypto/skcipher.h>
-#include <linux/compiler.h>
-#include <linux/fips.h>
-#include <linux/string.h>
+#include <linux/types.h>
 
 #define DES_KEY_SIZE		8
 #define DES_EXPKEY_WORDS	32
@@ -19,48 +16,42 @@
 #define DES3_EDE_EXPKEY_WORDS	(3 * DES_EXPKEY_WORDS)
 #define DES3_EDE_BLOCK_SIZE	DES_BLOCK_SIZE
 
-static inline int __des3_verify_key(u32 *flags, const u8 *key)
-{
-	int err = -EINVAL;
-	u32 K[6];
+struct des_ctx {
+	u32 expkey[DES_EXPKEY_WORDS];
+};
 
-	memcpy(K, key, DES3_EDE_KEY_SIZE);
+struct des3_ede_ctx {
+	u32 expkey[DES3_EDE_EXPKEY_WORDS];
+};
 
-	if (unlikely(!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
-		     !((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
-		     (fips_enabled ||
-		      (*flags & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)))
-		goto bad;
+void des_encrypt(const struct des_ctx *ctx, u8 *dst, const u8 *src);
+void des_decrypt(const struct des_ctx *ctx, u8 *dst, const u8 *src);
 
-	if (unlikely(!((K[0] ^ K[4]) | (K[1] ^ K[5]))) && fips_enabled)
-		goto bad;
+void des3_ede_encrypt(const struct des3_ede_ctx *dctx, u8 *dst, const u8 *src);
+void des3_ede_decrypt(const struct des3_ede_ctx *dctx, u8 *dst, const u8 *src);
 
-	err = 0;
+/**
+ * des_expand_key - Expand a DES input key into a key schedule
+ * @ctx: the key schedule
+ * @key: buffer containing the input key
+ * @keylen: size of the buffer contents
+ *
+ * Returns 0 on success, -EINVAL if the input key is rejected and -ENOKEY if
+ * the key is accepted but has been found to be weak.
+ */
+int des_expand_key(struct des_ctx *ctx, const u8 *key, unsigned int keylen);
 
-out:
-	memzero_explicit(K, DES3_EDE_KEY_SIZE);
-
-	return err;
-
-bad:
-	*flags |= CRYPTO_TFM_RES_WEAK_KEY;
-	goto out;
-}
-
-static inline int des3_verify_key(struct crypto_skcipher *tfm, const u8 *key)
-{
-	u32 flags;
-	int err;
-
-	flags = crypto_skcipher_get_flags(tfm);
-	err = __des3_verify_key(&flags, key);
-	crypto_skcipher_set_flags(tfm, flags);
-	return err;
-}
-
-extern unsigned long des_ekey(u32 *pe, const u8 *k);
-
-extern int __des3_ede_setkey(u32 *expkey, u32 *flags, const u8 *key,
-			     unsigned int keylen);
+/**
+ * des3_ede_expand_key - Expand a triple DES input key into a key schedule
+ * @ctx: the key schedule
+ * @key: buffer containing the input key
+ * @keylen: size of the buffer contents
+ *
+ * Returns 0 on success, -EINVAL if the input key is rejected and -ENOKEY if
+ * the key is accepted but has been found to be weak. Note that weak keys will
+ * be rejected (and -EINVAL will be returned) when running in FIPS mode.
+ */
+int des3_ede_expand_key(struct des3_ede_ctx *ctx, const u8 *key,
+			unsigned int keylen);
 
 #endif /* __CRYPTO_DES_H */
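
The fs/cifs/smbencrypt.c hunk earlier in this series already exercises this library API. As an additional sketch (names hypothetical, not from the patch), a single-block caller that chooses to accept weak keys would look roughly like:

/* Hypothetical single-block DES user that tolerates weak keys (-ENOKEY). */
static int example_des_encrypt_block(const u8 *key, const u8 *in, u8 *out)
{
	struct des_ctx ctx;
	int err;

	err = des_expand_key(&ctx, key, DES_KEY_SIZE);
	if (err == -ENOKEY)
		err = 0;		/* weak key accepted by this caller */
	if (!err)
		des_encrypt(&ctx, out, in);

	memzero_explicit(&ctx, sizeof(ctx));
	return err;
}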
diff --git a/include/crypto/gcm.h b/include/crypto/gcm.h
index c50e057..9d7eff0 100644
--- a/include/crypto/gcm.h
+++ b/include/crypto/gcm.h
@@ -1,8 +1,63 @@
 #ifndef _CRYPTO_GCM_H
 #define _CRYPTO_GCM_H
 
+#include <linux/errno.h>
+
 #define GCM_AES_IV_SIZE 12
 #define GCM_RFC4106_IV_SIZE 8
 #define GCM_RFC4543_IV_SIZE 8
 
+/*
+ * validate authentication tag for GCM
+ */
+static inline int crypto_gcm_check_authsize(unsigned int authsize)
+{
+	switch (authsize) {
+	case 4:
+	case 8:
+	case 12:
+	case 13:
+	case 14:
+	case 15:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * validate authentication tag for RFC4106
+ */
+static inline int crypto_rfc4106_check_authsize(unsigned int authsize)
+{
+	switch (authsize) {
+	case 8:
+	case 12:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * validate assoclen for RFC4106/RFC4543
+ */
+static inline int crypto_ipsec_check_assoclen(unsigned int assoclen)
+{
+	switch (assoclen) {
+	case 16:
+	case 20:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
 #endif
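
For illustration only (the surrounding AEAD registration is assumed and not shown), the new checks slot directly into an implementation's setauthsize and request entry points:

/* Hypothetical setauthsize callback for a GCM-style AEAD. */
static int example_gcm_setauthsize(struct crypto_aead *tfm,
				   unsigned int authsize)
{
	return crypto_gcm_check_authsize(authsize);
}

/* Hypothetical rfc4106 encrypt entry validating the associated data length. */
static int example_rfc4106_encrypt(struct aead_request *req)
{
	int err = crypto_ipsec_check_assoclen(req->assoclen);

	if (err)
		return err;

	/* ... continue with the real GCM processing ... */
	return 0;
}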
diff --git a/include/crypto/ghash.h b/include/crypto/ghash.h
index 9136301..f832c9f 100644
--- a/include/crypto/ghash.h
+++ b/include/crypto/ghash.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /*
- * Common values for GHASH algorithms
+ * Common values for the GHASH hash function
  */
 
 #ifndef __CRYPTO_GHASH_H__
diff --git a/include/crypto/internal/cryptouser.h b/include/crypto/internal/cryptouser.h
index 8c602b1..fd54074 100644
--- a/include/crypto/internal/cryptouser.h
+++ b/include/crypto/internal/cryptouser.h
@@ -1,14 +1,15 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/cryptouser.h>
 #include <net/netlink.h>
 
-extern struct sock *crypto_nlsk;
-
 struct crypto_alg *crypto_alg_match(struct crypto_user_alg *p, int exact);
 
 #ifdef CONFIG_CRYPTO_STATS
 int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct nlattr **attrs);
 #else
-static int crypto_reportstat(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, struct nlattr **attrs)
+static inline int crypto_reportstat(struct sk_buff *in_skb,
+				    struct nlmsghdr *in_nlh,
+				    struct nlattr **attrs)
 {
 	return -ENOTSUPP;
 }
diff --git a/include/crypto/internal/des.h b/include/crypto/internal/des.h
new file mode 100644
index 0000000..81ea1a4
--- /dev/null
+++ b/include/crypto/internal/des.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DES & Triple DES EDE key verification helpers
+ */
+
+#ifndef __CRYPTO_INTERNAL_DES_H
+#define __CRYPTO_INTERNAL_DES_H
+
+#include <linux/crypto.h>
+#include <linux/fips.h>
+#include <crypto/des.h>
+#include <crypto/aead.h>
+#include <crypto/skcipher.h>
+
+/**
+ * crypto_des_verify_key - Check whether a DES key is weak
+ * @tfm: the crypto algo
+ * @key: the key buffer
+ *
+ * Returns -EINVAL if the key is weak and the crypto TFM does not permit weak
+ * keys. Otherwise, 0 is returned.
+ *
+ * It is the job of the caller to ensure that the size of the key equals
+ * DES_KEY_SIZE.
+ */
+static inline int crypto_des_verify_key(struct crypto_tfm *tfm, const u8 *key)
+{
+	struct des_ctx tmp;
+	int err;
+
+	err = des_expand_key(&tmp, key, DES_KEY_SIZE);
+	if (err == -ENOKEY) {
+		if (crypto_tfm_get_flags(tfm) & CRYPTO_TFM_REQ_FORBID_WEAK_KEYS)
+			err = -EINVAL;
+		else
+			err = 0;
+	}
+
+	if (err)
+		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
+
+	memzero_explicit(&tmp, sizeof(tmp));
+	return err;
+}
+
+/*
+ * RFC2451:
+ *
+ *   For DES-EDE3, there is no known need to reject weak or
+ *   complementation keys.  Any weakness is obviated by the use of
+ *   multiple keys.
+ *
+ *   However, if the first two or last two independent 64-bit keys are
+ *   equal (k1 == k2 or k2 == k3), then the DES3 operation is simply the
+ *   same as DES.  Implementers MUST reject keys that exhibit this
+ *   property.
+ *
+ */
+static inline int des3_ede_verify_key(const u8 *key, unsigned int key_len,
+				      bool check_weak)
+{
+	int ret = fips_enabled ? -EINVAL : -ENOKEY;
+	u32 K[6];
+
+	memcpy(K, key, DES3_EDE_KEY_SIZE);
+
+	if ((!((K[0] ^ K[2]) | (K[1] ^ K[3])) ||
+	     !((K[2] ^ K[4]) | (K[3] ^ K[5]))) &&
+	    (fips_enabled || check_weak))
+		goto bad;
+
+	if ((!((K[0] ^ K[4]) | (K[1] ^ K[5]))) && fips_enabled)
+		goto bad;
+
+	ret = 0;
+bad:
+	memzero_explicit(K, DES3_EDE_KEY_SIZE);
+
+	return ret;
+}
+
+/**
+ * crypto_des3_ede_verify_key - Check whether a DES3-EDE key is weak
+ * @tfm: the crypto algo
+ * @key: the key buffer
+ *
+ * Returns -EINVAL if the key is weak and the crypto TFM does not permit weak
+ * keys or when running in FIPS mode. Otherwise, 0 is returned. Note that some
+ * keys are rejected in FIPS mode even if weak keys are permitted by the TFM
+ * flags.
+ *
+ * It is the job of the caller to ensure that the size of the key equals
+ * DES3_EDE_KEY_SIZE.
+ */
+static inline int crypto_des3_ede_verify_key(struct crypto_tfm *tfm,
+					     const u8 *key)
+{
+	int err;
+
+	err = des3_ede_verify_key(key, DES3_EDE_KEY_SIZE,
+				  crypto_tfm_get_flags(tfm) &
+				  CRYPTO_TFM_REQ_FORBID_WEAK_KEYS);
+	if (err)
+		crypto_tfm_set_flags(tfm, CRYPTO_TFM_RES_WEAK_KEY);
+	return err;
+}
+
+static inline int verify_skcipher_des_key(struct crypto_skcipher *tfm,
+					  const u8 *key)
+{
+	return crypto_des_verify_key(crypto_skcipher_tfm(tfm), key);
+}
+
+static inline int verify_skcipher_des3_key(struct crypto_skcipher *tfm,
+					   const u8 *key)
+{
+	return crypto_des3_ede_verify_key(crypto_skcipher_tfm(tfm), key);
+}
+
+static inline int verify_ablkcipher_des_key(struct crypto_ablkcipher *tfm,
+					    const u8 *key)
+{
+	return crypto_des_verify_key(crypto_ablkcipher_tfm(tfm), key);
+}
+
+static inline int verify_ablkcipher_des3_key(struct crypto_ablkcipher *tfm,
+					     const u8 *key)
+{
+	return crypto_des3_ede_verify_key(crypto_ablkcipher_tfm(tfm), key);
+}
+
+static inline int verify_aead_des_key(struct crypto_aead *tfm, const u8 *key,
+				      int keylen)
+{
+	if (keylen != DES_KEY_SIZE) {
+		crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	return crypto_des_verify_key(crypto_aead_tfm(tfm), key);
+}
+
+static inline int verify_aead_des3_key(struct crypto_aead *tfm, const u8 *key,
+				       int keylen)
+{
+	if (keylen != DES3_EDE_KEY_SIZE) {
+		crypto_aead_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN);
+		return -EINVAL;
+	}
+	return crypto_des3_ede_verify_key(crypto_aead_tfm(tfm), key);
+}
+
+#endif /* __CRYPTO_INTERNAL_DES_H */
diff --git a/include/crypto/internal/skcipher.h b/include/crypto/internal/skcipher.h
index d68faa5..734b6f7 100644
--- a/include/crypto/internal/skcipher.h
+++ b/include/crypto/internal/skcipher.h
@@ -148,6 +148,11 @@
 			       struct aead_request *req, bool atomic);
 void skcipher_walk_complete(struct skcipher_walk *walk, int err);
 
+static inline void skcipher_walk_abort(struct skcipher_walk *walk)
+{
+	skcipher_walk_done(walk, -ECANCELED);
+}
+
 static inline void ablkcipher_request_complete(struct ablkcipher_request *req,
 					       int err)
 {
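
skcipher_walk_abort() gives drivers a sanctioned way to bail out of a walk they have already started. A hedged sketch of the intended pattern, with a made-up error check standing in for real hardware state:

/* Hypothetical stand-in for a driver-specific failure check. */
static bool example_hw_gone(void)
{
	return false;
}

static int example_encrypt(struct skcipher_request *req)
{
	struct skcipher_walk walk;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while (walk.nbytes) {
		if (example_hw_gone()) {
			skcipher_walk_abort(&walk);	/* unwinds the walk state */
			return -ENODEV;
		}
		/* process walk.nbytes bytes from walk.src to walk.dst here */
		err = skcipher_walk_done(&walk, 0);
	}
	return err;
}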
diff --git a/include/crypto/morus1280_glue.h b/include/crypto/morus1280_glue.h
deleted file mode 100644
index 5cefddb..0000000
--- a/include/crypto/morus1280_glue.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * The MORUS-1280 Authenticated-Encryption Algorithm
- *   Common glue skeleton -- header file
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#ifndef _CRYPTO_MORUS1280_GLUE_H
-#define _CRYPTO_MORUS1280_GLUE_H
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <crypto/algapi.h>
-#include <crypto/aead.h>
-#include <crypto/morus_common.h>
-
-#define MORUS1280_WORD_SIZE 8
-#define MORUS1280_BLOCK_SIZE (MORUS_BLOCK_WORDS * MORUS1280_WORD_SIZE)
-
-struct morus1280_block {
-	u8 bytes[MORUS1280_BLOCK_SIZE];
-};
-
-struct morus1280_glue_ops {
-	void (*init)(void *state, const void *key, const void *iv);
-	void (*ad)(void *state, const void *data, unsigned int length);
-	void (*enc)(void *state, const void *src, void *dst, unsigned int length);
-	void (*dec)(void *state, const void *src, void *dst, unsigned int length);
-	void (*enc_tail)(void *state, const void *src, void *dst, unsigned int length);
-	void (*dec_tail)(void *state, const void *src, void *dst, unsigned int length);
-	void (*final)(void *state, void *tag_xor, u64 assoclen, u64 cryptlen);
-};
-
-struct morus1280_ctx {
-	const struct morus1280_glue_ops *ops;
-	struct morus1280_block key;
-};
-
-void crypto_morus1280_glue_init_ops(struct crypto_aead *aead,
-				    const struct morus1280_glue_ops *ops);
-int crypto_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key,
-				 unsigned int keylen);
-int crypto_morus1280_glue_setauthsize(struct crypto_aead *tfm,
-				      unsigned int authsize);
-int crypto_morus1280_glue_encrypt(struct aead_request *req);
-int crypto_morus1280_glue_decrypt(struct aead_request *req);
-
-#define MORUS1280_DECLARE_ALG(id, driver_name, priority) \
-	static const struct morus1280_glue_ops crypto_morus1280_##id##_ops = {\
-		.init = crypto_morus1280_##id##_init, \
-		.ad = crypto_morus1280_##id##_ad, \
-		.enc = crypto_morus1280_##id##_enc, \
-		.enc_tail = crypto_morus1280_##id##_enc_tail, \
-		.dec = crypto_morus1280_##id##_dec, \
-		.dec_tail = crypto_morus1280_##id##_dec_tail, \
-		.final = crypto_morus1280_##id##_final, \
-	}; \
-	\
-	static int crypto_morus1280_##id##_init_tfm(struct crypto_aead *tfm) \
-	{ \
-		crypto_morus1280_glue_init_ops(tfm, &crypto_morus1280_##id##_ops); \
-		return 0; \
-	} \
-	\
-	static void crypto_morus1280_##id##_exit_tfm(struct crypto_aead *tfm) \
-	{ \
-	} \
-	\
-	static struct aead_alg crypto_morus1280_##id##_alg = { \
-		.setkey = crypto_morus1280_glue_setkey, \
-		.setauthsize = crypto_morus1280_glue_setauthsize, \
-		.encrypt = crypto_morus1280_glue_encrypt, \
-		.decrypt = crypto_morus1280_glue_decrypt, \
-		.init = crypto_morus1280_##id##_init_tfm, \
-		.exit = crypto_morus1280_##id##_exit_tfm, \
-		\
-		.ivsize = MORUS_NONCE_SIZE, \
-		.maxauthsize = MORUS_MAX_AUTH_SIZE, \
-		.chunksize = MORUS1280_BLOCK_SIZE, \
-		\
-		.base = { \
-			.cra_flags = CRYPTO_ALG_INTERNAL, \
-			.cra_blocksize = 1, \
-			.cra_ctxsize = sizeof(struct morus1280_ctx), \
-			.cra_alignmask = 0, \
-			.cra_priority = priority, \
-			\
-			.cra_name = "__morus1280", \
-			.cra_driver_name = "__"driver_name, \
-			\
-			.cra_module = THIS_MODULE, \
-		} \
-	}
-
-#endif /* _CRYPTO_MORUS1280_GLUE_H */
diff --git a/include/crypto/morus640_glue.h b/include/crypto/morus640_glue.h
deleted file mode 100644
index 0ee6266..0000000
--- a/include/crypto/morus640_glue.h
+++ /dev/null
@@ -1,97 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * The MORUS-640 Authenticated-Encryption Algorithm
- *   Common glue skeleton -- header file
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#ifndef _CRYPTO_MORUS640_GLUE_H
-#define _CRYPTO_MORUS640_GLUE_H
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <crypto/algapi.h>
-#include <crypto/aead.h>
-#include <crypto/morus_common.h>
-
-#define MORUS640_WORD_SIZE 4
-#define MORUS640_BLOCK_SIZE (MORUS_BLOCK_WORDS * MORUS640_WORD_SIZE)
-
-struct morus640_block {
-	u8 bytes[MORUS640_BLOCK_SIZE];
-};
-
-struct morus640_glue_ops {
-	void (*init)(void *state, const void *key, const void *iv);
-	void (*ad)(void *state, const void *data, unsigned int length);
-	void (*enc)(void *state, const void *src, void *dst, unsigned int length);
-	void (*dec)(void *state, const void *src, void *dst, unsigned int length);
-	void (*enc_tail)(void *state, const void *src, void *dst, unsigned int length);
-	void (*dec_tail)(void *state, const void *src, void *dst, unsigned int length);
-	void (*final)(void *state, void *tag_xor, u64 assoclen, u64 cryptlen);
-};
-
-struct morus640_ctx {
-	const struct morus640_glue_ops *ops;
-	struct morus640_block key;
-};
-
-void crypto_morus640_glue_init_ops(struct crypto_aead *aead,
-				   const struct morus640_glue_ops *ops);
-int crypto_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key,
-				unsigned int keylen);
-int crypto_morus640_glue_setauthsize(struct crypto_aead *tfm,
-				     unsigned int authsize);
-int crypto_morus640_glue_encrypt(struct aead_request *req);
-int crypto_morus640_glue_decrypt(struct aead_request *req);
-
-#define MORUS640_DECLARE_ALG(id, driver_name, priority) \
-	static const struct morus640_glue_ops crypto_morus640_##id##_ops = {\
-		.init = crypto_morus640_##id##_init, \
-		.ad = crypto_morus640_##id##_ad, \
-		.enc = crypto_morus640_##id##_enc, \
-		.enc_tail = crypto_morus640_##id##_enc_tail, \
-		.dec = crypto_morus640_##id##_dec, \
-		.dec_tail = crypto_morus640_##id##_dec_tail, \
-		.final = crypto_morus640_##id##_final, \
-	}; \
-	\
-	static int crypto_morus640_##id##_init_tfm(struct crypto_aead *tfm) \
-	{ \
-		crypto_morus640_glue_init_ops(tfm, &crypto_morus640_##id##_ops); \
-		return 0; \
-	} \
-	\
-	static void crypto_morus640_##id##_exit_tfm(struct crypto_aead *tfm) \
-	{ \
-	} \
-	\
-	static struct aead_alg crypto_morus640_##id##_alg = {\
-		.setkey = crypto_morus640_glue_setkey, \
-		.setauthsize = crypto_morus640_glue_setauthsize, \
-		.encrypt = crypto_morus640_glue_encrypt, \
-		.decrypt = crypto_morus640_glue_decrypt, \
-		.init = crypto_morus640_##id##_init_tfm, \
-		.exit = crypto_morus640_##id##_exit_tfm, \
-		\
-		.ivsize = MORUS_NONCE_SIZE, \
-		.maxauthsize = MORUS_MAX_AUTH_SIZE, \
-		.chunksize = MORUS640_BLOCK_SIZE, \
-		\
-		.base = { \
-			.cra_flags = CRYPTO_ALG_INTERNAL, \
-			.cra_blocksize = 1, \
-			.cra_ctxsize = sizeof(struct morus640_ctx), \
-			.cra_alignmask = 0, \
-			.cra_priority = priority, \
-			\
-			.cra_name = "__morus640", \
-			.cra_driver_name = "__"driver_name, \
-			\
-			.cra_module = THIS_MODULE, \
-		} \
-	}
-
-#endif /* _CRYPTO_MORUS640_GLUE_H */
diff --git a/include/crypto/morus_common.h b/include/crypto/morus_common.h
deleted file mode 100644
index 969510a..0000000
--- a/include/crypto/morus_common.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * The MORUS Authenticated-Encryption Algorithm
- *   Common definitions
- *
- * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com>
- * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved.
- */
-
-#ifndef _CRYPTO_MORUS_COMMON_H
-#define _CRYPTO_MORUS_COMMON_H
-
-#define MORUS_BLOCK_WORDS 4
-#define MORUS_STATE_BLOCKS 5
-#define MORUS_NONCE_SIZE 16
-#define MORUS_MAX_AUTH_SIZE 16
-
-#endif /* _CRYPTO_MORUS_COMMON_H */
diff --git a/include/crypto/sha.h b/include/crypto/sha.h
index 8a46202..5c2132c 100644
--- a/include/crypto/sha.h
+++ b/include/crypto/sha.h
@@ -112,4 +112,51 @@
 
 extern int crypto_sha512_finup(struct shash_desc *desc, const u8 *data,
 			       unsigned int len, u8 *hash);
+
+/*
+ * Stand-alone implementation of the SHA256 algorithm. It is designed to
+ * have as few dependencies as possible so it can be used in the
+ * kexec_file purgatory. In other cases you should generally use the
+ * hash APIs from include/crypto/hash.h, especially when hashing large
+ * amounts of data, as those APIs may be hw-accelerated.
+ *
+ * For details see lib/crypto/sha256.c
+ */
+
+static inline int sha256_init(struct sha256_state *sctx)
+{
+	sctx->state[0] = SHA256_H0;
+	sctx->state[1] = SHA256_H1;
+	sctx->state[2] = SHA256_H2;
+	sctx->state[3] = SHA256_H3;
+	sctx->state[4] = SHA256_H4;
+	sctx->state[5] = SHA256_H5;
+	sctx->state[6] = SHA256_H6;
+	sctx->state[7] = SHA256_H7;
+	sctx->count = 0;
+
+	return 0;
+}
+extern int sha256_update(struct sha256_state *sctx, const u8 *input,
+			 unsigned int length);
+extern int sha256_final(struct sha256_state *sctx, u8 *hash);
+
+static inline int sha224_init(struct sha256_state *sctx)
+{
+	sctx->state[0] = SHA224_H0;
+	sctx->state[1] = SHA224_H1;
+	sctx->state[2] = SHA224_H2;
+	sctx->state[3] = SHA224_H3;
+	sctx->state[4] = SHA224_H4;
+	sctx->state[5] = SHA224_H5;
+	sctx->state[6] = SHA224_H6;
+	sctx->state[7] = SHA224_H7;
+	sctx->count = 0;
+
+	return 0;
+}
+extern int sha224_update(struct sha256_state *sctx, const u8 *input,
+			 unsigned int length);
+extern int sha224_final(struct sha256_state *sctx, u8 *hash);
+
 #endif
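
A minimal sketch of the library-style usage the comment above describes; the buffer names are illustrative:

/* Computes a SHA-256 digest without going through the crypto_shash API. */
static void example_sha256(const u8 *data, unsigned int len,
			   u8 digest[SHA256_DIGEST_SIZE])
{
	struct sha256_state sctx;

	sha256_init(&sctx);
	sha256_update(&sctx, data, len);
	sha256_final(&sctx, digest);
}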
diff --git a/include/crypto/sha1_base.h b/include/crypto/sha1_base.h
index 63c14f2..20fd1f7 100644
--- a/include/crypto/sha1_base.h
+++ b/include/crypto/sha1_base.h
@@ -5,6 +5,9 @@
  * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
  */
 
+#ifndef _CRYPTO_SHA1_BASE_H
+#define _CRYPTO_SHA1_BASE_H
+
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <linux/crypto.h>
@@ -101,3 +104,5 @@
 	*sctx = (struct sha1_state){};
 	return 0;
 }
+
+#endif /* _CRYPTO_SHA1_BASE_H */
diff --git a/include/crypto/sha256_base.h b/include/crypto/sha256_base.h
index 59159bc..cea60cf 100644
--- a/include/crypto/sha256_base.h
+++ b/include/crypto/sha256_base.h
@@ -5,6 +5,9 @@
  * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
  */
 
+#ifndef _CRYPTO_SHA256_BASE_H
+#define _CRYPTO_SHA256_BASE_H
+
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <linux/crypto.h>
@@ -19,34 +22,14 @@
 {
 	struct sha256_state *sctx = shash_desc_ctx(desc);
 
-	sctx->state[0] = SHA224_H0;
-	sctx->state[1] = SHA224_H1;
-	sctx->state[2] = SHA224_H2;
-	sctx->state[3] = SHA224_H3;
-	sctx->state[4] = SHA224_H4;
-	sctx->state[5] = SHA224_H5;
-	sctx->state[6] = SHA224_H6;
-	sctx->state[7] = SHA224_H7;
-	sctx->count = 0;
-
-	return 0;
+	return sha224_init(sctx);
 }
 
 static inline int sha256_base_init(struct shash_desc *desc)
 {
 	struct sha256_state *sctx = shash_desc_ctx(desc);
 
-	sctx->state[0] = SHA256_H0;
-	sctx->state[1] = SHA256_H1;
-	sctx->state[2] = SHA256_H2;
-	sctx->state[3] = SHA256_H3;
-	sctx->state[4] = SHA256_H4;
-	sctx->state[5] = SHA256_H5;
-	sctx->state[6] = SHA256_H6;
-	sctx->state[7] = SHA256_H7;
-	sctx->count = 0;
-
-	return 0;
+	return sha256_init(sctx);
 }
 
 static inline int sha256_base_do_update(struct shash_desc *desc,
@@ -123,3 +106,5 @@
 	*sctx = (struct sha256_state){};
 	return 0;
 }
+
+#endif /* _CRYPTO_SHA256_BASE_H */
diff --git a/include/crypto/sha512_base.h b/include/crypto/sha512_base.h
index 099be80..fb19c77 100644
--- a/include/crypto/sha512_base.h
+++ b/include/crypto/sha512_base.h
@@ -5,6 +5,9 @@
  * Copyright (C) 2015 Linaro Ltd <ard.biesheuvel@linaro.org>
  */
 
+#ifndef _CRYPTO_SHA512_BASE_H
+#define _CRYPTO_SHA512_BASE_H
+
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <linux/crypto.h>
@@ -126,3 +129,5 @@
 	*sctx = (struct sha512_state){};
 	return 0;
 }
+
+#endif /* _CRYPTO_SHA512_BASE_H */
diff --git a/include/crypto/sm3_base.h b/include/crypto/sm3_base.h
index 31891b0..1cbf9aa 100644
--- a/include/crypto/sm3_base.h
+++ b/include/crypto/sm3_base.h
@@ -6,6 +6,9 @@
  * Written by Gilad Ben-Yossef <gilad@benyossef.com>
  */
 
+#ifndef _CRYPTO_SM3_BASE_H
+#define _CRYPTO_SM3_BASE_H
+
 #include <crypto/internal/hash.h>
 #include <crypto/sm3.h>
 #include <linux/crypto.h>
@@ -104,3 +107,5 @@
 	*sctx = (struct sm3_state){};
 	return 0;
 }
+
+#endif /* _CRYPTO_SM3_BASE_H */
diff --git a/include/linux/fips.h b/include/linux/fips.h
index afeeece..c6961e9 100644
--- a/include/linux/fips.h
+++ b/include/linux/fips.h
@@ -4,8 +4,15 @@
 
 #ifdef CONFIG_CRYPTO_FIPS
 extern int fips_enabled;
+extern struct atomic_notifier_head fips_fail_notif_chain;
+
+void fips_fail_notify(void);
+
 #else
 #define fips_enabled 0
+
+static inline void fips_fail_notify(void) {}
+
 #endif
 
 #endif
diff --git a/include/linux/padata.h b/include/linux/padata.h
index 56f09e3..23717ee 100644
--- a/include/linux/padata.h
+++ b/include/linux/padata.h
@@ -12,7 +12,6 @@
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
 #include <linux/list.h>
-#include <linux/timer.h>
 #include <linux/notifier.h>
 #include <linux/kobject.h>
 
@@ -36,6 +35,7 @@
 	struct parallel_data	*pd;
 	int			cb_cpu;
 	int			cpu;
+	unsigned int		seq_nr;
 	int			info;
 	void                    (*parallel)(struct padata_priv *padata);
 	void                    (*serial)(struct padata_priv *padata);
@@ -73,20 +73,14 @@
  * @serial: List to wait for serialization after reordering.
  * @pwork: work struct for parallelization.
  * @swork: work struct for serialization.
- * @pd: Backpointer to the internal control structure.
  * @work: work struct for parallelization.
- * @reorder_work: work struct for reordering.
  * @num_obj: Number of objects that are processed by this cpu.
- * @cpu_index: Index of the cpu.
  */
 struct padata_parallel_queue {
        struct padata_list    parallel;
        struct padata_list    reorder;
-       struct parallel_data *pd;
        struct work_struct    work;
-       struct work_struct    reorder_work;
        atomic_t              num_obj;
-       int                   cpu_index;
 };
 
 /**
@@ -110,10 +104,11 @@
  * @reorder_objects: Number of objects waiting in the reorder queues.
  * @refcnt: Number of objects holding a reference on this parallel_data.
  * @max_seq_nr:  Maximal used sequence number.
- * @cpumask: The cpumasks in use for parallel and serial workers.
- * @lock: Reorder lock.
  * @processed: Number of already processed objects.
- * @timer: Reorder timer.
+ * @cpu: Next CPU to be processed.
+ * @cpumask: The cpumasks in use for parallel and serial workers.
+ * @reorder_work: work struct for reordering.
+ * @lock: Reorder lock.
  */
 struct parallel_data {
 	struct padata_instance		*pinst;
@@ -122,17 +117,19 @@
 	atomic_t			reorder_objects;
 	atomic_t			refcnt;
 	atomic_t			seq_nr;
-	struct padata_cpumask		cpumask;
-	spinlock_t                      lock ____cacheline_aligned;
 	unsigned int			processed;
-	struct timer_list		timer;
+	int				cpu;
+	struct padata_cpumask		cpumask;
+	struct work_struct		reorder_work;
+	spinlock_t                      lock ____cacheline_aligned;
 };
 
 /**
  * struct padata_instance - The overall control structure.
  *
  * @cpu_notifier: cpu hotplug notifier.
- * @wq: The workqueue in use.
+ * @parallel_wq: The workqueue used for parallel work.
+ * @serial_wq: The workqueue used for serial work.
  * @pd: The internal control structure.
  * @cpumask: User supplied cpumasks for parallel and serial works.
  * @cpumask_change_notifier: Notifiers chain for user-defined notify
@@ -144,7 +141,8 @@
  */
 struct padata_instance {
 	struct hlist_node		 node;
-	struct workqueue_struct		*wq;
+	struct workqueue_struct		*parallel_wq;
+	struct workqueue_struct		*serial_wq;
 	struct parallel_data		*pd;
 	struct padata_cpumask		cpumask;
 	struct blocking_notifier_head	 cpumask_change_notifier;
@@ -156,11 +154,10 @@
 #define	PADATA_INVALID	4
 };
 
-extern struct padata_instance *padata_alloc_possible(
-					struct workqueue_struct *wq);
+extern struct padata_instance *padata_alloc_possible(const char *name);
 extern void padata_free(struct padata_instance *pinst);
 extern int padata_do_parallel(struct padata_instance *pinst,
-			      struct padata_priv *padata, int cb_cpu);
+			      struct padata_priv *padata, int *cb_cpu);
 extern void padata_do_serial(struct padata_priv *padata);
 extern int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
 			      cpumask_var_t cpumask);
diff --git a/include/linux/sha256.h b/include/linux/sha256.h
deleted file mode 100644
index 26972b9..0000000
--- a/include/linux/sha256.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *  Copyright (C) 2014 Red Hat Inc.
- *
- *  Author: Vivek Goyal <vgoyal@redhat.com>
- */
-
-#ifndef SHA256_H
-#define SHA256_H
-
-#include <linux/types.h>
-#include <crypto/sha.h>
-
-/*
- * Stand-alone implementation of the SHA256 algorithm. It is designed to
- * have as little dependencies as possible so it can be used in the
- * kexec_file purgatory. In other cases you should use the implementation in
- * crypto/.
- *
- * For details see lib/sha256.c
- */
-
-extern int sha256_init(struct sha256_state *sctx);
-extern int sha256_update(struct sha256_state *sctx, const u8 *input,
-			 unsigned int length);
-extern int sha256_final(struct sha256_state *sctx, u8 *hash);
-
-#endif /* SHA256_H */
diff --git a/include/linux/timeriomem-rng.h b/include/linux/timeriomem-rng.h
index fd4a6e6..672df7f 100644
--- a/include/linux/timeriomem-rng.h
+++ b/include/linux/timeriomem-rng.h
@@ -5,6 +5,9 @@
  * Copyright (c) 2009 Alexander Clouter <alex@digriz.org.uk>
  */
 
+#ifndef _LINUX_TIMERIOMEM_RNG_H
+#define _LINUX_TIMERIOMEM_RNG_H
+
 struct timeriomem_rng_data {
 	void __iomem		*address;
 
@@ -14,3 +17,5 @@
 	/* bits of entropy per 1024 bits read */
 	unsigned int		quality;
 };
+
+#endif /* _LINUX_TIMERIOMEM_RNG_H */
diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h
index b7c585b..4261d1c 100644
--- a/include/linux/workqueue.h
+++ b/include/linux/workqueue.h
@@ -435,6 +435,10 @@
 
 extern void destroy_workqueue(struct workqueue_struct *wq);
 
+struct workqueue_attrs *alloc_workqueue_attrs(void);
+void free_workqueue_attrs(struct workqueue_attrs *attrs);
+int apply_workqueue_attrs(struct workqueue_struct *wq,
+			  const struct workqueue_attrs *attrs);
 int workqueue_set_unbound_cpumask(cpumask_var_t cpumask);
 
 extern bool queue_work_on(int cpu, struct workqueue_struct *wq,
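
With alloc/apply/free_workqueue_attrs() now exported, callers such as padata can retune an unbound workqueue's cpumask. A sketch under the assumption (per the hotplug-lock changes in this series) that the CPU hotplug read lock must be held around apply_workqueue_attrs(); the helper name is hypothetical:

/* Hypothetical helper: bind an unbound workqueue to a caller-supplied mask. */
static int example_set_wq_cpumask(struct workqueue_struct *wq,
				  const struct cpumask *mask)
{
	struct workqueue_attrs *attrs;
	int err;

	attrs = alloc_workqueue_attrs();
	if (!attrs)
		return -ENOMEM;

	cpumask_copy(attrs->cpumask, mask);

	cpus_read_lock();
	err = apply_workqueue_attrs(wq, attrs);
	cpus_read_unlock();

	free_workqueue_attrs(attrs);
	return err;
}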
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index ab40d7a..f3f3189 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -170,6 +170,9 @@
 #ifdef CONFIG_XDP_SOCKETS
 	struct netns_xdp	xdp;
 #endif
+#if IS_ENABLED(CONFIG_CRYPTO_USER)
+	struct sock		*crypto_nlsk;
+#endif
 	struct sock		*diag_nlsk;
 	atomic_t		fnhe_genid;
 } __randomize_layout;
diff --git a/include/uapi/linux/cryptouser.h b/include/uapi/linux/cryptouser.h
index 4dc1603..5730c67 100644
--- a/include/uapi/linux/cryptouser.h
+++ b/include/uapi/linux/cryptouser.h
@@ -19,6 +19,9 @@
  * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
  */
 
+#ifndef _UAPI_LINUX_CRYPTOUSER_H
+#define _UAPI_LINUX_CRYPTOUSER_H
+
 #include <linux/types.h>
 
 /* Netlink configuration messages.  */
@@ -198,3 +201,5 @@
 
 #define CRYPTO_REPORT_MAXSIZE (sizeof(struct crypto_user_alg) + \
 			       sizeof(struct crypto_report_blkcipher))
+
+#endif /* _UAPI_LINUX_CRYPTOUSER_H */
diff --git a/kernel/padata.c b/kernel/padata.c
index 15a8ad6..c3fec14 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -46,18 +46,13 @@
 	return target_cpu;
 }
 
-static int padata_cpu_hash(struct parallel_data *pd)
+static int padata_cpu_hash(struct parallel_data *pd, unsigned int seq_nr)
 {
-	unsigned int seq_nr;
-	int cpu_index;
-
 	/*
 	 * Hash the sequence numbers to the cpus by taking
 	 * seq_nr mod. number of cpus in use.
 	 */
-
-	seq_nr = atomic_inc_return(&pd->seq_nr);
-	cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
+	int cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
 
 	return padata_index_to_cpu(pd, cpu_index);
 }
@@ -94,17 +89,19 @@
  *
  * @pinst: padata instance
  * @padata: object to be parallelized
- * @cb_cpu: cpu the serialization callback function will run on,
- *          must be in the serial cpumask of padata(i.e. cpumask.cbcpu).
+ * @cb_cpu: pointer to the CPU that the serialization callback function should
+ *          run on.  If it's not in the serial cpumask of @pinst
+ *          (i.e. cpumask.cbcpu), this function selects a fallback CPU and if
+ *          none found, returns -EINVAL.
  *
  * The parallelization callback function will run with BHs off.
  * Note: Every object which is parallelized by padata_do_parallel
  * must be seen by padata_do_serial.
  */
 int padata_do_parallel(struct padata_instance *pinst,
-		       struct padata_priv *padata, int cb_cpu)
+		       struct padata_priv *padata, int *cb_cpu)
 {
-	int target_cpu, err;
+	int i, cpu, cpu_index, target_cpu, err;
 	struct padata_parallel_queue *queue;
 	struct parallel_data *pd;
 
@@ -116,8 +113,19 @@
 	if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
 		goto out;
 
-	if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu))
-		goto out;
+	if (!cpumask_test_cpu(*cb_cpu, pd->cpumask.cbcpu)) {
+		if (!cpumask_weight(pd->cpumask.cbcpu))
+			goto out;
+
+		/* Select a fallback CPU and notify the caller. */
+		cpu_index = *cb_cpu % cpumask_weight(pd->cpumask.cbcpu);
+
+		cpu = cpumask_first(pd->cpumask.cbcpu);
+		for (i = 0; i < cpu_index; i++)
+			cpu = cpumask_next(cpu, pd->cpumask.cbcpu);
+
+		*cb_cpu = cpu;
+	}
 
 	err =  -EBUSY;
 	if ((pinst->flags & PADATA_RESET))
@@ -129,9 +137,10 @@
 	err = 0;
 	atomic_inc(&pd->refcnt);
 	padata->pd = pd;
-	padata->cb_cpu = cb_cpu;
+	padata->cb_cpu = *cb_cpu;
 
-	target_cpu = padata_cpu_hash(pd);
+	padata->seq_nr = atomic_inc_return(&pd->seq_nr);
+	target_cpu = padata_cpu_hash(pd, padata->seq_nr);
 	padata->cpu = target_cpu;
 	queue = per_cpu_ptr(pd->pqueue, target_cpu);
 
@@ -139,7 +148,7 @@
 	list_add_tail(&padata->list, &queue->parallel.list);
 	spin_unlock(&queue->parallel.lock);
 
-	queue_work_on(target_cpu, pinst->wq, &queue->work);
+	queue_work(pinst->parallel_wq, &queue->work);
 
 out:
 	rcu_read_unlock_bh();
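
Since cb_cpu is now an in/out pointer, a caller passes the address of its preferred callback CPU and reads back whatever CPU padata settled on. A sketch only (the submitter name is hypothetical):

/* Hypothetical submitter: padata may rewrite cb_cpu to a fallback CPU. */
static int example_submit(struct padata_instance *pinst,
			  struct padata_priv *padata)
{
	int cb_cpu = cpumask_first(cpu_online_mask);	/* initial preference */
	int err;

	err = padata_do_parallel(pinst, padata, &cb_cpu);
	if (err)
		return err;

	/* cb_cpu now names the CPU the serial callback will run on. */
	return 0;
}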
@@ -149,63 +158,53 @@
 EXPORT_SYMBOL(padata_do_parallel);
 
 /*
- * padata_get_next - Get the next object that needs serialization.
+ * padata_find_next - Find the next object that needs serialization.
  *
  * Return values are:
  *
  * A pointer to the control struct of the next object that needs
  * serialization, if present in one of the percpu reorder queues.
  *
- * -EINPROGRESS, if the next object that needs serialization will
+ * NULL, if the next object that needs serialization will
  *  be parallel processed by another cpu and is not yet present in
  *  the cpu's reorder queue.
- *
- * -ENODATA, if this cpu has to do the parallel processing for
- *  the next object.
  */
-static struct padata_priv *padata_get_next(struct parallel_data *pd)
+static struct padata_priv *padata_find_next(struct parallel_data *pd,
+					    bool remove_object)
 {
-	int cpu, num_cpus;
-	unsigned int next_nr, next_index;
 	struct padata_parallel_queue *next_queue;
 	struct padata_priv *padata;
 	struct padata_list *reorder;
+	int cpu = pd->cpu;
 
-	num_cpus = cpumask_weight(pd->cpumask.pcpu);
-
-	/*
-	 * Calculate the percpu reorder queue and the sequence
-	 * number of the next object.
-	 */
-	next_nr = pd->processed;
-	next_index = next_nr % num_cpus;
-	cpu = padata_index_to_cpu(pd, next_index);
 	next_queue = per_cpu_ptr(pd->pqueue, cpu);
-
 	reorder = &next_queue->reorder;
 
 	spin_lock(&reorder->lock);
-	if (!list_empty(&reorder->list)) {
-		padata = list_entry(reorder->list.next,
-				    struct padata_priv, list);
+	if (list_empty(&reorder->list)) {
+		spin_unlock(&reorder->lock);
+		return NULL;
+	}
 
+	padata = list_entry(reorder->list.next, struct padata_priv, list);
+
+	/*
+	 * Checks the rare case where two or more parallel jobs have hashed to
+	 * the same CPU and one of the later ones finishes first.
+	 */
+	if (padata->seq_nr != pd->processed) {
+		spin_unlock(&reorder->lock);
+		return NULL;
+	}
+
+	if (remove_object) {
 		list_del_init(&padata->list);
 		atomic_dec(&pd->reorder_objects);
-
-		pd->processed++;
-
-		spin_unlock(&reorder->lock);
-		goto out;
+		++pd->processed;
+		pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false);
 	}
+
 	spin_unlock(&reorder->lock);
-
-	if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
-		padata = ERR_PTR(-ENODATA);
-		goto out;
-	}
-
-	padata = ERR_PTR(-EINPROGRESS);
-out:
 	return padata;
 }
 
@@ -215,6 +214,7 @@
 	struct padata_priv *padata;
 	struct padata_serial_queue *squeue;
 	struct padata_instance *pinst = pd->pinst;
+	struct padata_parallel_queue *next_queue;
 
 	/*
 	 * We need to ensure that only one cpu can work on dequeueing of
@@ -230,27 +230,16 @@
 		return;
 
 	while (1) {
-		padata = padata_get_next(pd);
+		padata = padata_find_next(pd, true);
 
 		/*
 		 * If the next object that needs serialization is parallel
 		 * processed by another cpu and is still on its way to the
 		 * cpu's reorder queue, nothing to do for now.
 		 */
-		if (PTR_ERR(padata) == -EINPROGRESS)
+		if (!padata)
 			break;
 
-		/*
-		 * This cpu has to do the parallel processing of the next
-		 * object. It's waiting in the cpu's parallelization queue,
-		 * so exit immediately.
-		 */
-		if (PTR_ERR(padata) == -ENODATA) {
-			del_timer(&pd->timer);
-			spin_unlock_bh(&pd->lock);
-			return;
-		}
-
 		cb_cpu = padata->cb_cpu;
 		squeue = per_cpu_ptr(pd->squeue, cb_cpu);
 
@@ -258,77 +247,37 @@
 		list_add_tail(&padata->list, &squeue->serial.list);
 		spin_unlock(&squeue->serial.lock);
 
-		queue_work_on(cb_cpu, pinst->wq, &squeue->work);
+		queue_work_on(cb_cpu, pinst->serial_wq, &squeue->work);
 	}
 
 	spin_unlock_bh(&pd->lock);
 
 	/*
 	 * The next object that needs serialization might have arrived at
-	 * the reorder queues in the meantime, we will be called again
-	 * from the timer function if no one else cares for it.
+	 * the reorder queues in the meantime.
 	 *
-	 * Ensure reorder_objects is read after pd->lock is dropped so we see
-	 * an increment from another task in padata_do_serial.  Pairs with
+	 * Ensure reorder queue is read after pd->lock is dropped so we see
+	 * new objects from another task in padata_do_serial.  Pairs with
 	 * smp_mb__after_atomic in padata_do_serial.
 	 */
 	smp_mb();
-	if (atomic_read(&pd->reorder_objects)
-			&& !(pinst->flags & PADATA_RESET))
-		mod_timer(&pd->timer, jiffies + HZ);
-	else
-		del_timer(&pd->timer);
 
-	return;
+	next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
+	if (!list_empty(&next_queue->reorder.list) &&
+	    padata_find_next(pd, false))
+		queue_work(pinst->serial_wq, &pd->reorder_work);
 }
 
 static void invoke_padata_reorder(struct work_struct *work)
 {
-	struct padata_parallel_queue *pqueue;
 	struct parallel_data *pd;
 
 	local_bh_disable();
-	pqueue = container_of(work, struct padata_parallel_queue, reorder_work);
-	pd = pqueue->pd;
+	pd = container_of(work, struct parallel_data, reorder_work);
 	padata_reorder(pd);
 	local_bh_enable();
 }
 
-static void padata_reorder_timer(struct timer_list *t)
-{
-	struct parallel_data *pd = from_timer(pd, t, timer);
-	unsigned int weight;
-	int target_cpu, cpu;
-
-	cpu = get_cpu();
-
-	/* We don't lock pd here to not interfere with parallel processing
-	 * padata_reorder() calls on other CPUs. We just need any CPU out of
-	 * the cpumask.pcpu set. It would be nice if it's the right one but
-	 * it doesn't matter if we're off to the next one by using an outdated
-	 * pd->processed value.
-	 */
-	weight = cpumask_weight(pd->cpumask.pcpu);
-	target_cpu = padata_index_to_cpu(pd, pd->processed % weight);
-
-	/* ensure to call the reorder callback on the correct CPU */
-	if (cpu != target_cpu) {
-		struct padata_parallel_queue *pqueue;
-		struct padata_instance *pinst;
-
-		/* The timer function is serialized wrt itself -- no locking
-		 * needed.
-		 */
-		pinst = pd->pinst;
-		pqueue = per_cpu_ptr(pd->pqueue, target_cpu);
-		queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work);
-	} else {
-		padata_reorder(pd);
-	}
-
-	put_cpu();
-}
-
 static void padata_serial_worker(struct work_struct *serial_work)
 {
 	struct padata_serial_queue *squeue;
@@ -367,47 +316,28 @@
  */
 void padata_do_serial(struct padata_priv *padata)
 {
-	int cpu;
-	struct padata_parallel_queue *pqueue;
-	struct parallel_data *pd;
-	int reorder_via_wq = 0;
-
-	pd = padata->pd;
-
-	cpu = get_cpu();
-
-	/* We need to run on the same CPU padata_do_parallel(.., padata, ..)
-	 * was called on -- or, at least, enqueue the padata object into the
-	 * correct per-cpu queue.
-	 */
-	if (cpu != padata->cpu) {
-		reorder_via_wq = 1;
-		cpu = padata->cpu;
-	}
-
-	pqueue = per_cpu_ptr(pd->pqueue, cpu);
+	struct parallel_data *pd = padata->pd;
+	struct padata_parallel_queue *pqueue = per_cpu_ptr(pd->pqueue,
+							   padata->cpu);
+	struct padata_priv *cur;
 
 	spin_lock(&pqueue->reorder.lock);
+	/* Sort in ascending order of sequence number. */
+	list_for_each_entry_reverse(cur, &pqueue->reorder.list, list)
+		if (cur->seq_nr < padata->seq_nr)
+			break;
+	list_add(&padata->list, &cur->list);
 	atomic_inc(&pd->reorder_objects);
-	list_add_tail(&padata->list, &pqueue->reorder.list);
 	spin_unlock(&pqueue->reorder.lock);
 
 	/*
-	 * Ensure the atomic_inc of reorder_objects above is ordered correctly
+	 * Ensure the addition to the reorder list is ordered correctly
 	 * with the trylock of pd->lock in padata_reorder.  Pairs with smp_mb
 	 * in padata_reorder.
 	 */
 	smp_mb__after_atomic();
 
-	put_cpu();
-
-	/* If we're running on the wrong CPU, call padata_reorder() via a
-	 * kernel worker.
-	 */
-	if (reorder_via_wq)
-		queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work);
-	else
-		padata_reorder(pd);
+	padata_reorder(pd);
 }
 EXPORT_SYMBOL(padata_do_serial);
 
@@ -415,17 +345,36 @@
 				 const struct cpumask *pcpumask,
 				 const struct cpumask *cbcpumask)
 {
+	struct workqueue_attrs *attrs;
+	int err = -ENOMEM;
+
 	if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
-		return -ENOMEM;
-
+		goto out;
 	cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
-	if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
-		free_cpumask_var(pd->cpumask.pcpu);
-		return -ENOMEM;
-	}
 
+	if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL))
+		goto free_pcpu_mask;
 	cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask);
+
+	attrs = alloc_workqueue_attrs();
+	if (!attrs)
+		goto free_cbcpu_mask;
+
+	/* Restrict parallel_wq workers to pd->cpumask.pcpu. */
+	cpumask_copy(attrs->cpumask, pd->cpumask.pcpu);
+	err = apply_workqueue_attrs(pd->pinst->parallel_wq, attrs);
+	free_workqueue_attrs(attrs);
+	if (err < 0)
+		goto free_cbcpu_mask;
+
 	return 0;
+
+free_cbcpu_mask:
+	free_cpumask_var(pd->cpumask.cbcpu);
+free_pcpu_mask:
+	free_cpumask_var(pd->cpumask.pcpu);
+out:
+	return err;
 }
 
 static void __padata_list_init(struct padata_list *pd_list)
@@ -451,26 +400,15 @@
 /* Initialize all percpu queues used by parallel workers */
 static void padata_init_pqueues(struct parallel_data *pd)
 {
-	int cpu_index, cpu;
+	int cpu;
 	struct padata_parallel_queue *pqueue;
 
-	cpu_index = 0;
-	for_each_possible_cpu(cpu) {
+	for_each_cpu(cpu, pd->cpumask.pcpu) {
 		pqueue = per_cpu_ptr(pd->pqueue, cpu);
 
-		if (!cpumask_test_cpu(cpu, pd->cpumask.pcpu)) {
-			pqueue->cpu_index = -1;
-			continue;
-		}
-
-		pqueue->pd = pd;
-		pqueue->cpu_index = cpu_index;
-		cpu_index++;
-
 		__padata_list_init(&pqueue->reorder);
 		__padata_list_init(&pqueue->parallel);
 		INIT_WORK(&pqueue->work, padata_parallel_worker);
-		INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder);
 		atomic_set(&pqueue->num_obj, 0);
 	}
 }
@@ -493,17 +431,19 @@
 	pd->squeue = alloc_percpu(struct padata_serial_queue);
 	if (!pd->squeue)
 		goto err_free_pqueue;
+
+	pd->pinst = pinst;
 	if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0)
 		goto err_free_squeue;
 
 	padata_init_pqueues(pd);
 	padata_init_squeues(pd);
-	timer_setup(&pd->timer, padata_reorder_timer, 0);
 	atomic_set(&pd->seq_nr, -1);
 	atomic_set(&pd->reorder_objects, 0);
 	atomic_set(&pd->refcnt, 0);
-	pd->pinst = pinst;
 	spin_lock_init(&pd->lock);
+	pd->cpu = cpumask_first(pd->cpumask.pcpu);
+	INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
 
 	return pd;
 
@@ -538,8 +478,6 @@
 		flush_work(&pqueue->work);
 	}
 
-	del_timer_sync(&pd->timer);
-
 	if (atomic_read(&pd->reorder_objects))
 		padata_reorder(pd);
 
@@ -883,6 +821,8 @@
 	padata_free_pd(pinst->pd);
 	free_cpumask_var(pinst->cpumask.pcpu);
 	free_cpumask_var(pinst->cpumask.cbcpu);
+	destroy_workqueue(pinst->serial_wq);
+	destroy_workqueue(pinst->parallel_wq);
 	kfree(pinst);
 }
 
@@ -1016,13 +956,11 @@
  * padata_alloc - allocate and initialize a padata instance and specify
  *                cpumasks for serial and parallel workers.
  *
- * @wq: workqueue to use for the allocated padata instance
+ * @name: used to identify the instance
  * @pcpumask: cpumask that will be used for padata parallelization
  * @cbcpumask: cpumask that will be used for padata serialization
- *
- * Must be called from a cpus_read_lock() protected region
  */
-static struct padata_instance *padata_alloc(struct workqueue_struct *wq,
+static struct padata_instance *padata_alloc(const char *name,
 					    const struct cpumask *pcpumask,
 					    const struct cpumask *cbcpumask)
 {
@@ -1033,11 +971,23 @@
 	if (!pinst)
 		goto err;
 
-	if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
+	pinst->parallel_wq = alloc_workqueue("%s_parallel", WQ_UNBOUND, 0,
+					     name);
+	if (!pinst->parallel_wq)
 		goto err_free_inst;
+
+	get_online_cpus();
+
+	pinst->serial_wq = alloc_workqueue("%s_serial", WQ_MEM_RECLAIM |
+					   WQ_CPU_INTENSIVE, 1, name);
+	if (!pinst->serial_wq)
+		goto err_put_cpus;
+
+	if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
+		goto err_free_serial_wq;
 	if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
 		free_cpumask_var(pinst->cpumask.pcpu);
-		goto err_free_inst;
+		goto err_free_serial_wq;
 	}
 	if (!padata_validate_cpumask(pinst, pcpumask) ||
 	    !padata_validate_cpumask(pinst, cbcpumask))
@@ -1049,8 +999,6 @@
 
 	rcu_assign_pointer(pinst->pd, pd);
 
-	pinst->wq = wq;
-
 	cpumask_copy(pinst->cpumask.pcpu, pcpumask);
 	cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
 
@@ -1063,11 +1011,19 @@
 #ifdef CONFIG_HOTPLUG_CPU
 	cpuhp_state_add_instance_nocalls_cpuslocked(hp_online, &pinst->node);
 #endif
+
+	put_online_cpus();
+
 	return pinst;
 
 err_free_masks:
 	free_cpumask_var(pinst->cpumask.pcpu);
 	free_cpumask_var(pinst->cpumask.cbcpu);
+err_free_serial_wq:
+	destroy_workqueue(pinst->serial_wq);
+err_put_cpus:
+	put_online_cpus();
+	destroy_workqueue(pinst->parallel_wq);
 err_free_inst:
 	kfree(pinst);
 err:
@@ -1079,14 +1035,11 @@
  *                         Use the cpu_possible_mask for serial and
  *                         parallel workers.
  *
- * @wq: workqueue to use for the allocated padata instance
- *
- * Must be called from a cpus_read_lock() protected region
+ * @name: used to identify the instance
  */
-struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
+struct padata_instance *padata_alloc_possible(const char *name)
 {
-	lockdep_assert_cpus_held();
-	return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
+	return padata_alloc(name, cpu_possible_mask, cpu_possible_mask);
 }
 EXPORT_SYMBOL(padata_alloc_possible);
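
For users of the reworked padata API, the allocation call now takes a name
instead of a caller-supplied workqueue; the instance creates its parallel and
serial workqueues internally (see the padata_alloc() hunks above).  A minimal
sketch, not part of this diff -- the instance name, variable and error
handling are illustrative:

	#include <linux/padata.h>

	static struct padata_instance *my_pinst;

	static int my_engine_init(void)
	{
		/* Workqueues are now allocated by padata itself. */
		my_pinst = padata_alloc_possible("my_engine");
		if (!my_pinst)
			return -ENOMEM;

		return 0;
	}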
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 601d611..bc2e09a 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -3329,7 +3329,7 @@
  *
  * Undo alloc_workqueue_attrs().
  */
-static void free_workqueue_attrs(struct workqueue_attrs *attrs)
+void free_workqueue_attrs(struct workqueue_attrs *attrs)
 {
 	if (attrs) {
 		free_cpumask_var(attrs->cpumask);
@@ -3345,7 +3345,7 @@
  *
  * Return: The allocated new workqueue_attrs on success. %NULL on failure.
  */
-static struct workqueue_attrs *alloc_workqueue_attrs(void)
+struct workqueue_attrs *alloc_workqueue_attrs(void)
 {
 	struct workqueue_attrs *attrs;
 
@@ -4030,16 +4030,20 @@
  *
  * Performs GFP_KERNEL allocations.
  *
+ * Assumes caller has CPU hotplug read exclusion, i.e. get_online_cpus().
+ *
  * Return: 0 on success and -errno on failure.
  */
-static int apply_workqueue_attrs(struct workqueue_struct *wq,
+int apply_workqueue_attrs(struct workqueue_struct *wq,
 			  const struct workqueue_attrs *attrs)
 {
 	int ret;
 
-	apply_wqattrs_lock();
+	lockdep_assert_cpus_held();
+
+	mutex_lock(&wq_pool_mutex);
 	ret = apply_workqueue_attrs_locked(wq, attrs);
-	apply_wqattrs_unlock();
+	mutex_unlock(&wq_pool_mutex);
 
 	return ret;
 }
@@ -4152,16 +4156,21 @@
 			mutex_unlock(&wq->mutex);
 		}
 		return 0;
-	} else if (wq->flags & __WQ_ORDERED) {
+	}
+
+	get_online_cpus();
+	if (wq->flags & __WQ_ORDERED) {
 		ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
 		/* there should only be single pwq for ordering guarantee */
 		WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
 			      wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
 		     "ordering guarantee broken for workqueue %s\n", wq->name);
-		return ret;
 	} else {
-		return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
+		ret = apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
 	}
+	put_online_cpus();
+
+	return ret;
 }
 
 static int wq_clamp_max_active(int max_active, unsigned int flags,
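
Since apply_workqueue_attrs() is now exported but no longer takes the CPU
hotplug lock itself, callers outside workqueue.c are expected to provide
hotplug read exclusion around it, as the padata hunk above does.  A minimal
sketch under that assumption -- the wrapper name and cpumask are illustrative:

	#include <linux/cpu.h>
	#include <linux/workqueue.h>

	static int my_restrict_wq(struct workqueue_struct *wq,
				  const struct cpumask *mask)
	{
		struct workqueue_attrs *attrs;
		int err;

		attrs = alloc_workqueue_attrs();
		if (!attrs)
			return -ENOMEM;
		cpumask_copy(attrs->cpumask, mask);

		get_online_cpus();	/* CPU hotplug read exclusion */
		err = apply_workqueue_attrs(wq, attrs);
		put_online_cpus();

		free_workqueue_attrs(attrs);
		return err;
	}
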
diff --git a/lib/crypto/Makefile b/lib/crypto/Makefile
index 88195c3..cbe0b6a 100644
--- a/lib/crypto/Makefile
+++ b/lib/crypto/Makefile
@@ -1,4 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0
 
+obj-$(CONFIG_CRYPTO_LIB_AES) += libaes.o
+libaes-y := aes.o
+
 obj-$(CONFIG_CRYPTO_LIB_ARC4) += libarc4.o
 libarc4-y := arc4.o
+
+obj-$(CONFIG_CRYPTO_LIB_DES) += libdes.o
+libdes-y := des.o
+
+obj-$(CONFIG_CRYPTO_LIB_SHA256) += libsha256.o
+libsha256-y := sha256.o
diff --git a/lib/crypto/aes.c b/lib/crypto/aes.c
new file mode 100644
index 0000000..827fe89
--- /dev/null
+++ b/lib/crypto/aes.c
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017-2019 Linaro Ltd <ard.biesheuvel@linaro.org>
+ */
+
+#include <crypto/aes.h>
+#include <linux/crypto.h>
+#include <linux/module.h>
+#include <asm/unaligned.h>
+
+/*
+ * Emit the sbox as volatile const to prevent the compiler from doing
+ * constant folding on sbox references involving fixed indexes.
+ */
+static volatile const u8 __cacheline_aligned aes_sbox[] = {
+	0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
+	0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
+	0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
+	0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
+	0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
+	0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
+	0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
+	0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
+	0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
+	0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
+	0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
+	0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
+	0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
+	0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
+	0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
+	0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
+	0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
+	0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
+	0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
+	0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
+	0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
+	0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
+	0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
+	0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
+	0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
+	0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
+	0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
+	0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
+	0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
+	0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
+	0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
+	0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16,
+};
+
+static volatile const u8 __cacheline_aligned aes_inv_sbox[] = {
+	0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
+	0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
+	0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
+	0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
+	0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
+	0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
+	0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
+	0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
+	0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
+	0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
+	0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
+	0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
+	0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
+	0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
+	0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
+	0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
+	0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
+	0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
+	0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
+	0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
+	0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
+	0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
+	0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
+	0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
+	0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
+	0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
+	0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
+	0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
+	0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
+	0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
+	0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
+	0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d,
+};
+
+extern const u8 crypto_aes_sbox[256] __alias(aes_sbox);
+extern const u8 crypto_aes_inv_sbox[256] __alias(aes_inv_sbox);
+
+EXPORT_SYMBOL(crypto_aes_sbox);
+EXPORT_SYMBOL(crypto_aes_inv_sbox);
+
+static u32 mul_by_x(u32 w)
+{
+	u32 x = w & 0x7f7f7f7f;
+	u32 y = w & 0x80808080;
+
+	/* multiply by polynomial 'x' (0b10) in GF(2^8) */
+	return (x << 1) ^ (y >> 7) * 0x1b;
+}
+
+static u32 mul_by_x2(u32 w)
+{
+	u32 x = w & 0x3f3f3f3f;
+	u32 y = w & 0x80808080;
+	u32 z = w & 0x40404040;
+
+	/* multiply by polynomial 'x^2' (0b100) in GF(2^8) */
+	return (x << 2) ^ (y >> 7) * 0x36 ^ (z >> 6) * 0x1b;
+}
+
+static u32 mix_columns(u32 x)
+{
+	/*
+	 * Perform the following matrix multiplication in GF(2^8)
+	 *
+	 * | 0x2 0x3 0x1 0x1 |   | x[0] |
+	 * | 0x1 0x2 0x3 0x1 |   | x[1] |
+	 * | 0x1 0x1 0x2 0x3 | x | x[2] |
+	 * | 0x3 0x1 0x1 0x2 |   | x[3] |
+	 */
+	u32 y = mul_by_x(x) ^ ror32(x, 16);
+
+	return y ^ ror32(x ^ y, 8);
+}
+
+static u32 inv_mix_columns(u32 x)
+{
+	/*
+	 * Perform the following matrix multiplication in GF(2^8)
+	 *
+	 * | 0xe 0xb 0xd 0x9 |   | x[0] |
+	 * | 0x9 0xe 0xb 0xd |   | x[1] |
+	 * | 0xd 0x9 0xe 0xb | x | x[2] |
+	 * | 0xb 0xd 0x9 0xe |   | x[3] |
+	 *
+	 * which can conveniently be reduced to
+	 *
+	 * | 0x2 0x3 0x1 0x1 |   | 0x5 0x0 0x4 0x0 |   | x[0] |
+	 * | 0x1 0x2 0x3 0x1 |   | 0x0 0x5 0x0 0x4 |   | x[1] |
+	 * | 0x1 0x1 0x2 0x3 | x | 0x4 0x0 0x5 0x0 | x | x[2] |
+	 * | 0x3 0x1 0x1 0x2 |   | 0x0 0x4 0x0 0x5 |   | x[3] |
+	 */
+	u32 y = mul_by_x2(x);
+
+	return mix_columns(x ^ y ^ ror32(y, 16));
+}
+
+static __always_inline u32 subshift(u32 in[], int pos)
+{
+	return (aes_sbox[in[pos] & 0xff]) ^
+	       (aes_sbox[(in[(pos + 1) % 4] >>  8) & 0xff] <<  8) ^
+	       (aes_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
+	       (aes_sbox[(in[(pos + 3) % 4] >> 24) & 0xff] << 24);
+}
+
+static __always_inline u32 inv_subshift(u32 in[], int pos)
+{
+	return (aes_inv_sbox[in[pos] & 0xff]) ^
+	       (aes_inv_sbox[(in[(pos + 3) % 4] >>  8) & 0xff] <<  8) ^
+	       (aes_inv_sbox[(in[(pos + 2) % 4] >> 16) & 0xff] << 16) ^
+	       (aes_inv_sbox[(in[(pos + 1) % 4] >> 24) & 0xff] << 24);
+}
+
+static u32 subw(u32 in)
+{
+	return (aes_sbox[in & 0xff]) ^
+	       (aes_sbox[(in >>  8) & 0xff] <<  8) ^
+	       (aes_sbox[(in >> 16) & 0xff] << 16) ^
+	       (aes_sbox[(in >> 24) & 0xff] << 24);
+}
+
+/**
+ * aes_expandkey - Expands the AES key as described in FIPS-197
+ * @ctx:	The location where the computed key will be stored.
+ * @in_key:	The supplied key.
+ * @key_len:	The length of the supplied key.
+ *
+ * Returns 0 on success. The function fails only if an invalid key size (or
+ * pointer) is supplied.
+ * The expanded key size is 240 bytes (at most 14 rounds, each with a unique
+ * 16-byte round key, plus a 16-byte key used before the first round).
+ * The decryption key is prepared for the "Equivalent Inverse Cipher" as
+ * described in FIPS-197. The first slot (16 bytes) of each key (enc or dec) is
+ * for the initial combination, the second slot for the first round and so on.
+ */
+int aes_expandkey(struct crypto_aes_ctx *ctx, const u8 *in_key,
+		  unsigned int key_len)
+{
+	u32 kwords = key_len / sizeof(u32);
+	u32 rc, i, j;
+	int err;
+
+	err = aes_check_keylen(key_len);
+	if (err)
+		return err;
+
+	ctx->key_length = key_len;
+
+	for (i = 0; i < kwords; i++)
+		ctx->key_enc[i] = get_unaligned_le32(in_key + i * sizeof(u32));
+
+	for (i = 0, rc = 1; i < 10; i++, rc = mul_by_x(rc)) {
+		u32 *rki = ctx->key_enc + (i * kwords);
+		u32 *rko = rki + kwords;
+
+		rko[0] = ror32(subw(rki[kwords - 1]), 8) ^ rc ^ rki[0];
+		rko[1] = rko[0] ^ rki[1];
+		rko[2] = rko[1] ^ rki[2];
+		rko[3] = rko[2] ^ rki[3];
+
+		if (key_len == AES_KEYSIZE_192) {
+			if (i >= 7)
+				break;
+			rko[4] = rko[3] ^ rki[4];
+			rko[5] = rko[4] ^ rki[5];
+		} else if (key_len == AES_KEYSIZE_256) {
+			if (i >= 6)
+				break;
+			rko[4] = subw(rko[3]) ^ rki[4];
+			rko[5] = rko[4] ^ rki[5];
+			rko[6] = rko[5] ^ rki[6];
+			rko[7] = rko[6] ^ rki[7];
+		}
+	}
+
+	/*
+	 * Generate the decryption keys for the Equivalent Inverse Cipher.
+	 * This involves reversing the order of the round keys, and applying
+	 * the Inverse Mix Columns transformation to all but the first and
+	 * the last one.
+	 */
+	ctx->key_dec[0] = ctx->key_enc[key_len + 24];
+	ctx->key_dec[1] = ctx->key_enc[key_len + 25];
+	ctx->key_dec[2] = ctx->key_enc[key_len + 26];
+	ctx->key_dec[3] = ctx->key_enc[key_len + 27];
+
+	for (i = 4, j = key_len + 20; j > 0; i += 4, j -= 4) {
+		ctx->key_dec[i]     = inv_mix_columns(ctx->key_enc[j]);
+		ctx->key_dec[i + 1] = inv_mix_columns(ctx->key_enc[j + 1]);
+		ctx->key_dec[i + 2] = inv_mix_columns(ctx->key_enc[j + 2]);
+		ctx->key_dec[i + 3] = inv_mix_columns(ctx->key_enc[j + 3]);
+	}
+
+	ctx->key_dec[i]     = ctx->key_enc[0];
+	ctx->key_dec[i + 1] = ctx->key_enc[1];
+	ctx->key_dec[i + 2] = ctx->key_enc[2];
+	ctx->key_dec[i + 3] = ctx->key_enc[3];
+
+	return 0;
+}
+EXPORT_SYMBOL(aes_expandkey);
+
+/**
+ * aes_encrypt - Encrypt a single AES block
+ * @ctx:	Context struct containing the key schedule
+ * @out:	Buffer to store the ciphertext
+ * @in:		Buffer containing the plaintext
+ */
+void aes_encrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in)
+{
+	const u32 *rkp = ctx->key_enc + 4;
+	int rounds = 6 + ctx->key_length / 4;
+	u32 st0[4], st1[4];
+	int round;
+
+	st0[0] = ctx->key_enc[0] ^ get_unaligned_le32(in);
+	st0[1] = ctx->key_enc[1] ^ get_unaligned_le32(in + 4);
+	st0[2] = ctx->key_enc[2] ^ get_unaligned_le32(in + 8);
+	st0[3] = ctx->key_enc[3] ^ get_unaligned_le32(in + 12);
+
+	/*
+	 * Force the compiler to emit data-independent Sbox references by
+	 * xoring the input with Sbox values that are known to add up to
+	 * zero. This pulls the entire Sbox into the D-cache before any
+	 * data-dependent lookups are done.
+	 */
+	st0[0] ^= aes_sbox[ 0] ^ aes_sbox[ 64] ^ aes_sbox[134] ^ aes_sbox[195];
+	st0[1] ^= aes_sbox[16] ^ aes_sbox[ 82] ^ aes_sbox[158] ^ aes_sbox[221];
+	st0[2] ^= aes_sbox[32] ^ aes_sbox[ 96] ^ aes_sbox[160] ^ aes_sbox[234];
+	st0[3] ^= aes_sbox[48] ^ aes_sbox[112] ^ aes_sbox[186] ^ aes_sbox[241];
+
+	for (round = 0;; round += 2, rkp += 8) {
+		st1[0] = mix_columns(subshift(st0, 0)) ^ rkp[0];
+		st1[1] = mix_columns(subshift(st0, 1)) ^ rkp[1];
+		st1[2] = mix_columns(subshift(st0, 2)) ^ rkp[2];
+		st1[3] = mix_columns(subshift(st0, 3)) ^ rkp[3];
+
+		if (round == rounds - 2)
+			break;
+
+		st0[0] = mix_columns(subshift(st1, 0)) ^ rkp[4];
+		st0[1] = mix_columns(subshift(st1, 1)) ^ rkp[5];
+		st0[2] = mix_columns(subshift(st1, 2)) ^ rkp[6];
+		st0[3] = mix_columns(subshift(st1, 3)) ^ rkp[7];
+	}
+
+	put_unaligned_le32(subshift(st1, 0) ^ rkp[4], out);
+	put_unaligned_le32(subshift(st1, 1) ^ rkp[5], out + 4);
+	put_unaligned_le32(subshift(st1, 2) ^ rkp[6], out + 8);
+	put_unaligned_le32(subshift(st1, 3) ^ rkp[7], out + 12);
+}
+EXPORT_SYMBOL(aes_encrypt);
+
+/**
+ * aes_decrypt - Decrypt a single AES block
+ * @ctx:	Context struct containing the key schedule
+ * @out:	Buffer to store the plaintext
+ * @in:		Buffer containing the ciphertext
+ */
+void aes_decrypt(const struct crypto_aes_ctx *ctx, u8 *out, const u8 *in)
+{
+	const u32 *rkp = ctx->key_dec + 4;
+	int rounds = 6 + ctx->key_length / 4;
+	u32 st0[4], st1[4];
+	int round;
+
+	st0[0] = ctx->key_dec[0] ^ get_unaligned_le32(in);
+	st0[1] = ctx->key_dec[1] ^ get_unaligned_le32(in + 4);
+	st0[2] = ctx->key_dec[2] ^ get_unaligned_le32(in + 8);
+	st0[3] = ctx->key_dec[3] ^ get_unaligned_le32(in + 12);
+
+	/*
+	 * Force the compiler to emit data-independent Sbox references by
+	 * xoring the input with Sbox values that are known to add up to
+	 * zero. This pulls the entire Sbox into the D-cache before any
+	 * data-dependent lookups are done.
+	 */
+	st0[0] ^= aes_inv_sbox[ 0] ^ aes_inv_sbox[ 64] ^ aes_inv_sbox[129] ^ aes_inv_sbox[200];
+	st0[1] ^= aes_inv_sbox[16] ^ aes_inv_sbox[ 83] ^ aes_inv_sbox[150] ^ aes_inv_sbox[212];
+	st0[2] ^= aes_inv_sbox[32] ^ aes_inv_sbox[ 96] ^ aes_inv_sbox[160] ^ aes_inv_sbox[236];
+	st0[3] ^= aes_inv_sbox[48] ^ aes_inv_sbox[112] ^ aes_inv_sbox[187] ^ aes_inv_sbox[247];
+
+	for (round = 0;; round += 2, rkp += 8) {
+		st1[0] = inv_mix_columns(inv_subshift(st0, 0)) ^ rkp[0];
+		st1[1] = inv_mix_columns(inv_subshift(st0, 1)) ^ rkp[1];
+		st1[2] = inv_mix_columns(inv_subshift(st0, 2)) ^ rkp[2];
+		st1[3] = inv_mix_columns(inv_subshift(st0, 3)) ^ rkp[3];
+
+		if (round == rounds - 2)
+			break;
+
+		st0[0] = inv_mix_columns(inv_subshift(st1, 0)) ^ rkp[4];
+		st0[1] = inv_mix_columns(inv_subshift(st1, 1)) ^ rkp[5];
+		st0[2] = inv_mix_columns(inv_subshift(st1, 2)) ^ rkp[6];
+		st0[3] = inv_mix_columns(inv_subshift(st1, 3)) ^ rkp[7];
+	}
+
+	put_unaligned_le32(inv_subshift(st1, 0) ^ rkp[4], out);
+	put_unaligned_le32(inv_subshift(st1, 1) ^ rkp[5], out + 4);
+	put_unaligned_le32(inv_subshift(st1, 2) ^ rkp[6], out + 8);
+	put_unaligned_le32(inv_subshift(st1, 3) ^ rkp[7], out + 12);
+}
+EXPORT_SYMBOL(aes_decrypt);
+
+MODULE_DESCRIPTION("Generic AES library");
+MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
+MODULE_LICENSE("GPL v2");
diff --git a/lib/crypto/des.c b/lib/crypto/des.c
new file mode 100644
index 0000000..ef5bb88
--- /dev/null
+++ b/lib/crypto/des.c
@@ -0,0 +1,902 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Cryptographic API.
+ *
+ * DES & Triple DES EDE Cipher Algorithms.
+ *
+ * Copyright (c) 2005 Dag Arne Osvik <da@osvik.no>
+ */
+
+#include <linux/bitops.h>
+#include <linux/compiler.h>
+#include <linux/crypto.h>
+#include <linux/errno.h>
+#include <linux/fips.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/unaligned.h>
+
+#include <crypto/des.h>
+#include <crypto/internal/des.h>
+
+#define ROL(x, r) ((x) = rol32((x), (r)))
+#define ROR(x, r) ((x) = ror32((x), (r)))
+
+/* Lookup tables for key expansion */
+
+static const u8 pc1[256] = {
+	0x00, 0x00, 0x40, 0x04, 0x10, 0x10, 0x50, 0x14,
+	0x04, 0x40, 0x44, 0x44, 0x14, 0x50, 0x54, 0x54,
+	0x02, 0x02, 0x42, 0x06, 0x12, 0x12, 0x52, 0x16,
+	0x06, 0x42, 0x46, 0x46, 0x16, 0x52, 0x56, 0x56,
+	0x80, 0x08, 0xc0, 0x0c, 0x90, 0x18, 0xd0, 0x1c,
+	0x84, 0x48, 0xc4, 0x4c, 0x94, 0x58, 0xd4, 0x5c,
+	0x82, 0x0a, 0xc2, 0x0e, 0x92, 0x1a, 0xd2, 0x1e,
+	0x86, 0x4a, 0xc6, 0x4e, 0x96, 0x5a, 0xd6, 0x5e,
+	0x20, 0x20, 0x60, 0x24, 0x30, 0x30, 0x70, 0x34,
+	0x24, 0x60, 0x64, 0x64, 0x34, 0x70, 0x74, 0x74,
+	0x22, 0x22, 0x62, 0x26, 0x32, 0x32, 0x72, 0x36,
+	0x26, 0x62, 0x66, 0x66, 0x36, 0x72, 0x76, 0x76,
+	0xa0, 0x28, 0xe0, 0x2c, 0xb0, 0x38, 0xf0, 0x3c,
+	0xa4, 0x68, 0xe4, 0x6c, 0xb4, 0x78, 0xf4, 0x7c,
+	0xa2, 0x2a, 0xe2, 0x2e, 0xb2, 0x3a, 0xf2, 0x3e,
+	0xa6, 0x6a, 0xe6, 0x6e, 0xb6, 0x7a, 0xf6, 0x7e,
+	0x08, 0x80, 0x48, 0x84, 0x18, 0x90, 0x58, 0x94,
+	0x0c, 0xc0, 0x4c, 0xc4, 0x1c, 0xd0, 0x5c, 0xd4,
+	0x0a, 0x82, 0x4a, 0x86, 0x1a, 0x92, 0x5a, 0x96,
+	0x0e, 0xc2, 0x4e, 0xc6, 0x1e, 0xd2, 0x5e, 0xd6,
+	0x88, 0x88, 0xc8, 0x8c, 0x98, 0x98, 0xd8, 0x9c,
+	0x8c, 0xc8, 0xcc, 0xcc, 0x9c, 0xd8, 0xdc, 0xdc,
+	0x8a, 0x8a, 0xca, 0x8e, 0x9a, 0x9a, 0xda, 0x9e,
+	0x8e, 0xca, 0xce, 0xce, 0x9e, 0xda, 0xde, 0xde,
+	0x28, 0xa0, 0x68, 0xa4, 0x38, 0xb0, 0x78, 0xb4,
+	0x2c, 0xe0, 0x6c, 0xe4, 0x3c, 0xf0, 0x7c, 0xf4,
+	0x2a, 0xa2, 0x6a, 0xa6, 0x3a, 0xb2, 0x7a, 0xb6,
+	0x2e, 0xe2, 0x6e, 0xe6, 0x3e, 0xf2, 0x7e, 0xf6,
+	0xa8, 0xa8, 0xe8, 0xac, 0xb8, 0xb8, 0xf8, 0xbc,
+	0xac, 0xe8, 0xec, 0xec, 0xbc, 0xf8, 0xfc, 0xfc,
+	0xaa, 0xaa, 0xea, 0xae, 0xba, 0xba, 0xfa, 0xbe,
+	0xae, 0xea, 0xee, 0xee, 0xbe, 0xfa, 0xfe, 0xfe
+};
+
+static const u8 rs[256] = {
+	0x00, 0x00, 0x80, 0x80, 0x02, 0x02, 0x82, 0x82,
+	0x04, 0x04, 0x84, 0x84, 0x06, 0x06, 0x86, 0x86,
+	0x08, 0x08, 0x88, 0x88, 0x0a, 0x0a, 0x8a, 0x8a,
+	0x0c, 0x0c, 0x8c, 0x8c, 0x0e, 0x0e, 0x8e, 0x8e,
+	0x10, 0x10, 0x90, 0x90, 0x12, 0x12, 0x92, 0x92,
+	0x14, 0x14, 0x94, 0x94, 0x16, 0x16, 0x96, 0x96,
+	0x18, 0x18, 0x98, 0x98, 0x1a, 0x1a, 0x9a, 0x9a,
+	0x1c, 0x1c, 0x9c, 0x9c, 0x1e, 0x1e, 0x9e, 0x9e,
+	0x20, 0x20, 0xa0, 0xa0, 0x22, 0x22, 0xa2, 0xa2,
+	0x24, 0x24, 0xa4, 0xa4, 0x26, 0x26, 0xa6, 0xa6,
+	0x28, 0x28, 0xa8, 0xa8, 0x2a, 0x2a, 0xaa, 0xaa,
+	0x2c, 0x2c, 0xac, 0xac, 0x2e, 0x2e, 0xae, 0xae,
+	0x30, 0x30, 0xb0, 0xb0, 0x32, 0x32, 0xb2, 0xb2,
+	0x34, 0x34, 0xb4, 0xb4, 0x36, 0x36, 0xb6, 0xb6,
+	0x38, 0x38, 0xb8, 0xb8, 0x3a, 0x3a, 0xba, 0xba,
+	0x3c, 0x3c, 0xbc, 0xbc, 0x3e, 0x3e, 0xbe, 0xbe,
+	0x40, 0x40, 0xc0, 0xc0, 0x42, 0x42, 0xc2, 0xc2,
+	0x44, 0x44, 0xc4, 0xc4, 0x46, 0x46, 0xc6, 0xc6,
+	0x48, 0x48, 0xc8, 0xc8, 0x4a, 0x4a, 0xca, 0xca,
+	0x4c, 0x4c, 0xcc, 0xcc, 0x4e, 0x4e, 0xce, 0xce,
+	0x50, 0x50, 0xd0, 0xd0, 0x52, 0x52, 0xd2, 0xd2,
+	0x54, 0x54, 0xd4, 0xd4, 0x56, 0x56, 0xd6, 0xd6,
+	0x58, 0x58, 0xd8, 0xd8, 0x5a, 0x5a, 0xda, 0xda,
+	0x5c, 0x5c, 0xdc, 0xdc, 0x5e, 0x5e, 0xde, 0xde,
+	0x60, 0x60, 0xe0, 0xe0, 0x62, 0x62, 0xe2, 0xe2,
+	0x64, 0x64, 0xe4, 0xe4, 0x66, 0x66, 0xe6, 0xe6,
+	0x68, 0x68, 0xe8, 0xe8, 0x6a, 0x6a, 0xea, 0xea,
+	0x6c, 0x6c, 0xec, 0xec, 0x6e, 0x6e, 0xee, 0xee,
+	0x70, 0x70, 0xf0, 0xf0, 0x72, 0x72, 0xf2, 0xf2,
+	0x74, 0x74, 0xf4, 0xf4, 0x76, 0x76, 0xf6, 0xf6,
+	0x78, 0x78, 0xf8, 0xf8, 0x7a, 0x7a, 0xfa, 0xfa,
+	0x7c, 0x7c, 0xfc, 0xfc, 0x7e, 0x7e, 0xfe, 0xfe
+};
+
+static const u32 pc2[1024] = {
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00040000, 0x00000000, 0x04000000, 0x00100000,
+	0x00400000, 0x00000008, 0x00000800, 0x40000000,
+	0x00440000, 0x00000008, 0x04000800, 0x40100000,
+	0x00000400, 0x00000020, 0x08000000, 0x00000100,
+	0x00040400, 0x00000020, 0x0c000000, 0x00100100,
+	0x00400400, 0x00000028, 0x08000800, 0x40000100,
+	0x00440400, 0x00000028, 0x0c000800, 0x40100100,
+	0x80000000, 0x00000010, 0x00000000, 0x00800000,
+	0x80040000, 0x00000010, 0x04000000, 0x00900000,
+	0x80400000, 0x00000018, 0x00000800, 0x40800000,
+	0x80440000, 0x00000018, 0x04000800, 0x40900000,
+	0x80000400, 0x00000030, 0x08000000, 0x00800100,
+	0x80040400, 0x00000030, 0x0c000000, 0x00900100,
+	0x80400400, 0x00000038, 0x08000800, 0x40800100,
+	0x80440400, 0x00000038, 0x0c000800, 0x40900100,
+	0x10000000, 0x00000000, 0x00200000, 0x00001000,
+	0x10040000, 0x00000000, 0x04200000, 0x00101000,
+	0x10400000, 0x00000008, 0x00200800, 0x40001000,
+	0x10440000, 0x00000008, 0x04200800, 0x40101000,
+	0x10000400, 0x00000020, 0x08200000, 0x00001100,
+	0x10040400, 0x00000020, 0x0c200000, 0x00101100,
+	0x10400400, 0x00000028, 0x08200800, 0x40001100,
+	0x10440400, 0x00000028, 0x0c200800, 0x40101100,
+	0x90000000, 0x00000010, 0x00200000, 0x00801000,
+	0x90040000, 0x00000010, 0x04200000, 0x00901000,
+	0x90400000, 0x00000018, 0x00200800, 0x40801000,
+	0x90440000, 0x00000018, 0x04200800, 0x40901000,
+	0x90000400, 0x00000030, 0x08200000, 0x00801100,
+	0x90040400, 0x00000030, 0x0c200000, 0x00901100,
+	0x90400400, 0x00000038, 0x08200800, 0x40801100,
+	0x90440400, 0x00000038, 0x0c200800, 0x40901100,
+	0x00000200, 0x00080000, 0x00000000, 0x00000004,
+	0x00040200, 0x00080000, 0x04000000, 0x00100004,
+	0x00400200, 0x00080008, 0x00000800, 0x40000004,
+	0x00440200, 0x00080008, 0x04000800, 0x40100004,
+	0x00000600, 0x00080020, 0x08000000, 0x00000104,
+	0x00040600, 0x00080020, 0x0c000000, 0x00100104,
+	0x00400600, 0x00080028, 0x08000800, 0x40000104,
+	0x00440600, 0x00080028, 0x0c000800, 0x40100104,
+	0x80000200, 0x00080010, 0x00000000, 0x00800004,
+	0x80040200, 0x00080010, 0x04000000, 0x00900004,
+	0x80400200, 0x00080018, 0x00000800, 0x40800004,
+	0x80440200, 0x00080018, 0x04000800, 0x40900004,
+	0x80000600, 0x00080030, 0x08000000, 0x00800104,
+	0x80040600, 0x00080030, 0x0c000000, 0x00900104,
+	0x80400600, 0x00080038, 0x08000800, 0x40800104,
+	0x80440600, 0x00080038, 0x0c000800, 0x40900104,
+	0x10000200, 0x00080000, 0x00200000, 0x00001004,
+	0x10040200, 0x00080000, 0x04200000, 0x00101004,
+	0x10400200, 0x00080008, 0x00200800, 0x40001004,
+	0x10440200, 0x00080008, 0x04200800, 0x40101004,
+	0x10000600, 0x00080020, 0x08200000, 0x00001104,
+	0x10040600, 0x00080020, 0x0c200000, 0x00101104,
+	0x10400600, 0x00080028, 0x08200800, 0x40001104,
+	0x10440600, 0x00080028, 0x0c200800, 0x40101104,
+	0x90000200, 0x00080010, 0x00200000, 0x00801004,
+	0x90040200, 0x00080010, 0x04200000, 0x00901004,
+	0x90400200, 0x00080018, 0x00200800, 0x40801004,
+	0x90440200, 0x00080018, 0x04200800, 0x40901004,
+	0x90000600, 0x00080030, 0x08200000, 0x00801104,
+	0x90040600, 0x00080030, 0x0c200000, 0x00901104,
+	0x90400600, 0x00080038, 0x08200800, 0x40801104,
+	0x90440600, 0x00080038, 0x0c200800, 0x40901104,
+	0x00000002, 0x00002000, 0x20000000, 0x00000001,
+	0x00040002, 0x00002000, 0x24000000, 0x00100001,
+	0x00400002, 0x00002008, 0x20000800, 0x40000001,
+	0x00440002, 0x00002008, 0x24000800, 0x40100001,
+	0x00000402, 0x00002020, 0x28000000, 0x00000101,
+	0x00040402, 0x00002020, 0x2c000000, 0x00100101,
+	0x00400402, 0x00002028, 0x28000800, 0x40000101,
+	0x00440402, 0x00002028, 0x2c000800, 0x40100101,
+	0x80000002, 0x00002010, 0x20000000, 0x00800001,
+	0x80040002, 0x00002010, 0x24000000, 0x00900001,
+	0x80400002, 0x00002018, 0x20000800, 0x40800001,
+	0x80440002, 0x00002018, 0x24000800, 0x40900001,
+	0x80000402, 0x00002030, 0x28000000, 0x00800101,
+	0x80040402, 0x00002030, 0x2c000000, 0x00900101,
+	0x80400402, 0x00002038, 0x28000800, 0x40800101,
+	0x80440402, 0x00002038, 0x2c000800, 0x40900101,
+	0x10000002, 0x00002000, 0x20200000, 0x00001001,
+	0x10040002, 0x00002000, 0x24200000, 0x00101001,
+	0x10400002, 0x00002008, 0x20200800, 0x40001001,
+	0x10440002, 0x00002008, 0x24200800, 0x40101001,
+	0x10000402, 0x00002020, 0x28200000, 0x00001101,
+	0x10040402, 0x00002020, 0x2c200000, 0x00101101,
+	0x10400402, 0x00002028, 0x28200800, 0x40001101,
+	0x10440402, 0x00002028, 0x2c200800, 0x40101101,
+	0x90000002, 0x00002010, 0x20200000, 0x00801001,
+	0x90040002, 0x00002010, 0x24200000, 0x00901001,
+	0x90400002, 0x00002018, 0x20200800, 0x40801001,
+	0x90440002, 0x00002018, 0x24200800, 0x40901001,
+	0x90000402, 0x00002030, 0x28200000, 0x00801101,
+	0x90040402, 0x00002030, 0x2c200000, 0x00901101,
+	0x90400402, 0x00002038, 0x28200800, 0x40801101,
+	0x90440402, 0x00002038, 0x2c200800, 0x40901101,
+	0x00000202, 0x00082000, 0x20000000, 0x00000005,
+	0x00040202, 0x00082000, 0x24000000, 0x00100005,
+	0x00400202, 0x00082008, 0x20000800, 0x40000005,
+	0x00440202, 0x00082008, 0x24000800, 0x40100005,
+	0x00000602, 0x00082020, 0x28000000, 0x00000105,
+	0x00040602, 0x00082020, 0x2c000000, 0x00100105,
+	0x00400602, 0x00082028, 0x28000800, 0x40000105,
+	0x00440602, 0x00082028, 0x2c000800, 0x40100105,
+	0x80000202, 0x00082010, 0x20000000, 0x00800005,
+	0x80040202, 0x00082010, 0x24000000, 0x00900005,
+	0x80400202, 0x00082018, 0x20000800, 0x40800005,
+	0x80440202, 0x00082018, 0x24000800, 0x40900005,
+	0x80000602, 0x00082030, 0x28000000, 0x00800105,
+	0x80040602, 0x00082030, 0x2c000000, 0x00900105,
+	0x80400602, 0x00082038, 0x28000800, 0x40800105,
+	0x80440602, 0x00082038, 0x2c000800, 0x40900105,
+	0x10000202, 0x00082000, 0x20200000, 0x00001005,
+	0x10040202, 0x00082000, 0x24200000, 0x00101005,
+	0x10400202, 0x00082008, 0x20200800, 0x40001005,
+	0x10440202, 0x00082008, 0x24200800, 0x40101005,
+	0x10000602, 0x00082020, 0x28200000, 0x00001105,
+	0x10040602, 0x00082020, 0x2c200000, 0x00101105,
+	0x10400602, 0x00082028, 0x28200800, 0x40001105,
+	0x10440602, 0x00082028, 0x2c200800, 0x40101105,
+	0x90000202, 0x00082010, 0x20200000, 0x00801005,
+	0x90040202, 0x00082010, 0x24200000, 0x00901005,
+	0x90400202, 0x00082018, 0x20200800, 0x40801005,
+	0x90440202, 0x00082018, 0x24200800, 0x40901005,
+	0x90000602, 0x00082030, 0x28200000, 0x00801105,
+	0x90040602, 0x00082030, 0x2c200000, 0x00901105,
+	0x90400602, 0x00082038, 0x28200800, 0x40801105,
+	0x90440602, 0x00082038, 0x2c200800, 0x40901105,
+
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000008, 0x00080000, 0x10000000,
+	0x02000000, 0x00000000, 0x00000080, 0x00001000,
+	0x02000000, 0x00000008, 0x00080080, 0x10001000,
+	0x00004000, 0x00000000, 0x00000040, 0x00040000,
+	0x00004000, 0x00000008, 0x00080040, 0x10040000,
+	0x02004000, 0x00000000, 0x000000c0, 0x00041000,
+	0x02004000, 0x00000008, 0x000800c0, 0x10041000,
+	0x00020000, 0x00008000, 0x08000000, 0x00200000,
+	0x00020000, 0x00008008, 0x08080000, 0x10200000,
+	0x02020000, 0x00008000, 0x08000080, 0x00201000,
+	0x02020000, 0x00008008, 0x08080080, 0x10201000,
+	0x00024000, 0x00008000, 0x08000040, 0x00240000,
+	0x00024000, 0x00008008, 0x08080040, 0x10240000,
+	0x02024000, 0x00008000, 0x080000c0, 0x00241000,
+	0x02024000, 0x00008008, 0x080800c0, 0x10241000,
+	0x00000000, 0x01000000, 0x00002000, 0x00000020,
+	0x00000000, 0x01000008, 0x00082000, 0x10000020,
+	0x02000000, 0x01000000, 0x00002080, 0x00001020,
+	0x02000000, 0x01000008, 0x00082080, 0x10001020,
+	0x00004000, 0x01000000, 0x00002040, 0x00040020,
+	0x00004000, 0x01000008, 0x00082040, 0x10040020,
+	0x02004000, 0x01000000, 0x000020c0, 0x00041020,
+	0x02004000, 0x01000008, 0x000820c0, 0x10041020,
+	0x00020000, 0x01008000, 0x08002000, 0x00200020,
+	0x00020000, 0x01008008, 0x08082000, 0x10200020,
+	0x02020000, 0x01008000, 0x08002080, 0x00201020,
+	0x02020000, 0x01008008, 0x08082080, 0x10201020,
+	0x00024000, 0x01008000, 0x08002040, 0x00240020,
+	0x00024000, 0x01008008, 0x08082040, 0x10240020,
+	0x02024000, 0x01008000, 0x080020c0, 0x00241020,
+	0x02024000, 0x01008008, 0x080820c0, 0x10241020,
+	0x00000400, 0x04000000, 0x00100000, 0x00000004,
+	0x00000400, 0x04000008, 0x00180000, 0x10000004,
+	0x02000400, 0x04000000, 0x00100080, 0x00001004,
+	0x02000400, 0x04000008, 0x00180080, 0x10001004,
+	0x00004400, 0x04000000, 0x00100040, 0x00040004,
+	0x00004400, 0x04000008, 0x00180040, 0x10040004,
+	0x02004400, 0x04000000, 0x001000c0, 0x00041004,
+	0x02004400, 0x04000008, 0x001800c0, 0x10041004,
+	0x00020400, 0x04008000, 0x08100000, 0x00200004,
+	0x00020400, 0x04008008, 0x08180000, 0x10200004,
+	0x02020400, 0x04008000, 0x08100080, 0x00201004,
+	0x02020400, 0x04008008, 0x08180080, 0x10201004,
+	0x00024400, 0x04008000, 0x08100040, 0x00240004,
+	0x00024400, 0x04008008, 0x08180040, 0x10240004,
+	0x02024400, 0x04008000, 0x081000c0, 0x00241004,
+	0x02024400, 0x04008008, 0x081800c0, 0x10241004,
+	0x00000400, 0x05000000, 0x00102000, 0x00000024,
+	0x00000400, 0x05000008, 0x00182000, 0x10000024,
+	0x02000400, 0x05000000, 0x00102080, 0x00001024,
+	0x02000400, 0x05000008, 0x00182080, 0x10001024,
+	0x00004400, 0x05000000, 0x00102040, 0x00040024,
+	0x00004400, 0x05000008, 0x00182040, 0x10040024,
+	0x02004400, 0x05000000, 0x001020c0, 0x00041024,
+	0x02004400, 0x05000008, 0x001820c0, 0x10041024,
+	0x00020400, 0x05008000, 0x08102000, 0x00200024,
+	0x00020400, 0x05008008, 0x08182000, 0x10200024,
+	0x02020400, 0x05008000, 0x08102080, 0x00201024,
+	0x02020400, 0x05008008, 0x08182080, 0x10201024,
+	0x00024400, 0x05008000, 0x08102040, 0x00240024,
+	0x00024400, 0x05008008, 0x08182040, 0x10240024,
+	0x02024400, 0x05008000, 0x081020c0, 0x00241024,
+	0x02024400, 0x05008008, 0x081820c0, 0x10241024,
+	0x00000800, 0x00010000, 0x20000000, 0x00000010,
+	0x00000800, 0x00010008, 0x20080000, 0x10000010,
+	0x02000800, 0x00010000, 0x20000080, 0x00001010,
+	0x02000800, 0x00010008, 0x20080080, 0x10001010,
+	0x00004800, 0x00010000, 0x20000040, 0x00040010,
+	0x00004800, 0x00010008, 0x20080040, 0x10040010,
+	0x02004800, 0x00010000, 0x200000c0, 0x00041010,
+	0x02004800, 0x00010008, 0x200800c0, 0x10041010,
+	0x00020800, 0x00018000, 0x28000000, 0x00200010,
+	0x00020800, 0x00018008, 0x28080000, 0x10200010,
+	0x02020800, 0x00018000, 0x28000080, 0x00201010,
+	0x02020800, 0x00018008, 0x28080080, 0x10201010,
+	0x00024800, 0x00018000, 0x28000040, 0x00240010,
+	0x00024800, 0x00018008, 0x28080040, 0x10240010,
+	0x02024800, 0x00018000, 0x280000c0, 0x00241010,
+	0x02024800, 0x00018008, 0x280800c0, 0x10241010,
+	0x00000800, 0x01010000, 0x20002000, 0x00000030,
+	0x00000800, 0x01010008, 0x20082000, 0x10000030,
+	0x02000800, 0x01010000, 0x20002080, 0x00001030,
+	0x02000800, 0x01010008, 0x20082080, 0x10001030,
+	0x00004800, 0x01010000, 0x20002040, 0x00040030,
+	0x00004800, 0x01010008, 0x20082040, 0x10040030,
+	0x02004800, 0x01010000, 0x200020c0, 0x00041030,
+	0x02004800, 0x01010008, 0x200820c0, 0x10041030,
+	0x00020800, 0x01018000, 0x28002000, 0x00200030,
+	0x00020800, 0x01018008, 0x28082000, 0x10200030,
+	0x02020800, 0x01018000, 0x28002080, 0x00201030,
+	0x02020800, 0x01018008, 0x28082080, 0x10201030,
+	0x00024800, 0x01018000, 0x28002040, 0x00240030,
+	0x00024800, 0x01018008, 0x28082040, 0x10240030,
+	0x02024800, 0x01018000, 0x280020c0, 0x00241030,
+	0x02024800, 0x01018008, 0x280820c0, 0x10241030,
+	0x00000c00, 0x04010000, 0x20100000, 0x00000014,
+	0x00000c00, 0x04010008, 0x20180000, 0x10000014,
+	0x02000c00, 0x04010000, 0x20100080, 0x00001014,
+	0x02000c00, 0x04010008, 0x20180080, 0x10001014,
+	0x00004c00, 0x04010000, 0x20100040, 0x00040014,
+	0x00004c00, 0x04010008, 0x20180040, 0x10040014,
+	0x02004c00, 0x04010000, 0x201000c0, 0x00041014,
+	0x02004c00, 0x04010008, 0x201800c0, 0x10041014,
+	0x00020c00, 0x04018000, 0x28100000, 0x00200014,
+	0x00020c00, 0x04018008, 0x28180000, 0x10200014,
+	0x02020c00, 0x04018000, 0x28100080, 0x00201014,
+	0x02020c00, 0x04018008, 0x28180080, 0x10201014,
+	0x00024c00, 0x04018000, 0x28100040, 0x00240014,
+	0x00024c00, 0x04018008, 0x28180040, 0x10240014,
+	0x02024c00, 0x04018000, 0x281000c0, 0x00241014,
+	0x02024c00, 0x04018008, 0x281800c0, 0x10241014,
+	0x00000c00, 0x05010000, 0x20102000, 0x00000034,
+	0x00000c00, 0x05010008, 0x20182000, 0x10000034,
+	0x02000c00, 0x05010000, 0x20102080, 0x00001034,
+	0x02000c00, 0x05010008, 0x20182080, 0x10001034,
+	0x00004c00, 0x05010000, 0x20102040, 0x00040034,
+	0x00004c00, 0x05010008, 0x20182040, 0x10040034,
+	0x02004c00, 0x05010000, 0x201020c0, 0x00041034,
+	0x02004c00, 0x05010008, 0x201820c0, 0x10041034,
+	0x00020c00, 0x05018000, 0x28102000, 0x00200034,
+	0x00020c00, 0x05018008, 0x28182000, 0x10200034,
+	0x02020c00, 0x05018000, 0x28102080, 0x00201034,
+	0x02020c00, 0x05018008, 0x28182080, 0x10201034,
+	0x00024c00, 0x05018000, 0x28102040, 0x00240034,
+	0x00024c00, 0x05018008, 0x28182040, 0x10240034,
+	0x02024c00, 0x05018000, 0x281020c0, 0x00241034,
+	0x02024c00, 0x05018008, 0x281820c0, 0x10241034
+};
+
+/* S-box lookup tables */
+
+static const u32 S1[64] = {
+	0x01010400, 0x00000000, 0x00010000, 0x01010404,
+	0x01010004, 0x00010404, 0x00000004, 0x00010000,
+	0x00000400, 0x01010400, 0x01010404, 0x00000400,
+	0x01000404, 0x01010004, 0x01000000, 0x00000004,
+	0x00000404, 0x01000400, 0x01000400, 0x00010400,
+	0x00010400, 0x01010000, 0x01010000, 0x01000404,
+	0x00010004, 0x01000004, 0x01000004, 0x00010004,
+	0x00000000, 0x00000404, 0x00010404, 0x01000000,
+	0x00010000, 0x01010404, 0x00000004, 0x01010000,
+	0x01010400, 0x01000000, 0x01000000, 0x00000400,
+	0x01010004, 0x00010000, 0x00010400, 0x01000004,
+	0x00000400, 0x00000004, 0x01000404, 0x00010404,
+	0x01010404, 0x00010004, 0x01010000, 0x01000404,
+	0x01000004, 0x00000404, 0x00010404, 0x01010400,
+	0x00000404, 0x01000400, 0x01000400, 0x00000000,
+	0x00010004, 0x00010400, 0x00000000, 0x01010004
+};
+
+static const u32 S2[64] = {
+	0x80108020, 0x80008000, 0x00008000, 0x00108020,
+	0x00100000, 0x00000020, 0x80100020, 0x80008020,
+	0x80000020, 0x80108020, 0x80108000, 0x80000000,
+	0x80008000, 0x00100000, 0x00000020, 0x80100020,
+	0x00108000, 0x00100020, 0x80008020, 0x00000000,
+	0x80000000, 0x00008000, 0x00108020, 0x80100000,
+	0x00100020, 0x80000020, 0x00000000, 0x00108000,
+	0x00008020, 0x80108000, 0x80100000, 0x00008020,
+	0x00000000, 0x00108020, 0x80100020, 0x00100000,
+	0x80008020, 0x80100000, 0x80108000, 0x00008000,
+	0x80100000, 0x80008000, 0x00000020, 0x80108020,
+	0x00108020, 0x00000020, 0x00008000, 0x80000000,
+	0x00008020, 0x80108000, 0x00100000, 0x80000020,
+	0x00100020, 0x80008020, 0x80000020, 0x00100020,
+	0x00108000, 0x00000000, 0x80008000, 0x00008020,
+	0x80000000, 0x80100020, 0x80108020, 0x00108000
+};
+
+static const u32 S3[64] = {
+	0x00000208, 0x08020200, 0x00000000, 0x08020008,
+	0x08000200, 0x00000000, 0x00020208, 0x08000200,
+	0x00020008, 0x08000008, 0x08000008, 0x00020000,
+	0x08020208, 0x00020008, 0x08020000, 0x00000208,
+	0x08000000, 0x00000008, 0x08020200, 0x00000200,
+	0x00020200, 0x08020000, 0x08020008, 0x00020208,
+	0x08000208, 0x00020200, 0x00020000, 0x08000208,
+	0x00000008, 0x08020208, 0x00000200, 0x08000000,
+	0x08020200, 0x08000000, 0x00020008, 0x00000208,
+	0x00020000, 0x08020200, 0x08000200, 0x00000000,
+	0x00000200, 0x00020008, 0x08020208, 0x08000200,
+	0x08000008, 0x00000200, 0x00000000, 0x08020008,
+	0x08000208, 0x00020000, 0x08000000, 0x08020208,
+	0x00000008, 0x00020208, 0x00020200, 0x08000008,
+	0x08020000, 0x08000208, 0x00000208, 0x08020000,
+	0x00020208, 0x00000008, 0x08020008, 0x00020200
+};
+
+static const u32 S4[64] = {
+	0x00802001, 0x00002081, 0x00002081, 0x00000080,
+	0x00802080, 0x00800081, 0x00800001, 0x00002001,
+	0x00000000, 0x00802000, 0x00802000, 0x00802081,
+	0x00000081, 0x00000000, 0x00800080, 0x00800001,
+	0x00000001, 0x00002000, 0x00800000, 0x00802001,
+	0x00000080, 0x00800000, 0x00002001, 0x00002080,
+	0x00800081, 0x00000001, 0x00002080, 0x00800080,
+	0x00002000, 0x00802080, 0x00802081, 0x00000081,
+	0x00800080, 0x00800001, 0x00802000, 0x00802081,
+	0x00000081, 0x00000000, 0x00000000, 0x00802000,
+	0x00002080, 0x00800080, 0x00800081, 0x00000001,
+	0x00802001, 0x00002081, 0x00002081, 0x00000080,
+	0x00802081, 0x00000081, 0x00000001, 0x00002000,
+	0x00800001, 0x00002001, 0x00802080, 0x00800081,
+	0x00002001, 0x00002080, 0x00800000, 0x00802001,
+	0x00000080, 0x00800000, 0x00002000, 0x00802080
+};
+
+static const u32 S5[64] = {
+	0x00000100, 0x02080100, 0x02080000, 0x42000100,
+	0x00080000, 0x00000100, 0x40000000, 0x02080000,
+	0x40080100, 0x00080000, 0x02000100, 0x40080100,
+	0x42000100, 0x42080000, 0x00080100, 0x40000000,
+	0x02000000, 0x40080000, 0x40080000, 0x00000000,
+	0x40000100, 0x42080100, 0x42080100, 0x02000100,
+	0x42080000, 0x40000100, 0x00000000, 0x42000000,
+	0x02080100, 0x02000000, 0x42000000, 0x00080100,
+	0x00080000, 0x42000100, 0x00000100, 0x02000000,
+	0x40000000, 0x02080000, 0x42000100, 0x40080100,
+	0x02000100, 0x40000000, 0x42080000, 0x02080100,
+	0x40080100, 0x00000100, 0x02000000, 0x42080000,
+	0x42080100, 0x00080100, 0x42000000, 0x42080100,
+	0x02080000, 0x00000000, 0x40080000, 0x42000000,
+	0x00080100, 0x02000100, 0x40000100, 0x00080000,
+	0x00000000, 0x40080000, 0x02080100, 0x40000100
+};
+
+static const u32 S6[64] = {
+	0x20000010, 0x20400000, 0x00004000, 0x20404010,
+	0x20400000, 0x00000010, 0x20404010, 0x00400000,
+	0x20004000, 0x00404010, 0x00400000, 0x20000010,
+	0x00400010, 0x20004000, 0x20000000, 0x00004010,
+	0x00000000, 0x00400010, 0x20004010, 0x00004000,
+	0x00404000, 0x20004010, 0x00000010, 0x20400010,
+	0x20400010, 0x00000000, 0x00404010, 0x20404000,
+	0x00004010, 0x00404000, 0x20404000, 0x20000000,
+	0x20004000, 0x00000010, 0x20400010, 0x00404000,
+	0x20404010, 0x00400000, 0x00004010, 0x20000010,
+	0x00400000, 0x20004000, 0x20000000, 0x00004010,
+	0x20000010, 0x20404010, 0x00404000, 0x20400000,
+	0x00404010, 0x20404000, 0x00000000, 0x20400010,
+	0x00000010, 0x00004000, 0x20400000, 0x00404010,
+	0x00004000, 0x00400010, 0x20004010, 0x00000000,
+	0x20404000, 0x20000000, 0x00400010, 0x20004010
+};
+
+static const u32 S7[64] = {
+	0x00200000, 0x04200002, 0x04000802, 0x00000000,
+	0x00000800, 0x04000802, 0x00200802, 0x04200800,
+	0x04200802, 0x00200000, 0x00000000, 0x04000002,
+	0x00000002, 0x04000000, 0x04200002, 0x00000802,
+	0x04000800, 0x00200802, 0x00200002, 0x04000800,
+	0x04000002, 0x04200000, 0x04200800, 0x00200002,
+	0x04200000, 0x00000800, 0x00000802, 0x04200802,
+	0x00200800, 0x00000002, 0x04000000, 0x00200800,
+	0x04000000, 0x00200800, 0x00200000, 0x04000802,
+	0x04000802, 0x04200002, 0x04200002, 0x00000002,
+	0x00200002, 0x04000000, 0x04000800, 0x00200000,
+	0x04200800, 0x00000802, 0x00200802, 0x04200800,
+	0x00000802, 0x04000002, 0x04200802, 0x04200000,
+	0x00200800, 0x00000000, 0x00000002, 0x04200802,
+	0x00000000, 0x00200802, 0x04200000, 0x00000800,
+	0x04000002, 0x04000800, 0x00000800, 0x00200002
+};
+
+static const u32 S8[64] = {
+	0x10001040, 0x00001000, 0x00040000, 0x10041040,
+	0x10000000, 0x10001040, 0x00000040, 0x10000000,
+	0x00040040, 0x10040000, 0x10041040, 0x00041000,
+	0x10041000, 0x00041040, 0x00001000, 0x00000040,
+	0x10040000, 0x10000040, 0x10001000, 0x00001040,
+	0x00041000, 0x00040040, 0x10040040, 0x10041000,
+	0x00001040, 0x00000000, 0x00000000, 0x10040040,
+	0x10000040, 0x10001000, 0x00041040, 0x00040000,
+	0x00041040, 0x00040000, 0x10041000, 0x00001000,
+	0x00000040, 0x10040040, 0x00001000, 0x00041040,
+	0x10001000, 0x00000040, 0x10000040, 0x10040000,
+	0x10040040, 0x10000000, 0x00040000, 0x10001040,
+	0x00000000, 0x10041040, 0x00040040, 0x10000040,
+	0x10040000, 0x10001000, 0x10001040, 0x00000000,
+	0x10041040, 0x00041000, 0x00041000, 0x00001040,
+	0x00001040, 0x00040040, 0x10000000, 0x10041000
+};
+
+/* Encryption components: IP, FP, and round function */
+
+#define IP(L, R, T)		\
+	ROL(R, 4);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xf0f0f0f0;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROL(R, 12);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xffff0000;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROR(R, 14);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xcccccccc;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROL(R, 6);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xff00ff00;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROR(R, 7);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xaaaaaaaa;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROL(L, 1);
+
+#define FP(L, R, T)		\
+	ROR(L, 1);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xaaaaaaaa;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROL(R, 7);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xff00ff00;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROR(R, 6);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xcccccccc;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROL(R, 14);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xffff0000;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROR(R, 12);		\
+	T  = L;			\
+	L ^= R;			\
+	L &= 0xf0f0f0f0;	\
+	R ^= L;			\
+	L ^= T;			\
+	ROR(R, 4);
+
+#define ROUND(L, R, A, B, K, d)					\
+	B = K[0];			A = K[1];	K += d;	\
+	B ^= R;				A ^= R;			\
+	B &= 0x3f3f3f3f;		ROR(A, 4);		\
+	L ^= S8[0xff & B];		A &= 0x3f3f3f3f;	\
+	L ^= S6[0xff & (B >> 8)];	B >>= 16;		\
+	L ^= S7[0xff & A];					\
+	L ^= S5[0xff & (A >> 8)];	A >>= 16;		\
+	L ^= S4[0xff & B];					\
+	L ^= S2[0xff & (B >> 8)];				\
+	L ^= S3[0xff & A];					\
+	L ^= S1[0xff & (A >> 8)];
+
+/*
+ * PC2 lookup tables are organized as 2 consecutive sets of 4 interleaved
+ * tables of 128 elements.  One set is for C_i and the other for D_i, while
+ * the 4 interleaved tables correspond to four 7-bit subsets of C_i or D_i.
+ *
+ * After PC1 each of the variables a,b,c,d contains a 7-bit subset of C_i
+ * or D_i in bits 7-1 (bit 0 being the least significant).
+ */
+
+#define T1(x) pt[2 * (x) + 0]
+#define T2(x) pt[2 * (x) + 1]
+#define T3(x) pt[2 * (x) + 2]
+#define T4(x) pt[2 * (x) + 3]
+
+#define DES_PC2(a, b, c, d) (T4(d) | T3(c) | T2(b) | T1(a))
+
+/*
+ * Encryption key expansion
+ *
+ * RFC2451: Weak key checks SHOULD be performed.
+ *
+ * FIPS 74:
+ *
+ *   Keys having duals are keys which produce all zeros, all ones, or
+ *   alternating zero-one patterns in the C and D registers after Permuted
+ *   Choice 1 has operated on the key.
+ *
+ */
+static unsigned long des_ekey(u32 *pe, const u8 *k)
+{
+	/* K&R: long is at least 32 bits */
+	unsigned long a, b, c, d, w;
+	const u32 *pt = pc2;
+
+	d = k[4]; d &= 0x0e; d <<= 4; d |= k[0] & 0x1e; d = pc1[d];
+	c = k[5]; c &= 0x0e; c <<= 4; c |= k[1] & 0x1e; c = pc1[c];
+	b = k[6]; b &= 0x0e; b <<= 4; b |= k[2] & 0x1e; b = pc1[b];
+	a = k[7]; a &= 0x0e; a <<= 4; a |= k[3] & 0x1e; a = pc1[a];
+
+	pe[15 * 2 + 0] = DES_PC2(a, b, c, d); d = rs[d];
+	pe[14 * 2 + 0] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[13 * 2 + 0] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[12 * 2 + 0] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[11 * 2 + 0] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[10 * 2 + 0] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[ 9 * 2 + 0] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[ 8 * 2 + 0] = DES_PC2(d, a, b, c); c = rs[c];
+	pe[ 7 * 2 + 0] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[ 6 * 2 + 0] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[ 5 * 2 + 0] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[ 4 * 2 + 0] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[ 3 * 2 + 0] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[ 2 * 2 + 0] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[ 1 * 2 + 0] = DES_PC2(c, d, a, b); b = rs[b];
+	pe[ 0 * 2 + 0] = DES_PC2(b, c, d, a);
+
+	/* Check if first half is weak */
+	w  = (a ^ c) | (b ^ d) | (rs[a] ^ c) | (b ^ rs[d]);
+
+	/* Skip to next table set */
+	pt += 512;
+
+	d = k[0]; d &= 0xe0; d >>= 4; d |= k[4] & 0xf0; d = pc1[d + 1];
+	c = k[1]; c &= 0xe0; c >>= 4; c |= k[5] & 0xf0; c = pc1[c + 1];
+	b = k[2]; b &= 0xe0; b >>= 4; b |= k[6] & 0xf0; b = pc1[b + 1];
+	a = k[3]; a &= 0xe0; a >>= 4; a |= k[7] & 0xf0; a = pc1[a + 1];
+
+	/* Check if second half is weak */
+	w |= (a ^ c) | (b ^ d) | (rs[a] ^ c) | (b ^ rs[d]);
+
+	pe[15 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d];
+	pe[14 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[13 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[12 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[11 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[10 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[ 9 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[ 8 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c];
+	pe[ 7 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[ 6 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[ 5 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[ 4 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[ 3 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[ 2 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[ 1 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b];
+	pe[ 0 * 2 + 1] = DES_PC2(b, c, d, a);
+
+	/* Fixup: 2413 5768 -> 1357 2468 */
+	for (d = 0; d < 16; ++d) {
+		a = pe[2 * d];
+		b = pe[2 * d + 1];
+		c = a ^ b;
+		c &= 0xffff0000;
+		a ^= c;
+		b ^= c;
+		ROL(b, 18);
+		pe[2 * d] = a;
+		pe[2 * d + 1] = b;
+	}
+
+	/* Zero if weak key */
+	return w;
+}
+
+int des_expand_key(struct des_ctx *ctx, const u8 *key, unsigned int keylen)
+{
+	if (keylen != DES_KEY_SIZE)
+		return -EINVAL;
+
+	return des_ekey(ctx->expkey, key) ? 0 : -ENOKEY;
+}
+EXPORT_SYMBOL_GPL(des_expand_key);
+
+/*
+ * Decryption key expansion
+ *
+ * No weak key checking is performed, as this is only used by triple DES
+ *
+ */
+static void dkey(u32 *pe, const u8 *k)
+{
+	/* K&R: long is at least 32 bits */
+	unsigned long a, b, c, d;
+	const u32 *pt = pc2;
+
+	d = k[4]; d &= 0x0e; d <<= 4; d |= k[0] & 0x1e; d = pc1[d];
+	c = k[5]; c &= 0x0e; c <<= 4; c |= k[1] & 0x1e; c = pc1[c];
+	b = k[6]; b &= 0x0e; b <<= 4; b |= k[2] & 0x1e; b = pc1[b];
+	a = k[7]; a &= 0x0e; a <<= 4; a |= k[3] & 0x1e; a = pc1[a];
+
+	pe[ 0 * 2] = DES_PC2(a, b, c, d); d = rs[d];
+	pe[ 1 * 2] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[ 2 * 2] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[ 3 * 2] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[ 4 * 2] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[ 5 * 2] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[ 6 * 2] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[ 7 * 2] = DES_PC2(d, a, b, c); c = rs[c];
+	pe[ 8 * 2] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[ 9 * 2] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[10 * 2] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[11 * 2] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[12 * 2] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[13 * 2] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[14 * 2] = DES_PC2(c, d, a, b); b = rs[b];
+	pe[15 * 2] = DES_PC2(b, c, d, a);
+
+	/* Skip to next table set */
+	pt += 512;
+
+	d = k[0]; d &= 0xe0; d >>= 4; d |= k[4] & 0xf0; d = pc1[d + 1];
+	c = k[1]; c &= 0xe0; c >>= 4; c |= k[5] & 0xf0; c = pc1[c + 1];
+	b = k[2]; b &= 0xe0; b >>= 4; b |= k[6] & 0xf0; b = pc1[b + 1];
+	a = k[3]; a &= 0xe0; a >>= 4; a |= k[7] & 0xf0; a = pc1[a + 1];
+
+	pe[ 0 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d];
+	pe[ 1 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[ 2 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[ 3 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[ 4 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[ 5 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c]; b = rs[b];
+	pe[ 6 * 2 + 1] = DES_PC2(b, c, d, a); a = rs[a]; d = rs[d];
+	pe[ 7 * 2 + 1] = DES_PC2(d, a, b, c); c = rs[c];
+	pe[ 8 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[ 9 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[10 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[11 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[12 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b]; a = rs[a];
+	pe[13 * 2 + 1] = DES_PC2(a, b, c, d); d = rs[d]; c = rs[c];
+	pe[14 * 2 + 1] = DES_PC2(c, d, a, b); b = rs[b];
+	pe[15 * 2 + 1] = DES_PC2(b, c, d, a);
+
+	/* Fixup: 2413 5768 -> 1357 2468 */
+	for (d = 0; d < 16; ++d) {
+		a = pe[2 * d];
+		b = pe[2 * d + 1];
+		c = a ^ b;
+		c &= 0xffff0000;
+		a ^= c;
+		b ^= c;
+		ROL(b, 18);
+		pe[2 * d] = a;
+		pe[2 * d + 1] = b;
+	}
+}
+
+void des_encrypt(const struct des_ctx *ctx, u8 *dst, const u8 *src)
+{
+	const u32 *K = ctx->expkey;
+	u32 L, R, A, B;
+	int i;
+
+	L = get_unaligned_le32(src);
+	R = get_unaligned_le32(src + 4);
+
+	IP(L, R, A);
+	for (i = 0; i < 8; i++) {
+		ROUND(L, R, A, B, K, 2);
+		ROUND(R, L, A, B, K, 2);
+	}
+	FP(R, L, A);
+
+	put_unaligned_le32(R, dst);
+	put_unaligned_le32(L, dst + 4);
+}
+EXPORT_SYMBOL_GPL(des_encrypt);
+
+void des_decrypt(const struct des_ctx *ctx, u8 *dst, const u8 *src)
+{
+	const u32 *K = ctx->expkey + DES_EXPKEY_WORDS - 2;
+	u32 L, R, A, B;
+	int i;
+
+	L = get_unaligned_le32(src);
+	R = get_unaligned_le32(src + 4);
+
+	IP(L, R, A);
+	for (i = 0; i < 8; i++) {
+		ROUND(L, R, A, B, K, -2);
+		ROUND(R, L, A, B, K, -2);
+	}
+	FP(R, L, A);
+
+	put_unaligned_le32(R, dst);
+	put_unaligned_le32(L, dst + 4);
+}
+EXPORT_SYMBOL_GPL(des_decrypt);
+
+int des3_ede_expand_key(struct des3_ede_ctx *ctx, const u8 *key,
+			unsigned int keylen)
+{
+	u32 *pe = ctx->expkey;
+	int err;
+
+	if (keylen != DES3_EDE_KEY_SIZE)
+		return -EINVAL;
+
+	err = des3_ede_verify_key(key, keylen, true);
+	if (err && err != -ENOKEY)
+		return err;
+
+	des_ekey(pe, key); pe += DES_EXPKEY_WORDS; key += DES_KEY_SIZE;
+	dkey(pe, key); pe += DES_EXPKEY_WORDS; key += DES_KEY_SIZE;
+	des_ekey(pe, key);
+
+	return err;
+}
+EXPORT_SYMBOL_GPL(des3_ede_expand_key);
+
+void des3_ede_encrypt(const struct des3_ede_ctx *dctx, u8 *dst, const u8 *src)
+{
+	const u32 *K = dctx->expkey;
+	u32 L, R, A, B;
+	int i;
+
+	L = get_unaligned_le32(src);
+	R = get_unaligned_le32(src + 4);
+
+	IP(L, R, A);
+	for (i = 0; i < 8; i++) {
+		ROUND(L, R, A, B, K, 2);
+		ROUND(R, L, A, B, K, 2);
+	}
+	for (i = 0; i < 8; i++) {
+		ROUND(R, L, A, B, K, 2);
+		ROUND(L, R, A, B, K, 2);
+	}
+	for (i = 0; i < 8; i++) {
+		ROUND(L, R, A, B, K, 2);
+		ROUND(R, L, A, B, K, 2);
+	}
+	FP(R, L, A);
+
+	put_unaligned_le32(R, dst);
+	put_unaligned_le32(L, dst + 4);
+}
+EXPORT_SYMBOL_GPL(des3_ede_encrypt);
+
+void des3_ede_decrypt(const struct des3_ede_ctx *dctx, u8 *dst, const u8 *src)
+{
+	const u32 *K = dctx->expkey + DES3_EDE_EXPKEY_WORDS - 2;
+	u32 L, R, A, B;
+	int i;
+
+	L = get_unaligned_le32(src);
+	R = get_unaligned_le32(src + 4);
+
+	IP(L, R, A);
+	for (i = 0; i < 8; i++) {
+		ROUND(L, R, A, B, K, -2);
+		ROUND(R, L, A, B, K, -2);
+	}
+	for (i = 0; i < 8; i++) {
+		ROUND(R, L, A, B, K, -2);
+		ROUND(L, R, A, B, K, -2);
+	}
+	for (i = 0; i < 8; i++) {
+		ROUND(L, R, A, B, K, -2);
+		ROUND(R, L, A, B, K, -2);
+	}
+	FP(R, L, A);
+
+	put_unaligned_le32(R, dst);
+	put_unaligned_le32(L, dst + 4);
+}
+EXPORT_SYMBOL_GPL(des3_ede_decrypt);
+
+MODULE_LICENSE("GPL");
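
The helpers added above give single-block users a DES/3DES primitive without going through the crypto API. Below is a minimal, illustrative sketch (not part of this patch) of how a caller might use them; it assumes only the declarations visible in this file plus <crypto/des.h> for struct des_ctx and the size constants, and memzero_explicit() from <linux/string.h>.

#include <crypto/des.h>
#include <linux/string.h>

static int des_lib_demo(const u8 key[DES_KEY_SIZE],
			const u8 in[DES_BLOCK_SIZE],
			u8 out[DES_BLOCK_SIZE])
{
	struct des_ctx ctx;
	int err;

	/* -EINVAL on a bad length, -ENOKEY when the key is weak */
	err = des_expand_key(&ctx, key, DES_KEY_SIZE);
	if (err)
		return err;

	des_encrypt(&ctx, out, in);		/* one 8-byte block */

	memzero_explicit(&ctx, sizeof(ctx));	/* scrub the round keys */
	return 0;
}

des3_ede_expand_key()/des3_ede_encrypt() follow the same pattern, except that the key is still expanded when the check fails with -ENOKEY, and that value is returned so the caller can decide whether to accept a non-compliant 3DES key.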
diff --git a/lib/sha256.c b/lib/crypto/sha256.c
similarity index 66%
rename from lib/sha256.c
rename to lib/crypto/sha256.c
index d9af148..66cb04b 100644
--- a/lib/sha256.c
+++ b/lib/crypto/sha256.c
@@ -12,9 +12,11 @@
  */
 
 #include <linux/bitops.h>
-#include <linux/sha256.h>
+#include <linux/export.h>
+#include <linux/module.h>
 #include <linux/string.h>
-#include <asm/byteorder.h>
+#include <crypto/sha.h>
+#include <asm/unaligned.h>
 
 static inline u32 Ch(u32 x, u32 y, u32 z)
 {
@@ -33,7 +35,7 @@
 
 static inline void LOAD_OP(int I, u32 *W, const u8 *input)
 {
-	W[I] = __be32_to_cpu(((__be32 *)(input))[I]);
+	W[I] = get_unaligned_be32((__u32 *)input + I);
 }
 
 static inline void BLEND_OP(int I, u32 *W)
@@ -92,131 +94,116 @@
 	t1 = b + e1(g) + Ch(g, h, a) + 0x9bdc06a7 + W[14];
 	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
 	t1 = a + e1(f) + Ch(f, g, h) + 0xc19bf174 + W[15];
-	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1+t2;
+	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
 
 	t1 = h + e1(e) + Ch(e, f, g) + 0xe49b69c1 + W[16];
-	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1+t2;
+	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
 	t1 = g + e1(d) + Ch(d, e, f) + 0xefbe4786 + W[17];
-	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1+t2;
+	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
 	t1 = f + e1(c) + Ch(c, d, e) + 0x0fc19dc6 + W[18];
-	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1+t2;
+	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
 	t1 = e + e1(b) + Ch(b, c, d) + 0x240ca1cc + W[19];
-	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1+t2;
+	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
 	t1 = d + e1(a) + Ch(a, b, c) + 0x2de92c6f + W[20];
-	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1+t2;
+	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
 	t1 = c + e1(h) + Ch(h, a, b) + 0x4a7484aa + W[21];
-	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1+t2;
+	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
 	t1 = b + e1(g) + Ch(g, h, a) + 0x5cb0a9dc + W[22];
-	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1+t2;
+	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
 	t1 = a + e1(f) + Ch(f, g, h) + 0x76f988da + W[23];
-	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1+t2;
+	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
 
 	t1 = h + e1(e) + Ch(e, f, g) + 0x983e5152 + W[24];
-	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1+t2;
+	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
 	t1 = g + e1(d) + Ch(d, e, f) + 0xa831c66d + W[25];
-	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1+t2;
+	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
 	t1 = f + e1(c) + Ch(c, d, e) + 0xb00327c8 + W[26];
-	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1+t2;
+	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
 	t1 = e + e1(b) + Ch(b, c, d) + 0xbf597fc7 + W[27];
-	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1+t2;
+	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
 	t1 = d + e1(a) + Ch(a, b, c) + 0xc6e00bf3 + W[28];
-	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1+t2;
+	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
 	t1 = c + e1(h) + Ch(h, a, b) + 0xd5a79147 + W[29];
-	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1+t2;
+	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
 	t1 = b + e1(g) + Ch(g, h, a) + 0x06ca6351 + W[30];
-	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1+t2;
+	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
 	t1 = a + e1(f) + Ch(f, g, h) + 0x14292967 + W[31];
-	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1+t2;
+	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
 
 	t1 = h + e1(e) + Ch(e, f, g) + 0x27b70a85 + W[32];
-	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1+t2;
+	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
 	t1 = g + e1(d) + Ch(d, e, f) + 0x2e1b2138 + W[33];
-	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1+t2;
+	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
 	t1 = f + e1(c) + Ch(c, d, e) + 0x4d2c6dfc + W[34];
-	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1+t2;
+	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
 	t1 = e + e1(b) + Ch(b, c, d) + 0x53380d13 + W[35];
-	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1+t2;
+	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
 	t1 = d + e1(a) + Ch(a, b, c) + 0x650a7354 + W[36];
-	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1+t2;
+	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
 	t1 = c + e1(h) + Ch(h, a, b) + 0x766a0abb + W[37];
-	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1+t2;
+	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
 	t1 = b + e1(g) + Ch(g, h, a) + 0x81c2c92e + W[38];
-	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1+t2;
+	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
 	t1 = a + e1(f) + Ch(f, g, h) + 0x92722c85 + W[39];
-	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1+t2;
+	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
 
 	t1 = h + e1(e) + Ch(e, f, g) + 0xa2bfe8a1 + W[40];
-	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1+t2;
+	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
 	t1 = g + e1(d) + Ch(d, e, f) + 0xa81a664b + W[41];
-	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1+t2;
+	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
 	t1 = f + e1(c) + Ch(c, d, e) + 0xc24b8b70 + W[42];
-	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1+t2;
+	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
 	t1 = e + e1(b) + Ch(b, c, d) + 0xc76c51a3 + W[43];
-	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1+t2;
+	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
 	t1 = d + e1(a) + Ch(a, b, c) + 0xd192e819 + W[44];
-	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1+t2;
+	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
 	t1 = c + e1(h) + Ch(h, a, b) + 0xd6990624 + W[45];
-	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1+t2;
+	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
 	t1 = b + e1(g) + Ch(g, h, a) + 0xf40e3585 + W[46];
-	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1+t2;
+	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
 	t1 = a + e1(f) + Ch(f, g, h) + 0x106aa070 + W[47];
-	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1+t2;
+	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
 
 	t1 = h + e1(e) + Ch(e, f, g) + 0x19a4c116 + W[48];
-	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1+t2;
+	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
 	t1 = g + e1(d) + Ch(d, e, f) + 0x1e376c08 + W[49];
-	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1+t2;
+	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
 	t1 = f + e1(c) + Ch(c, d, e) + 0x2748774c + W[50];
-	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1+t2;
+	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
 	t1 = e + e1(b) + Ch(b, c, d) + 0x34b0bcb5 + W[51];
-	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1+t2;
+	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
 	t1 = d + e1(a) + Ch(a, b, c) + 0x391c0cb3 + W[52];
-	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1+t2;
+	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
 	t1 = c + e1(h) + Ch(h, a, b) + 0x4ed8aa4a + W[53];
-	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1+t2;
+	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
 	t1 = b + e1(g) + Ch(g, h, a) + 0x5b9cca4f + W[54];
-	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1+t2;
+	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
 	t1 = a + e1(f) + Ch(f, g, h) + 0x682e6ff3 + W[55];
-	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1+t2;
+	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
 
 	t1 = h + e1(e) + Ch(e, f, g) + 0x748f82ee + W[56];
-	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1+t2;
+	t2 = e0(a) + Maj(a, b, c);    d += t1;    h = t1 + t2;
 	t1 = g + e1(d) + Ch(d, e, f) + 0x78a5636f + W[57];
-	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1+t2;
+	t2 = e0(h) + Maj(h, a, b);    c += t1;    g = t1 + t2;
 	t1 = f + e1(c) + Ch(c, d, e) + 0x84c87814 + W[58];
-	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1+t2;
+	t2 = e0(g) + Maj(g, h, a);    b += t1;    f = t1 + t2;
 	t1 = e + e1(b) + Ch(b, c, d) + 0x8cc70208 + W[59];
-	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1+t2;
+	t2 = e0(f) + Maj(f, g, h);    a += t1;    e = t1 + t2;
 	t1 = d + e1(a) + Ch(a, b, c) + 0x90befffa + W[60];
-	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1+t2;
+	t2 = e0(e) + Maj(e, f, g);    h += t1;    d = t1 + t2;
 	t1 = c + e1(h) + Ch(h, a, b) + 0xa4506ceb + W[61];
-	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1+t2;
+	t2 = e0(d) + Maj(d, e, f);    g += t1;    c = t1 + t2;
 	t1 = b + e1(g) + Ch(g, h, a) + 0xbef9a3f7 + W[62];
-	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1+t2;
+	t2 = e0(c) + Maj(c, d, e);    f += t1;    b = t1 + t2;
 	t1 = a + e1(f) + Ch(f, g, h) + 0xc67178f2 + W[63];
-	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1+t2;
+	t2 = e0(b) + Maj(b, c, d);    e += t1;    a = t1 + t2;
 
 	state[0] += a; state[1] += b; state[2] += c; state[3] += d;
 	state[4] += e; state[5] += f; state[6] += g; state[7] += h;
 
 	/* clear any sensitive info... */
 	a = b = c = d = e = f = g = h = t1 = t2 = 0;
-	memset(W, 0, 64 * sizeof(u32));
-}
-
-int sha256_init(struct sha256_state *sctx)
-{
-	sctx->state[0] = SHA256_H0;
-	sctx->state[1] = SHA256_H1;
-	sctx->state[2] = SHA256_H2;
-	sctx->state[3] = SHA256_H3;
-	sctx->state[4] = SHA256_H4;
-	sctx->state[5] = SHA256_H5;
-	sctx->state[6] = SHA256_H6;
-	sctx->state[7] = SHA256_H7;
-	sctx->count = 0;
-
-	return 0;
+	memzero_explicit(W, 64 * sizeof(u32));
 }
 
 int sha256_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
@@ -248,8 +235,15 @@
 
 	return 0;
 }
+EXPORT_SYMBOL(sha256_update);
 
-int sha256_final(struct sha256_state *sctx, u8 *out)
+int sha224_update(struct sha256_state *sctx, const u8 *data, unsigned int len)
+{
+	return sha256_update(sctx, data, len);
+}
+EXPORT_SYMBOL(sha224_update);
+
+static int __sha256_final(struct sha256_state *sctx, u8 *out, int digest_words)
 {
 	__be32 *dst = (__be32 *)out;
 	__be64 bits;
@@ -269,11 +263,25 @@
 	sha256_update(sctx, (const u8 *)&bits, sizeof(bits));
 
 	/* Store state in digest */
-	for (i = 0; i < 8; i++)
-		dst[i] = cpu_to_be32(sctx->state[i]);
+	for (i = 0; i < digest_words; i++)
+		put_unaligned_be32(sctx->state[i], &dst[i]);
 
 	/* Zeroize sensitive information. */
 	memset(sctx, 0, sizeof(*sctx));
 
 	return 0;
 }
+
+int sha256_final(struct sha256_state *sctx, u8 *out)
+{
+	return __sha256_final(sctx, out, 8);
+}
+EXPORT_SYMBOL(sha256_final);
+
+int sha224_final(struct sha256_state *sctx, u8 *out)
+{
+	return __sha256_final(sctx, out, 7);
+}
+EXPORT_SYMBOL(sha224_final);
+
+MODULE_LICENSE("GPL");
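
With the code now living under lib/crypto/, in-kernel users can hash a buffer directly through these exported helpers. A minimal sketch (not part of this diff), assuming sha256_init() remains visible to callers through <crypto/sha.h> after the move:

#include <crypto/sha.h>

static void sha256_lib_demo(const u8 *data, unsigned int len,
			    u8 digest[SHA256_DIGEST_SIZE])
{
	struct sha256_state sctx;

	sha256_init(&sctx);
	sha256_update(&sctx, data, len);
	sha256_final(&sctx, digest);	/* writes the digest and zeroizes sctx */
}

sha224_update()/sha224_final() reuse the same state and compression function; only the digest length (7 words instead of 8) differs, which is exactly what the __sha256_final() helper above parameterizes.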
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
index 3bb6260..2dceaca 100644
--- a/lib/mpi/longlong.h
+++ b/lib/mpi/longlong.h
@@ -639,30 +639,12 @@
 	**************  MIPS  *****************
 	***************************************/
 #if defined(__mips__) && W_TYPE_SIZE == 32
-#if (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
 #define umul_ppmm(w1, w0, u, v)			\
 do {						\
 	UDItype __ll = (UDItype)(u) * (v);	\
 	w1 = __ll >> 32;			\
 	w0 = __ll;				\
 } while (0)
-#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
-#define umul_ppmm(w1, w0, u, v) \
-	__asm__ ("multu %2,%3" \
-	: "=l" ((USItype)(w0)), \
-	     "=h" ((USItype)(w1)) \
-	: "d" ((USItype)(u)), \
-	     "d" ((USItype)(v)))
-#else
-#define umul_ppmm(w1, w0, u, v) \
-	__asm__ ("multu %2,%3\n" \
-	   "mflo %0\n" \
-	   "mfhi %1" \
-	: "=d" ((USItype)(w0)), \
-	     "=d" ((USItype)(w1)) \
-	: "d" ((USItype)(u)), \
-	     "d" ((USItype)(v)))
-#endif
 #define UMUL_TIME 10
 #define UDIV_TIME 100
 #endif /* __mips__ */
@@ -687,7 +669,7 @@
 		 : "d" ((UDItype)(u)),					\
 		   "d" ((UDItype)(v)));					\
 } while (0)
-#elif (__GNUC__ >= 5) || (__GNUC__ >= 4 && __GNUC_MINOR__ >= 4)
+#else
 #define umul_ppmm(w1, w0, u, v) \
 do {									\
 	typedef unsigned int __ll_UTItype __attribute__((mode(TI)));	\
@@ -695,22 +677,6 @@
 	w1 = __ll >> 64;						\
 	w0 = __ll;							\
 } while (0)
-#elif __GNUC__ > 2 || __GNUC_MINOR__ >= 7
-#define umul_ppmm(w1, w0, u, v) \
-	__asm__ ("dmultu %2,%3" \
-	: "=l" ((UDItype)(w0)), \
-	     "=h" ((UDItype)(w1)) \
-	: "d" ((UDItype)(u)), \
-	     "d" ((UDItype)(v)))
-#else
-#define umul_ppmm(w1, w0, u, v) \
-	__asm__ ("dmultu %2,%3\n" \
-	   "mflo %0\n" \
-	   "mfhi %1" \
-	: "=d" ((UDItype)(w0)), \
-	     "=d" ((UDItype)(w1)) \
-	: "d" ((UDItype)(u)), \
-	     "d" ((UDItype)(v)))
 #endif
 #define UMUL_TIME 20
 #define UDIV_TIME 140
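
The inline-asm fallbacks removed here were only reachable with GCC versions older than the branch that keeps the plain C form, and kernels of this vintage already require a newer compiler than those branches target. What umul_ppmm() computes is simply the double-width product split into halves; an illustrative stand-alone version for a 32-bit word size:

#include <stdint.h>

/* Illustrative only: the full 64-bit product of two 32-bit operands,
 * returned as separate high and low words. */
static void umul_ppmm_demo(uint32_t *w1, uint32_t *w0,
			   uint32_t u, uint32_t v)
{
	uint64_t ll = (uint64_t)u * v;	/* compiler emits the widening multiply */

	*w1 = ll >> 32;			/* high word */
	*w0 = (uint32_t)ll;		/* low word */
}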
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 2efac04..3803135 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -10,7 +10,8 @@
 	select CRC16
 	select CRYPTO
 	select CRYPTO_BLKCIPHER
-	select CRYPTO_AES
+	select CRYPTO_LIB_AES
+	imply CRYPTO_AES
 	select CRYPTO_CMAC
 	select CRYPTO_ECB
 	select CRYPTO_SHA256
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 6c2b4e6..26e8cfad 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -23,6 +23,7 @@
 #include <linux/debugfs.h>
 #include <linux/scatterlist.h>
 #include <linux/crypto.h>
+#include <crypto/aes.h>
 #include <crypto/algapi.h>
 #include <crypto/b128ops.h>
 #include <crypto/hash.h>
@@ -88,7 +89,6 @@
 	u8			local_rand[16];
 	bool			debug_key;
 
-	struct crypto_cipher	*tfm_aes;
 	struct crypto_shash	*tfm_cmac;
 	struct crypto_kpp	*tfm_ecdh;
 };
@@ -127,7 +127,6 @@
 	u8			dhkey[32];
 	u8			mackey[16];
 
-	struct crypto_cipher	*tfm_aes;
 	struct crypto_shash	*tfm_cmac;
 	struct crypto_kpp	*tfm_ecdh;
 };
@@ -377,22 +376,18 @@
  * s1 and ah.
  */
 
-static int smp_e(struct crypto_cipher *tfm, const u8 *k, u8 *r)
+static int smp_e(const u8 *k, u8 *r)
 {
+	struct crypto_aes_ctx ctx;
 	uint8_t tmp[16], data[16];
 	int err;
 
 	SMP_DBG("k %16phN r %16phN", k, r);
 
-	if (!tfm) {
-		BT_ERR("tfm %p", tfm);
-		return -EINVAL;
-	}
-
 	/* The most significant octet of key corresponds to k[0] */
 	swap_buf(k, tmp, 16);
 
-	err = crypto_cipher_setkey(tfm, tmp, 16);
+	err = aes_expandkey(&ctx, tmp, 16);
 	if (err) {
 		BT_ERR("cipher setkey failed: %d", err);
 		return err;
@@ -401,17 +396,18 @@
 	/* Most significant octet of plaintextData corresponds to data[0] */
 	swap_buf(r, data, 16);
 
-	crypto_cipher_encrypt_one(tfm, data, data);
+	aes_encrypt(&ctx, data, data);
 
 	/* Most significant octet of encryptedData corresponds to data[0] */
 	swap_buf(data, r, 16);
 
 	SMP_DBG("r %16phN", r);
 
+	memzero_explicit(&ctx, sizeof(ctx));
 	return err;
 }
 
-static int smp_c1(struct crypto_cipher *tfm_aes, const u8 k[16],
+static int smp_c1(const u8 k[16],
 		  const u8 r[16], const u8 preq[7], const u8 pres[7], u8 _iat,
 		  const bdaddr_t *ia, u8 _rat, const bdaddr_t *ra, u8 res[16])
 {
@@ -436,7 +432,7 @@
 	u128_xor((u128 *) res, (u128 *) r, (u128 *) p1);
 
 	/* res = e(k, res) */
-	err = smp_e(tfm_aes, k, res);
+	err = smp_e(k, res);
 	if (err) {
 		BT_ERR("Encrypt data error");
 		return err;
@@ -453,14 +449,14 @@
 	u128_xor((u128 *) res, (u128 *) res, (u128 *) p2);
 
 	/* res = e(k, res) */
-	err = smp_e(tfm_aes, k, res);
+	err = smp_e(k, res);
 	if (err)
 		BT_ERR("Encrypt data error");
 
 	return err;
 }
 
-static int smp_s1(struct crypto_cipher *tfm_aes, const u8 k[16],
+static int smp_s1(const u8 k[16],
 		  const u8 r1[16], const u8 r2[16], u8 _r[16])
 {
 	int err;
@@ -469,15 +465,14 @@
 	memcpy(_r, r2, 8);
 	memcpy(_r + 8, r1, 8);
 
-	err = smp_e(tfm_aes, k, _r);
+	err = smp_e(k, _r);
 	if (err)
 		BT_ERR("Encrypt data error");
 
 	return err;
 }
 
-static int smp_ah(struct crypto_cipher *tfm, const u8 irk[16],
-		  const u8 r[3], u8 res[3])
+static int smp_ah(const u8 irk[16], const u8 r[3], u8 res[3])
 {
 	u8 _res[16];
 	int err;
@@ -486,7 +481,7 @@
 	memcpy(_res, r, 3);
 	memset(_res + 3, 0, 13);
 
-	err = smp_e(tfm, irk, _res);
+	err = smp_e(irk, _res);
 	if (err) {
 		BT_ERR("Encrypt error");
 		return err;
@@ -518,7 +513,7 @@
 
 	BT_DBG("RPA %pMR IRK %*phN", bdaddr, 16, irk);
 
-	err = smp_ah(smp->tfm_aes, irk, &bdaddr->b[3], hash);
+	err = smp_ah(irk, &bdaddr->b[3], hash);
 	if (err)
 		return false;
 
@@ -541,7 +536,7 @@
 	rpa->b[5] &= 0x3f;	/* Clear two most significant bits */
 	rpa->b[5] |= 0x40;	/* Set second most significant bit */
 
-	err = smp_ah(smp->tfm_aes, irk, &rpa->b[3], rpa->b);
+	err = smp_ah(irk, &rpa->b[3], rpa->b);
 	if (err < 0)
 		return err;
 
@@ -768,7 +763,6 @@
 	kzfree(smp->slave_csrk);
 	kzfree(smp->link_key);
 
-	crypto_free_cipher(smp->tfm_aes);
 	crypto_free_shash(smp->tfm_cmac);
 	crypto_free_kpp(smp->tfm_ecdh);
 
@@ -957,7 +951,7 @@
 
 	BT_DBG("conn %p", conn);
 
-	ret = smp_c1(smp->tfm_aes, smp->tk, smp->prnd, smp->preq, smp->prsp,
+	ret = smp_c1(smp->tk, smp->prnd, smp->preq, smp->prsp,
 		     conn->hcon->init_addr_type, &conn->hcon->init_addr,
 		     conn->hcon->resp_addr_type, &conn->hcon->resp_addr,
 		     cp.confirm_val);
@@ -983,12 +977,9 @@
 	u8 confirm[16];
 	int ret;
 
-	if (IS_ERR_OR_NULL(smp->tfm_aes))
-		return SMP_UNSPECIFIED;
-
 	BT_DBG("conn %p %s", conn, conn->hcon->out ? "master" : "slave");
 
-	ret = smp_c1(smp->tfm_aes, smp->tk, smp->rrnd, smp->preq, smp->prsp,
+	ret = smp_c1(smp->tk, smp->rrnd, smp->preq, smp->prsp,
 		     hcon->init_addr_type, &hcon->init_addr,
 		     hcon->resp_addr_type, &hcon->resp_addr, confirm);
 	if (ret)
@@ -1005,7 +996,7 @@
 		__le64 rand = 0;
 		__le16 ediv = 0;
 
-		smp_s1(smp->tfm_aes, smp->tk, smp->rrnd, smp->prnd, stk);
+		smp_s1(smp->tk, smp->rrnd, smp->prnd, stk);
 
 		if (test_and_set_bit(HCI_CONN_ENCRYPT_PEND, &hcon->flags))
 			return SMP_UNSPECIFIED;
@@ -1021,7 +1012,7 @@
 		smp_send_cmd(conn, SMP_CMD_PAIRING_RANDOM, sizeof(smp->prnd),
 			     smp->prnd);
 
-		smp_s1(smp->tfm_aes, smp->tk, smp->prnd, smp->rrnd, stk);
+		smp_s1(smp->tk, smp->prnd, smp->rrnd, stk);
 
 		if (hcon->pending_sec_level == BT_SECURITY_HIGH)
 			auth = 1;
@@ -1389,16 +1380,10 @@
 	if (!smp)
 		return NULL;
 
-	smp->tfm_aes = crypto_alloc_cipher("aes", 0, 0);
-	if (IS_ERR(smp->tfm_aes)) {
-		BT_ERR("Unable to create AES crypto context");
-		goto zfree_smp;
-	}
-
 	smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
 	if (IS_ERR(smp->tfm_cmac)) {
 		BT_ERR("Unable to create CMAC crypto context");
-		goto free_cipher;
+		goto zfree_smp;
 	}
 
 	smp->tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0);
@@ -1420,8 +1405,6 @@
 
 free_shash:
 	crypto_free_shash(smp->tfm_cmac);
-free_cipher:
-	crypto_free_cipher(smp->tfm_aes);
 zfree_smp:
 	kzfree(smp);
 	return NULL;
@@ -3232,7 +3215,6 @@
 {
 	struct l2cap_chan *chan;
 	struct smp_dev *smp;
-	struct crypto_cipher *tfm_aes;
 	struct crypto_shash *tfm_cmac;
 	struct crypto_kpp *tfm_ecdh;
 
@@ -3245,17 +3227,9 @@
 	if (!smp)
 		return ERR_PTR(-ENOMEM);
 
-	tfm_aes = crypto_alloc_cipher("aes", 0, 0);
-	if (IS_ERR(tfm_aes)) {
-		BT_ERR("Unable to create AES crypto context");
-		kzfree(smp);
-		return ERR_CAST(tfm_aes);
-	}
-
 	tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
 	if (IS_ERR(tfm_cmac)) {
 		BT_ERR("Unable to create CMAC crypto context");
-		crypto_free_cipher(tfm_aes);
 		kzfree(smp);
 		return ERR_CAST(tfm_cmac);
 	}
@@ -3264,13 +3238,11 @@
 	if (IS_ERR(tfm_ecdh)) {
 		BT_ERR("Unable to create ECDH crypto context");
 		crypto_free_shash(tfm_cmac);
-		crypto_free_cipher(tfm_aes);
 		kzfree(smp);
 		return ERR_CAST(tfm_ecdh);
 	}
 
 	smp->local_oob = false;
-	smp->tfm_aes = tfm_aes;
 	smp->tfm_cmac = tfm_cmac;
 	smp->tfm_ecdh = tfm_ecdh;
 
@@ -3278,7 +3250,6 @@
 	chan = l2cap_chan_create();
 	if (!chan) {
 		if (smp) {
-			crypto_free_cipher(smp->tfm_aes);
 			crypto_free_shash(smp->tfm_cmac);
 			crypto_free_kpp(smp->tfm_ecdh);
 			kzfree(smp);
@@ -3326,7 +3297,6 @@
 	smp = chan->data;
 	if (smp) {
 		chan->data = NULL;
-		crypto_free_cipher(smp->tfm_aes);
 		crypto_free_shash(smp->tfm_cmac);
 		crypto_free_kpp(smp->tfm_ecdh);
 		kzfree(smp);
@@ -3582,7 +3552,7 @@
 	return 0;
 }
 
-static int __init test_ah(struct crypto_cipher *tfm_aes)
+static int __init test_ah(void)
 {
 	const u8 irk[16] = {
 			0x9b, 0x7d, 0x39, 0x0a, 0xa6, 0x10, 0x10, 0x34,
@@ -3592,7 +3562,7 @@
 	u8 res[3];
 	int err;
 
-	err = smp_ah(tfm_aes, irk, r, res);
+	err = smp_ah(irk, r, res);
 	if (err)
 		return err;
 
@@ -3602,7 +3572,7 @@
 	return 0;
 }
 
-static int __init test_c1(struct crypto_cipher *tfm_aes)
+static int __init test_c1(void)
 {
 	const u8 k[16] = {
 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -3622,7 +3592,7 @@
 	u8 res[16];
 	int err;
 
-	err = smp_c1(tfm_aes, k, r, preq, pres, _iat, &ia, _rat, &ra, res);
+	err = smp_c1(k, r, preq, pres, _iat, &ia, _rat, &ra, res);
 	if (err)
 		return err;
 
@@ -3632,7 +3602,7 @@
 	return 0;
 }
 
-static int __init test_s1(struct crypto_cipher *tfm_aes)
+static int __init test_s1(void)
 {
 	const u8 k[16] = {
 			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
@@ -3647,7 +3617,7 @@
 	u8 res[16];
 	int err;
 
-	err = smp_s1(tfm_aes, k, r1, r2, res);
+	err = smp_s1(k, r1, r2, res);
 	if (err)
 		return err;
 
@@ -3828,8 +3798,7 @@
 	.llseek		= default_llseek,
 };
 
-static int __init run_selftests(struct crypto_cipher *tfm_aes,
-				struct crypto_shash *tfm_cmac,
+static int __init run_selftests(struct crypto_shash *tfm_cmac,
 				struct crypto_kpp *tfm_ecdh)
 {
 	ktime_t calltime, delta, rettime;
@@ -3844,19 +3813,19 @@
 		goto done;
 	}
 
-	err = test_ah(tfm_aes);
+	err = test_ah();
 	if (err) {
 		BT_ERR("smp_ah test failed");
 		goto done;
 	}
 
-	err = test_c1(tfm_aes);
+	err = test_c1();
 	if (err) {
 		BT_ERR("smp_c1 test failed");
 		goto done;
 	}
 
-	err = test_s1(tfm_aes);
+	err = test_s1();
 	if (err) {
 		BT_ERR("smp_s1 test failed");
 		goto done;
@@ -3913,21 +3882,13 @@
 
 int __init bt_selftest_smp(void)
 {
-	struct crypto_cipher *tfm_aes;
 	struct crypto_shash *tfm_cmac;
 	struct crypto_kpp *tfm_ecdh;
 	int err;
 
-	tfm_aes = crypto_alloc_cipher("aes", 0, 0);
-	if (IS_ERR(tfm_aes)) {
-		BT_ERR("Unable to create AES crypto context");
-		return PTR_ERR(tfm_aes);
-	}
-
 	tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0);
 	if (IS_ERR(tfm_cmac)) {
 		BT_ERR("Unable to create CMAC crypto context");
-		crypto_free_cipher(tfm_aes);
 		return PTR_ERR(tfm_cmac);
 	}
 
@@ -3935,14 +3896,12 @@
 	if (IS_ERR(tfm_ecdh)) {
 		BT_ERR("Unable to create ECDH crypto context");
 		crypto_free_shash(tfm_cmac);
-		crypto_free_cipher(tfm_aes);
 		return PTR_ERR(tfm_ecdh);
 	}
 
-	err = run_selftests(tfm_aes, tfm_cmac, tfm_ecdh);
+	err = run_selftests(tfm_cmac, tfm_ecdh);
 
 	crypto_free_shash(tfm_cmac);
-	crypto_free_cipher(tfm_aes);
 	crypto_free_kpp(tfm_ecdh);
 
 	return err;
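
The net effect in smp.c is that every single-block AES operation now goes through the stateless library calls instead of a long-lived crypto_cipher transform, so the tfm_aes allocations and their error paths disappear. Sketched on its own, the pattern smp_e() now uses (assuming only <crypto/aes.h> and <linux/string.h>, as in the hunk above):

#include <crypto/aes.h>
#include <linux/string.h>

static int aes_lib_demo(const u8 key[16], u8 block[16])
{
	struct crypto_aes_ctx ctx;
	int err;

	err = aes_expandkey(&ctx, key, 16);	/* AES-128 key schedule on the stack */
	if (err)
		return err;

	aes_encrypt(&ctx, block, block);	/* encrypt one 16-byte block in place */

	memzero_explicit(&ctx, sizeof(ctx));	/* drop the round keys */
	return 0;
}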
diff --git a/tools/crypto/getstat.c b/tools/crypto/getstat.c
deleted file mode 100644
index 9e8ff76..0000000
--- a/tools/crypto/getstat.c
+++ /dev/null
@@ -1,294 +0,0 @@
-/* Heavily copied from libkcapi 2015 - 2017, Stephan Mueller <smueller@chronox.de> */
-#include <errno.h>
-#include <linux/cryptouser.h>
-#include <linux/netlink.h>
-#include <linux/rtnetlink.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <time.h>
-#include <unistd.h>
-
-#define CR_RTA(x)  ((struct rtattr *)(((char *)(x)) + NLMSG_ALIGN(sizeof(struct crypto_user_alg))))
-
-static int get_stat(const char *drivername)
-{
-	struct {
-		struct nlmsghdr n;
-		struct crypto_user_alg cru;
-	} req;
-	struct sockaddr_nl nl;
-	int sd = 0, ret;
-	socklen_t addr_len;
-	struct iovec iov;
-	struct msghdr msg;
-	char buf[4096];
-	struct nlmsghdr *res_n = (struct nlmsghdr *)buf;
-	struct crypto_user_alg *cru_res = NULL;
-	int res_len = 0;
-	struct rtattr *tb[CRYPTOCFGA_MAX + 1];
-	struct rtattr *rta;
-	struct nlmsgerr *errmsg;
-
-	memset(&req, 0, sizeof(req));
-	memset(&buf, 0, sizeof(buf));
-	memset(&msg, 0, sizeof(msg));
-
-	req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.cru));
-	req.n.nlmsg_flags = NLM_F_REQUEST;
-	req.n.nlmsg_type = CRYPTO_MSG_GETSTAT;
-	req.n.nlmsg_seq = time(NULL);
-
-	strncpy(req.cru.cru_driver_name, drivername, strlen(drivername));
-
-	sd =  socket(AF_NETLINK, SOCK_RAW, NETLINK_CRYPTO);
-	if (sd < 0) {
-		fprintf(stderr, "Netlink error: cannot open netlink socket");
-		return -errno;
-	}
-	memset(&nl, 0, sizeof(nl));
-	nl.nl_family = AF_NETLINK;
-	if (bind(sd, (struct sockaddr *)&nl, sizeof(nl)) < 0) {
-		ret = -errno;
-		fprintf(stderr, "Netlink error: cannot bind netlink socket");
-		goto out;
-	}
-
-	/* sanity check that netlink socket was successfully opened */
-	addr_len = sizeof(nl);
-	if (getsockname(sd, (struct sockaddr *)&nl, &addr_len) < 0) {
-		ret = -errno;
-		printf("Netlink error: cannot getsockname");
-		goto out;
-	}
-	if (addr_len != sizeof(nl)) {
-		ret = -errno;
-		printf("Netlink error: wrong address length %d", addr_len);
-		goto out;
-	}
-	if (nl.nl_family != AF_NETLINK) {
-		ret = -errno;
-		printf("Netlink error: wrong address family %d",
-				nl.nl_family);
-		goto out;
-	}
-
-	memset(&nl, 0, sizeof(nl));
-	nl.nl_family = AF_NETLINK;
-	iov.iov_base = (void *)&req.n;
-	iov.iov_len = req.n.nlmsg_len;
-	msg.msg_name = &nl;
-	msg.msg_namelen = sizeof(nl);
-	msg.msg_iov = &iov;
-	msg.msg_iovlen = 1;
-	if (sendmsg(sd, &msg, 0) < 0) {
-		ret = -errno;
-		printf("Netlink error: sendmsg failed");
-		goto out;
-	}
-	memset(buf, 0, sizeof(buf));
-	iov.iov_base = buf;
-	while (1) {
-		iov.iov_len = sizeof(buf);
-		ret = recvmsg(sd, &msg, 0);
-		if (ret < 0) {
-			if (errno == EINTR || errno == EAGAIN)
-				continue;
-			ret = -errno;
-			printf("Netlink error: netlink receive error");
-			goto out;
-		}
-		if (ret == 0) {
-			ret = -errno;
-			printf("Netlink error: no data");
-			goto out;
-		}
-		if (ret > sizeof(buf)) {
-			ret = -errno;
-			printf("Netlink error: received too much data");
-			goto out;
-		}
-		break;
-	}
-
-	ret = -EFAULT;
-	res_len = res_n->nlmsg_len;
-	if (res_n->nlmsg_type == NLMSG_ERROR) {
-		errmsg = NLMSG_DATA(res_n);
-		fprintf(stderr, "Fail with %d\n", errmsg->error);
-		ret = errmsg->error;
-		goto out;
-	}
-
-	if (res_n->nlmsg_type == CRYPTO_MSG_GETSTAT) {
-		cru_res = NLMSG_DATA(res_n);
-		res_len -= NLMSG_SPACE(sizeof(*cru_res));
-	}
-	if (res_len < 0) {
-		printf("Netlink error: nlmsg len %d\n", res_len);
-		goto out;
-	}
-
-	if (!cru_res) {
-		ret = -EFAULT;
-		printf("Netlink error: no cru_res\n");
-		goto out;
-	}
-
-	rta = CR_RTA(cru_res);
-	memset(tb, 0, sizeof(struct rtattr *) * (CRYPTOCFGA_MAX + 1));
-	while (RTA_OK(rta, res_len)) {
-		if ((rta->rta_type <= CRYPTOCFGA_MAX) && (!tb[rta->rta_type]))
-			tb[rta->rta_type] = rta;
-		rta = RTA_NEXT(rta, res_len);
-	}
-	if (res_len) {
-		printf("Netlink error: unprocessed data %d",
-				res_len);
-		goto out;
-	}
-
-	if (tb[CRYPTOCFGA_STAT_HASH]) {
-		struct rtattr *rta = tb[CRYPTOCFGA_STAT_HASH];
-		struct crypto_stat_hash *rhash =
-			(struct crypto_stat_hash *)RTA_DATA(rta);
-		printf("%s\tHash\n\tHash: %llu bytes: %llu\n\tErrors: %llu\n",
-			drivername,
-			rhash->stat_hash_cnt, rhash->stat_hash_tlen,
-			rhash->stat_err_cnt);
-	} else if (tb[CRYPTOCFGA_STAT_COMPRESS]) {
-		struct rtattr *rta = tb[CRYPTOCFGA_STAT_COMPRESS];
-		struct crypto_stat_compress *rblk =
-			(struct crypto_stat_compress *)RTA_DATA(rta);
-		printf("%s\tCompress\n\tCompress: %llu bytes: %llu\n\tDecompress: %llu bytes: %llu\n\tErrors: %llu\n",
-			drivername,
-			rblk->stat_compress_cnt, rblk->stat_compress_tlen,
-			rblk->stat_decompress_cnt, rblk->stat_decompress_tlen,
-			rblk->stat_err_cnt);
-	} else if (tb[CRYPTOCFGA_STAT_ACOMP]) {
-		struct rtattr *rta = tb[CRYPTOCFGA_STAT_ACOMP];
-		struct crypto_stat_compress *rcomp =
-			(struct crypto_stat_compress *)RTA_DATA(rta);
-		printf("%s\tACompress\n\tCompress: %llu bytes: %llu\n\tDecompress: %llu bytes: %llu\n\tErrors: %llu\n",
-			drivername,
-			rcomp->stat_compress_cnt, rcomp->stat_compress_tlen,
-			rcomp->stat_decompress_cnt, rcomp->stat_decompress_tlen,
-			rcomp->stat_err_cnt);
-	} else if (tb[CRYPTOCFGA_STAT_AEAD]) {
-		struct rtattr *rta = tb[CRYPTOCFGA_STAT_AEAD];
-		struct crypto_stat_aead *raead =
-			(struct crypto_stat_aead *)RTA_DATA(rta);
-		printf("%s\tAEAD\n\tEncrypt: %llu bytes: %llu\n\tDecrypt: %llu bytes: %llu\n\tErrors: %llu\n",
-			drivername,
-			raead->stat_encrypt_cnt, raead->stat_encrypt_tlen,
-			raead->stat_decrypt_cnt, raead->stat_decrypt_tlen,
-			raead->stat_err_cnt);
-	} else if (tb[CRYPTOCFGA_STAT_BLKCIPHER]) {
-		struct rtattr *rta = tb[CRYPTOCFGA_STAT_BLKCIPHER];
-		struct crypto_stat_cipher *rblk =
-			(struct crypto_stat_cipher *)RTA_DATA(rta);
-		printf("%s\tCipher\n\tEncrypt: %llu bytes: %llu\n\tDecrypt: %llu bytes: %llu\n\tErrors: %llu\n",
-			drivername,
-			rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen,
-			rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen,
-			rblk->stat_err_cnt);
-	} else if (tb[CRYPTOCFGA_STAT_AKCIPHER]) {
-		struct rtattr *rta = tb[CRYPTOCFGA_STAT_AKCIPHER];
-		struct crypto_stat_akcipher *rblk =
-			(struct crypto_stat_akcipher *)RTA_DATA(rta);
-		printf("%s\tAkcipher\n\tEncrypt: %llu bytes: %llu\n\tDecrypt: %llu bytes: %llu\n\tSign: %llu\n\tVerify: %llu\n\tErrors: %llu\n",
-			drivername,
-			rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen,
-			rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen,
-			rblk->stat_sign_cnt, rblk->stat_verify_cnt,
-			rblk->stat_err_cnt);
-	} else if (tb[CRYPTOCFGA_STAT_CIPHER]) {
-		struct rtattr *rta = tb[CRYPTOCFGA_STAT_CIPHER];
-		struct crypto_stat_cipher *rblk =
-			(struct crypto_stat_cipher *)RTA_DATA(rta);
-		printf("%s\tcipher\n\tEncrypt: %llu bytes: %llu\n\tDecrypt: %llu bytes: %llu\n\tErrors: %llu\n",
-			drivername,
-			rblk->stat_encrypt_cnt, rblk->stat_encrypt_tlen,
-			rblk->stat_decrypt_cnt, rblk->stat_decrypt_tlen,
-			rblk->stat_err_cnt);
-	} else if (tb[CRYPTOCFGA_STAT_RNG]) {
-		struct rtattr *rta = tb[CRYPTOCFGA_STAT_RNG];
-		struct crypto_stat_rng *rrng =
-			(struct crypto_stat_rng *)RTA_DATA(rta);
-		printf("%s\tRNG\n\tSeed: %llu\n\tGenerate: %llu bytes: %llu\n\tErrors: %llu\n",
-			drivername,
-			rrng->stat_seed_cnt,
-			rrng->stat_generate_cnt, rrng->stat_generate_tlen,
-			rrng->stat_err_cnt);
-	} else if (tb[CRYPTOCFGA_STAT_KPP]) {
-		struct rtattr *rta = tb[CRYPTOCFGA_STAT_KPP];
-		struct crypto_stat_kpp *rkpp =
-			(struct crypto_stat_kpp *)RTA_DATA(rta);
-		printf("%s\tKPP\n\tSetsecret: %llu\n\tGenerate public key: %llu\n\tCompute_shared_secret: %llu\n\tErrors: %llu\n",
-			drivername,
-			rkpp->stat_setsecret_cnt,
-			rkpp->stat_generate_public_key_cnt,
-			rkpp->stat_compute_shared_secret_cnt,
-			rkpp->stat_err_cnt);
-	} else {
-		fprintf(stderr, "%s is of an unknown algorithm\n", drivername);
-	}
-	ret = 0;
-out:
-	close(sd);
-	return ret;
-}
-
-int main(int argc, const char *argv[])
-{
-	char buf[4096];
-	FILE *procfd;
-	int i, lastspace;
-	int ret;
-
-	procfd = fopen("/proc/crypto", "r");
-	if (!procfd) {
-		ret = errno;
-		fprintf(stderr, "Cannot open /proc/crypto %s\n", strerror(errno));
-		return ret;
-	}
-	if (argc > 1) {
-		if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
-			printf("Usage: %s [-h|--help] display this help\n", argv[0]);
-			printf("Usage: %s display all crypto statistics\n", argv[0]);
-			printf("Usage: %s drivername1 drivername2 ... = display crypto statistics about drivername1 ...\n", argv[0]);
-			return 0;
-		}
-		for (i = 1; i < argc; i++) {
-			ret = get_stat(argv[i]);
-			if (ret) {
-				fprintf(stderr, "Failed with %s\n", strerror(-ret));
-				return ret;
-			}
-		}
-		return 0;
-	}
-
-	while (fgets(buf, sizeof(buf), procfd)) {
-		if (!strncmp(buf, "driver", 6)) {
-			lastspace = 0;
-			i = 0;
-			while (i < strlen(buf)) {
-				i++;
-				if (buf[i] == ' ')
-					lastspace = i;
-			}
-			buf[strlen(buf) - 1] = '\0';
-			ret = get_stat(buf + lastspace + 1);
-			if (ret) {
-				fprintf(stderr, "Failed with %s\n", strerror(-ret));
-				goto out;
-			}
-		}
-	}
-out:
-	fclose(procfd);
-	return ret;
-}